1c4e9adb004d5ed9a47e20a58c8b4c3fcb073c8c
[gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tm_p.h"
29 #include "timevar.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "obstack.h"
33 #include "basic-block.h"
34 #include "recog.h"
35 #include "bitmap.h"
36 #include "dce.h"
37 #include "expr.h"
38 #include "except.h"
39 #include "regs.h"
40 #include "tree-pass.h"
41 #include "df.h"
42
/* Normalize STACK_GROWS_DOWNWARD so that it is always defined, as 1
   when the target defines it and as 0 otherwise.  This lets the code
   below use it in ordinary C expressions.  */
#ifndef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 0
#else
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#endif

/* Heap-allocated vectors of bitmaps, used for reg_copy_graph.  */
DEF_VEC_P (bitmap);
DEF_VEC_ALLOC_P (bitmap,heap);
52
53 /* Decompose multi-word pseudo-registers into individual
54 pseudo-registers when possible. This is possible when all the uses
55 of a multi-word register are via SUBREG, or are copies of the
56 register to another location. Breaking apart the register permits
57 more CSE and permits better register allocation. */
58
59 /* Bit N in this bitmap is set if regno N is used in a context in
60 which we can decompose it. */
61 static bitmap decomposable_context;
62
63 /* Bit N in this bitmap is set if regno N is used in a context in
64 which it can not be decomposed. */
65 static bitmap non_decomposable_context;
66
67 /* Bit N in this bitmap is set if regno N is used in a subreg
68 which changes the mode but not the size. This typically happens
69 when the register accessed as a floating-point value; we want to
70 avoid generating accesses to its subwords in integer modes. */
71 static bitmap subreg_context;
72
73 /* Bit N in the bitmap in element M of this array is set if there is a
74 copy from reg M to reg N. */
75 static VEC(bitmap,heap) *reg_copy_graph;
76
77 /* Return whether X is a simple object which we can take a word_mode
78 subreg of. */
79
80 static bool
81 simple_move_operand (rtx x)
82 {
83 if (GET_CODE (x) == SUBREG)
84 x = SUBREG_REG (x);
85
86 if (!OBJECT_P (x))
87 return false;
88
89 if (GET_CODE (x) == LABEL_REF
90 || GET_CODE (x) == SYMBOL_REF
91 || GET_CODE (x) == HIGH
92 || GET_CODE (x) == CONST)
93 return false;
94
95 if (MEM_P (x)
96 && (MEM_VOLATILE_P (x)
97 || mode_dependent_address_p (XEXP (x, 0))))
98 return false;
99
100 return true;
101 }
102
103 /* If INSN is a single set between two objects, return the single set.
104 Such an insn can always be decomposed. INSN should have been
105 passed to recog and extract_insn before this is called. */
106
107 static rtx
108 simple_move (rtx insn)
109 {
110 rtx x;
111 rtx set;
112 enum machine_mode mode;
113
114 if (recog_data.n_operands != 2)
115 return NULL_RTX;
116
117 set = single_set (insn);
118 if (!set)
119 return NULL_RTX;
120
121 x = SET_DEST (set);
122 if (x != recog_data.operand[0] && x != recog_data.operand[1])
123 return NULL_RTX;
124 if (!simple_move_operand (x))
125 return NULL_RTX;
126
127 x = SET_SRC (set);
128 if (x != recog_data.operand[0] && x != recog_data.operand[1])
129 return NULL_RTX;
130 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
131 things like x86 rdtsc which returns a DImode value. */
132 if (GET_CODE (x) != ASM_OPERANDS
133 && !simple_move_operand (x))
134 return NULL_RTX;
135
136 /* We try to decompose in integer modes, to avoid generating
137 inefficient code copying between integer and floating point
138 registers. That means that we can't decompose if this is a
139 non-integer mode for which there is no integer mode of the same
140 size. */
141 mode = GET_MODE (SET_SRC (set));
142 if (!SCALAR_INT_MODE_P (mode)
143 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
144 == BLKmode))
145 return NULL_RTX;
146
147 /* Reject PARTIAL_INT modes. They are used for processor specific
148 purposes and it's probably best not to tamper with them. */
149 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
150 return NULL_RTX;
151
152 return set;
153 }
154
155 /* If SET is a copy from one multi-word pseudo-register to another,
156 record that in reg_copy_graph. Return whether it is such a
157 copy. */
158
159 static bool
160 find_pseudo_copy (rtx set)
161 {
162 rtx dest = SET_DEST (set);
163 rtx src = SET_SRC (set);
164 unsigned int rd, rs;
165 bitmap b;
166
167 if (!REG_P (dest) || !REG_P (src))
168 return false;
169
170 rd = REGNO (dest);
171 rs = REGNO (src);
172 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
173 return false;
174
175 if (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
176 return false;
177
178 b = VEC_index (bitmap, reg_copy_graph, rs);
179 if (b == NULL)
180 {
181 b = BITMAP_ALLOC (NULL);
182 VEC_replace (bitmap, reg_copy_graph, rs, b);
183 }
184
185 bitmap_set_bit (b, rd);
186
187 return true;
188 }
189
190 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
191 where they are copied to another register, add the register to
192 which they are copied to DECOMPOSABLE_CONTEXT. Use
193 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
194 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
195
196 static void
197 propagate_pseudo_copies (void)
198 {
199 bitmap queue, propagate;
200
201 queue = BITMAP_ALLOC (NULL);
202 propagate = BITMAP_ALLOC (NULL);
203
204 bitmap_copy (queue, decomposable_context);
205 do
206 {
207 bitmap_iterator iter;
208 unsigned int i;
209
210 bitmap_clear (propagate);
211
212 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
213 {
214 bitmap b = VEC_index (bitmap, reg_copy_graph, i);
215 if (b)
216 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
217 }
218
219 bitmap_and_compl (queue, propagate, decomposable_context);
220 bitmap_ior_into (decomposable_context, propagate);
221 }
222 while (!bitmap_empty_p (queue));
223
224 BITMAP_FREE (queue);
225 BITMAP_FREE (propagate);
226 }
227
/* How the insn being scanned has been classified.  A pointer to one
   of these values is handed to find_decomposable_subregs as the
   for_each_rtx data argument.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move from one pseudo-register to another.  */
  SIMPLE_PSEUDO_REG_MOVE,
  /* The insn is a simple move involving a non-pseudo-register.  */
  SIMPLE_MOVE
};
240
241 /* This is called via for_each_rtx. If we find a SUBREG which we
242 could use to decompose a pseudo-register, set a bit in
243 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is
244 not a simple pseudo-register copy, DATA will point at the type of
245 move, and we set a bit in DECOMPOSABLE_CONTEXT or
246 NON_DECOMPOSABLE_CONTEXT as appropriate. */
247
248 static int
249 find_decomposable_subregs (rtx *px, void *data)
250 {
251 enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
252 rtx x = *px;
253
254 if (x == NULL_RTX)
255 return 0;
256
257 if (GET_CODE (x) == SUBREG)
258 {
259 rtx inner = SUBREG_REG (x);
260 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
261
262 if (!REG_P (inner))
263 return 0;
264
265 regno = REGNO (inner);
266 if (HARD_REGISTER_NUM_P (regno))
267 return -1;
268
269 outer_size = GET_MODE_SIZE (GET_MODE (x));
270 inner_size = GET_MODE_SIZE (GET_MODE (inner));
271 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
272 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
273
274 /* We only try to decompose single word subregs of multi-word
275 registers. When we find one, we return -1 to avoid iterating
276 over the inner register.
277
278 ??? This doesn't allow, e.g., DImode subregs of TImode values
279 on 32-bit targets. We would need to record the way the
280 pseudo-register was used, and only decompose if all the uses
281 were the same number and size of pieces. Hopefully this
282 doesn't happen much. */
283
284 if (outer_words == 1 && inner_words > 1)
285 {
286 bitmap_set_bit (decomposable_context, regno);
287 return -1;
288 }
289
290 /* If this is a cast from one mode to another, where the modes
291 have the same size, and they are not tieable, then mark this
292 register as non-decomposable. If we decompose it we are
293 likely to mess up whatever the backend is trying to do. */
294 if (outer_words > 1
295 && outer_size == inner_size
296 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
297 {
298 bitmap_set_bit (non_decomposable_context, regno);
299 bitmap_set_bit (subreg_context, regno);
300 return -1;
301 }
302 }
303 else if (REG_P (x))
304 {
305 unsigned int regno;
306
307 /* We will see an outer SUBREG before we see the inner REG, so
308 when we see a plain REG here it means a direct reference to
309 the register.
310
311 If this is not a simple copy from one location to another,
312 then we can not decompose this register. If this is a simple
313 copy from one pseudo-register to another, and the mode is right
314 then we mark the register as decomposable.
315 Otherwise we don't say anything about this register --
316 it could be decomposed, but whether that would be
317 profitable depends upon how it is used elsewhere.
318
319 We only set bits in the bitmap for multi-word
320 pseudo-registers, since those are the only ones we care about
321 and it keeps the size of the bitmaps down. */
322
323 regno = REGNO (x);
324 if (!HARD_REGISTER_NUM_P (regno)
325 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
326 {
327 switch (*pcmi)
328 {
329 case NOT_SIMPLE_MOVE:
330 bitmap_set_bit (non_decomposable_context, regno);
331 break;
332 case SIMPLE_PSEUDO_REG_MOVE:
333 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
334 bitmap_set_bit (decomposable_context, regno);
335 break;
336 case SIMPLE_MOVE:
337 break;
338 default:
339 gcc_unreachable ();
340 }
341 }
342 }
343 else if (MEM_P (x))
344 {
345 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
346
347 /* Any registers used in a MEM do not participate in a
348 SIMPLE_MOVE or SIMPLE_PSEUDO_REG_MOVE. Do our own recursion
349 here, and return -1 to block the parent's recursion. */
350 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
351 return -1;
352 }
353
354 return 0;
355 }
356
357 /* Decompose REGNO into word-sized components. We smash the REG node
358 in place. This ensures that (1) something goes wrong quickly if we
359 fail to make some replacement, and (2) the debug information inside
360 the symbol table is automatically kept up to date. */
361
362 static void
363 decompose_register (unsigned int regno)
364 {
365 rtx reg;
366 unsigned int words, i;
367 rtvec v;
368
369 reg = regno_reg_rtx[regno];
370
371 regno_reg_rtx[regno] = NULL_RTX;
372
373 words = GET_MODE_SIZE (GET_MODE (reg));
374 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
375
376 v = rtvec_alloc (words);
377 for (i = 0; i < words; ++i)
378 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
379
380 PUT_CODE (reg, CONCATN);
381 XVEC (reg, 0) = v;
382
383 if (dump_file)
384 {
385 fprintf (dump_file, "; Splitting reg %u ->", regno);
386 for (i = 0; i < words; ++i)
387 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
388 fputc ('\n', dump_file);
389 }
390 }
391
392 /* Get a SUBREG of a CONCATN. */
393
394 static rtx
395 simplify_subreg_concatn (enum machine_mode outermode, rtx op,
396 unsigned int byte)
397 {
398 unsigned int inner_size;
399 enum machine_mode innermode, partmode;
400 rtx part;
401 unsigned int final_offset;
402
403 gcc_assert (GET_CODE (op) == CONCATN);
404 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
405
406 innermode = GET_MODE (op);
407 gcc_assert (byte < GET_MODE_SIZE (innermode));
408 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
409
410 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
411 part = XVECEXP (op, 0, byte / inner_size);
412 partmode = GET_MODE (part);
413
414 if (partmode == VOIDmode)
415 {
416 gcc_assert (VECTOR_MODE_P (innermode));
417 partmode = GET_MODE_INNER (innermode);
418 }
419
420 final_offset = byte % inner_size;
421 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
422 return NULL_RTX;
423
424 return simplify_gen_subreg (outermode, part, partmode, final_offset);
425 }
426
427 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
428
429 static rtx
430 simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
431 enum machine_mode innermode, unsigned int byte)
432 {
433 rtx ret;
434
435 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
436 If OP is a SUBREG of a CONCATN, then it must be a simple mode
437 change with the same size and offset 0, or it must extract a
438 part. We shouldn't see anything else here. */
439 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
440 {
441 rtx op2;
442
443 if ((GET_MODE_SIZE (GET_MODE (op))
444 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
445 && SUBREG_BYTE (op) == 0)
446 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
447 GET_MODE (SUBREG_REG (op)), byte);
448
449 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
450 SUBREG_BYTE (op));
451 if (op2 == NULL_RTX)
452 {
453 /* We don't handle paradoxical subregs here. */
454 gcc_assert (GET_MODE_SIZE (outermode)
455 <= GET_MODE_SIZE (GET_MODE (op)));
456 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
457 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
458 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
459 byte + SUBREG_BYTE (op));
460 gcc_assert (op2 != NULL_RTX);
461 return op2;
462 }
463
464 op = op2;
465 gcc_assert (op != NULL_RTX);
466 gcc_assert (innermode == GET_MODE (op));
467 }
468
469 if (GET_CODE (op) == CONCATN)
470 return simplify_subreg_concatn (outermode, op, byte);
471
472 ret = simplify_gen_subreg (outermode, op, innermode, byte);
473
474 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
475 resolve_simple_move will ask for the high part of the paradoxical
476 subreg, which does not have a value. Just return a zero. */
477 if (ret == NULL_RTX
478 && GET_CODE (op) == SUBREG
479 && SUBREG_BYTE (op) == 0
480 && (GET_MODE_SIZE (innermode)
481 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
482 return CONST0_RTX (outermode);
483
484 gcc_assert (ret != NULL_RTX);
485 return ret;
486 }
487
488 /* Return whether we should resolve X into the registers into which it
489 was decomposed. */
490
491 static bool
492 resolve_reg_p (rtx x)
493 {
494 return GET_CODE (x) == CONCATN;
495 }
496
497 /* Return whether X is a SUBREG of a register which we need to
498 resolve. */
499
500 static bool
501 resolve_subreg_p (rtx x)
502 {
503 if (GET_CODE (x) != SUBREG)
504 return false;
505 return resolve_reg_p (SUBREG_REG (x));
506 }
507
508 /* This is called via for_each_rtx. Look for SUBREGs which need to be
509 decomposed. */
510
511 static int
512 resolve_subreg_use (rtx *px, void *data)
513 {
514 rtx insn = (rtx) data;
515 rtx x = *px;
516
517 if (x == NULL_RTX)
518 return 0;
519
520 if (resolve_subreg_p (x))
521 {
522 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
523 SUBREG_BYTE (x));
524
525 /* It is possible for a note to contain a reference which we can
526 decompose. In this case, return 1 to the caller to indicate
527 that the note must be removed. */
528 if (!x)
529 {
530 gcc_assert (!insn);
531 return 1;
532 }
533
534 validate_change (insn, px, x, 1);
535 return -1;
536 }
537
538 if (resolve_reg_p (x))
539 {
540 /* Return 1 to the caller to indicate that we found a direct
541 reference to a register which is being decomposed. This can
542 happen inside notes, multiword shift or zero-extend
543 instructions. */
544 return 1;
545 }
546
547 return 0;
548 }
549
550 /* This is called via for_each_rtx. Look for SUBREGs which can be
551 decomposed and decomposed REGs that need copying. */
552
553 static int
554 adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
555 {
556 rtx x = *px;
557
558 if (x == NULL_RTX)
559 return 0;
560
561 if (resolve_subreg_p (x))
562 {
563 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
564 SUBREG_BYTE (x));
565
566 if (x)
567 *px = x;
568 else
569 x = copy_rtx (*px);
570 }
571
572 if (resolve_reg_p (x))
573 *px = copy_rtx (x);
574
575 return 0;
576 }
577
578 /* Resolve any decomposed registers which appear in register notes on
579 INSN. */
580
581 static void
582 resolve_reg_notes (rtx insn)
583 {
584 rtx *pnote, note;
585
586 note = find_reg_equal_equiv_note (insn);
587 if (note)
588 {
589 int old_count = num_validated_changes ();
590 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
591 remove_note (insn, note);
592 else
593 if (old_count != num_validated_changes ())
594 df_notes_rescan (insn);
595 }
596
597 pnote = &REG_NOTES (insn);
598 while (*pnote != NULL_RTX)
599 {
600 bool del = false;
601
602 note = *pnote;
603 switch (REG_NOTE_KIND (note))
604 {
605 case REG_DEAD:
606 case REG_UNUSED:
607 if (resolve_reg_p (XEXP (note, 0)))
608 del = true;
609 break;
610
611 default:
612 break;
613 }
614
615 if (del)
616 *pnote = XEXP (note, 1);
617 else
618 pnote = &XEXP (note, 1);
619 }
620 }
621
622 /* Return whether X can be decomposed into subwords. */
623
624 static bool
625 can_decompose_p (rtx x)
626 {
627 if (REG_P (x))
628 {
629 unsigned int regno = REGNO (x);
630
631 if (HARD_REGISTER_NUM_P (regno))
632 return (validate_subreg (word_mode, GET_MODE (x), x, UNITS_PER_WORD)
633 && HARD_REGNO_MODE_OK (regno, word_mode));
634 else
635 return !bitmap_bit_p (subreg_context, regno);
636 }
637
638 return true;
639 }
640
641 /* Decompose the registers used in a simple move SET within INSN. If
642 we don't change anything, return INSN, otherwise return the start
643 of the sequence of moves. */
644
645 static rtx
646 resolve_simple_move (rtx set, rtx insn)
647 {
648 rtx src, dest, real_dest, insns;
649 enum machine_mode orig_mode, dest_mode;
650 unsigned int words;
651 bool pushing;
652
653 src = SET_SRC (set);
654 dest = SET_DEST (set);
655 orig_mode = GET_MODE (dest);
656
657 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
658 if (words <= 1)
659 return insn;
660
661 start_sequence ();
662
663 /* We have to handle copying from a SUBREG of a decomposed reg where
664 the SUBREG is larger than word size. Rather than assume that we
665 can take a word_mode SUBREG of the destination, we copy to a new
666 register and then copy that to the destination. */
667
668 real_dest = NULL_RTX;
669
670 if (GET_CODE (src) == SUBREG
671 && resolve_reg_p (SUBREG_REG (src))
672 && (SUBREG_BYTE (src) != 0
673 || (GET_MODE_SIZE (orig_mode)
674 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
675 {
676 real_dest = dest;
677 dest = gen_reg_rtx (orig_mode);
678 if (REG_P (real_dest))
679 REG_ATTRS (dest) = REG_ATTRS (real_dest);
680 }
681
682 /* Similarly if we are copying to a SUBREG of a decomposed reg where
683 the SUBREG is larger than word size. */
684
685 if (GET_CODE (dest) == SUBREG
686 && resolve_reg_p (SUBREG_REG (dest))
687 && (SUBREG_BYTE (dest) != 0
688 || (GET_MODE_SIZE (orig_mode)
689 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
690 {
691 rtx reg, minsn, smove;
692
693 reg = gen_reg_rtx (orig_mode);
694 minsn = emit_move_insn (reg, src);
695 smove = single_set (minsn);
696 gcc_assert (smove != NULL_RTX);
697 resolve_simple_move (smove, minsn);
698 src = reg;
699 }
700
701 /* If we didn't have any big SUBREGS of decomposed registers, and
702 neither side of the move is a register we are decomposing, then
703 we don't have to do anything here. */
704
705 if (src == SET_SRC (set)
706 && dest == SET_DEST (set)
707 && !resolve_reg_p (src)
708 && !resolve_subreg_p (src)
709 && !resolve_reg_p (dest)
710 && !resolve_subreg_p (dest))
711 {
712 end_sequence ();
713 return insn;
714 }
715
716 /* It's possible for the code to use a subreg of a decomposed
717 register while forming an address. We need to handle that before
718 passing the address to emit_move_insn. We pass NULL_RTX as the
719 insn parameter to resolve_subreg_use because we can not validate
720 the insn yet. */
721 if (MEM_P (src) || MEM_P (dest))
722 {
723 int acg;
724
725 if (MEM_P (src))
726 for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
727 if (MEM_P (dest))
728 for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
729 acg = apply_change_group ();
730 gcc_assert (acg);
731 }
732
733 /* If SRC is a register which we can't decompose, or has side
734 effects, we need to move via a temporary register. */
735
736 if (!can_decompose_p (src)
737 || side_effects_p (src)
738 || GET_CODE (src) == ASM_OPERANDS)
739 {
740 rtx reg;
741
742 reg = gen_reg_rtx (orig_mode);
743 emit_move_insn (reg, src);
744 src = reg;
745 }
746
747 /* If DEST is a register which we can't decompose, or has side
748 effects, we need to first move to a temporary register. We
749 handle the common case of pushing an operand directly. We also
750 go through a temporary register if it holds a floating point
751 value. This gives us better code on systems which can't move
752 data easily between integer and floating point registers. */
753
754 dest_mode = orig_mode;
755 pushing = push_operand (dest, dest_mode);
756 if (!can_decompose_p (dest)
757 || (side_effects_p (dest) && !pushing)
758 || (!SCALAR_INT_MODE_P (dest_mode)
759 && !resolve_reg_p (dest)
760 && !resolve_subreg_p (dest)))
761 {
762 if (real_dest == NULL_RTX)
763 real_dest = dest;
764 if (!SCALAR_INT_MODE_P (dest_mode))
765 {
766 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
767 MODE_INT, 0);
768 gcc_assert (dest_mode != BLKmode);
769 }
770 dest = gen_reg_rtx (dest_mode);
771 if (REG_P (real_dest))
772 REG_ATTRS (dest) = REG_ATTRS (real_dest);
773 }
774
775 if (pushing)
776 {
777 unsigned int i, j, jinc;
778
779 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
780 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
781 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
782
783 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
784 {
785 j = 0;
786 jinc = 1;
787 }
788 else
789 {
790 j = words - 1;
791 jinc = -1;
792 }
793
794 for (i = 0; i < words; ++i, j += jinc)
795 {
796 rtx temp;
797
798 temp = copy_rtx (XEXP (dest, 0));
799 temp = adjust_automodify_address_nv (dest, word_mode, temp,
800 j * UNITS_PER_WORD);
801 emit_move_insn (temp,
802 simplify_gen_subreg_concatn (word_mode, src,
803 orig_mode,
804 j * UNITS_PER_WORD));
805 }
806 }
807 else
808 {
809 unsigned int i;
810
811 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
812 emit_clobber (dest);
813
814 for (i = 0; i < words; ++i)
815 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
816 dest_mode,
817 i * UNITS_PER_WORD),
818 simplify_gen_subreg_concatn (word_mode, src,
819 orig_mode,
820 i * UNITS_PER_WORD));
821 }
822
823 if (real_dest != NULL_RTX)
824 {
825 rtx mdest, minsn, smove;
826
827 if (dest_mode == orig_mode)
828 mdest = dest;
829 else
830 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
831 minsn = emit_move_insn (real_dest, mdest);
832
833 smove = single_set (minsn);
834 gcc_assert (smove != NULL_RTX);
835
836 resolve_simple_move (smove, minsn);
837 }
838
839 insns = get_insns ();
840 end_sequence ();
841
842 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
843
844 emit_insn_before (insns, insn);
845
846 delete_insn (insn);
847
848 return insns;
849 }
850
851 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
852 component registers. Return whether we changed something. */
853
854 static bool
855 resolve_clobber (rtx pat, rtx insn)
856 {
857 rtx reg;
858 enum machine_mode orig_mode;
859 unsigned int words, i;
860 int ret;
861
862 reg = XEXP (pat, 0);
863 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
864 return false;
865
866 orig_mode = GET_MODE (reg);
867 words = GET_MODE_SIZE (orig_mode);
868 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
869
870 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
871 simplify_gen_subreg_concatn (word_mode, reg,
872 orig_mode, 0),
873 0);
874 df_insn_rescan (insn);
875 gcc_assert (ret != 0);
876
877 for (i = words - 1; i > 0; --i)
878 {
879 rtx x;
880
881 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
882 i * UNITS_PER_WORD);
883 x = gen_rtx_CLOBBER (VOIDmode, x);
884 emit_insn_after (x, insn);
885 }
886
887 resolve_reg_notes (insn);
888
889 return true;
890 }
891
892 /* A USE of a decomposed register is no longer meaningful. Return
893 whether we changed something. */
894
895 static bool
896 resolve_use (rtx pat, rtx insn)
897 {
898 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
899 {
900 delete_insn (insn);
901 return true;
902 }
903
904 resolve_reg_notes (insn);
905
906 return false;
907 }
908
909 /* A VAR_LOCATION can be simplified. */
910
911 static void
912 resolve_debug (rtx insn)
913 {
914 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);
915
916 df_insn_rescan (insn);
917
918 resolve_reg_notes (insn);
919 }
920
921 /* Checks if INSN is a decomposable multiword-shift or zero-extend and
922 sets the decomposable_context bitmap accordingly. A non-zero value
923 is returned if a decomposable insn has been found. */
924
925 static int
926 find_decomposable_shift_zext (rtx insn)
927 {
928 rtx set;
929 rtx op;
930 rtx op_operand;
931
932 set = single_set (insn);
933 if (!set)
934 return 0;
935
936 op = SET_SRC (set);
937 if (GET_CODE (op) != ASHIFT
938 && GET_CODE (op) != LSHIFTRT
939 && GET_CODE (op) != ZERO_EXTEND)
940 return 0;
941
942 op_operand = XEXP (op, 0);
943 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
944 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
945 || HARD_REGISTER_NUM_P (REGNO (op_operand))
946 || !SCALAR_INT_MODE_P (GET_MODE (op)))
947 return 0;
948
949 if (GET_CODE (op) == ZERO_EXTEND)
950 {
951 if (GET_MODE (op_operand) != word_mode
952 || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD)
953 return 0;
954 }
955 else /* left or right shift */
956 {
957 if (!CONST_INT_P (XEXP (op, 1))
958 || INTVAL (XEXP (op, 1)) < BITS_PER_WORD
959 || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD)
960 return 0;
961 }
962
963 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
964
965 if (GET_CODE (op) != ZERO_EXTEND)
966 bitmap_set_bit (decomposable_context, REGNO (op_operand));
967
968 return 1;
969 }
970
971 /* Decompose a more than word wide shift (in INSN) of a multiword
972 pseudo or a multiword zero-extend of a wordmode pseudo into a move
973 and 'set to zero' insn. Return a pointer to the new insn when a
974 replacement was done. */
975
976 static rtx
977 resolve_shift_zext (rtx insn)
978 {
979 rtx set;
980 rtx op;
981 rtx op_operand;
982 rtx insns;
983 rtx src_reg, dest_reg, dest_zero;
984 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
985
986 set = single_set (insn);
987 if (!set)
988 return NULL_RTX;
989
990 op = SET_SRC (set);
991 if (GET_CODE (op) != ASHIFT
992 && GET_CODE (op) != LSHIFTRT
993 && GET_CODE (op) != ZERO_EXTEND)
994 return NULL_RTX;
995
996 op_operand = XEXP (op, 0);
997
998 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
999 return NULL_RTX;
1000
1001 /* src_reg_num is the number of the word mode register which we
1002 are operating on. For a left shift and a zero_extend on little
1003 endian machines this is register 0. */
1004 src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
1005
1006 if (WORDS_BIG_ENDIAN
1007 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1008 src_reg_num = 1 - src_reg_num;
1009
1010 if (GET_CODE (op) == ZERO_EXTEND)
1011 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1012 else
1013 dest_reg_num = 1 - src_reg_num;
1014
1015 offset1 = UNITS_PER_WORD * dest_reg_num;
1016 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1017 src_offset = UNITS_PER_WORD * src_reg_num;
1018
1019 start_sequence ();
1020
1021 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1022 GET_MODE (SET_DEST (set)),
1023 offset1);
1024 dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1025 GET_MODE (SET_DEST (set)),
1026 offset2);
1027 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1028 GET_MODE (op_operand),
1029 src_offset);
1030 if (GET_CODE (op) != ZERO_EXTEND)
1031 {
1032 int shift_count = INTVAL (XEXP (op, 1));
1033 if (shift_count > BITS_PER_WORD)
1034 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1035 LSHIFT_EXPR : RSHIFT_EXPR,
1036 word_mode, src_reg,
1037 build_int_cst (NULL_TREE,
1038 shift_count - BITS_PER_WORD),
1039 dest_reg, 1);
1040 }
1041
1042 if (dest_reg != src_reg)
1043 emit_move_insn (dest_reg, src_reg);
1044 emit_move_insn (dest_zero, CONST0_RTX (word_mode));
1045 insns = get_insns ();
1046
1047 end_sequence ();
1048
1049 emit_insn_before (insns, insn);
1050
1051 if (dump_file)
1052 {
1053 rtx in;
1054 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1055 for (in = insns; in != insn; in = NEXT_INSN (in))
1056 fprintf (dump_file, "%d ", INSN_UID (in));
1057 fprintf (dump_file, "\n");
1058 }
1059
1060 delete_insn (insn);
1061 return insns;
1062 }
1063
1064 /* Look for registers which are always accessed via word-sized SUBREGs
1065 or via copies. Decompose these registers into several word-sized
1066 pseudo-registers. */
1067
1068 static void
1069 decompose_multiword_subregs (void)
1070 {
1071 unsigned int max;
1072 basic_block bb;
1073
1074 if (df)
1075 df_set_flags (DF_DEFER_INSN_RESCAN);
1076
1077 max = max_reg_num ();
1078
1079 /* First see if there are any multi-word pseudo-registers. If there
1080 aren't, there is nothing we can do. This should speed up this
1081 pass in the normal case, since it should be faster than scanning
1082 all the insns. */
1083 {
1084 unsigned int i;
1085
1086 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1087 {
1088 if (regno_reg_rtx[i] != NULL
1089 && GET_MODE_SIZE (GET_MODE (regno_reg_rtx[i])) > UNITS_PER_WORD)
1090 break;
1091 }
1092 if (i == max)
1093 return;
1094 }
1095
1096 if (df)
1097 run_word_dce ();
1098
1099 /* FIXME: When the dataflow branch is merged, we can change this
1100 code to look for each multi-word pseudo-register and to find each
1101 insn which sets or uses that register. That should be faster
1102 than scanning all the insns. */
1103
1104 decomposable_context = BITMAP_ALLOC (NULL);
1105 non_decomposable_context = BITMAP_ALLOC (NULL);
1106 subreg_context = BITMAP_ALLOC (NULL);
1107
1108 reg_copy_graph = VEC_alloc (bitmap, heap, max);
1109 VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
1110 memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
1111
1112 FOR_EACH_BB (bb)
1113 {
1114 rtx insn;
1115
1116 FOR_BB_INSNS (bb, insn)
1117 {
1118 rtx set;
1119 enum classify_move_insn cmi;
1120 int i, n;
1121
1122 if (!INSN_P (insn)
1123 || GET_CODE (PATTERN (insn)) == CLOBBER
1124 || GET_CODE (PATTERN (insn)) == USE)
1125 continue;
1126
1127 if (find_decomposable_shift_zext (insn))
1128 continue;
1129
1130 recog_memoized (insn);
1131 extract_insn (insn);
1132
1133 set = simple_move (insn);
1134
1135 if (!set)
1136 cmi = NOT_SIMPLE_MOVE;
1137 else
1138 {
1139 if (find_pseudo_copy (set))
1140 cmi = SIMPLE_PSEUDO_REG_MOVE;
1141 else
1142 cmi = SIMPLE_MOVE;
1143 }
1144
1145 n = recog_data.n_operands;
1146 for (i = 0; i < n; ++i)
1147 {
1148 for_each_rtx (&recog_data.operand[i],
1149 find_decomposable_subregs,
1150 &cmi);
1151
1152 /* We handle ASM_OPERANDS as a special case to support
1153 things like x86 rdtsc which returns a DImode value.
1154 We can decompose the output, which will certainly be
1155 operand 0, but not the inputs. */
1156
1157 if (cmi == SIMPLE_MOVE
1158 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1159 {
1160 gcc_assert (i == 0);
1161 cmi = NOT_SIMPLE_MOVE;
1162 }
1163 }
1164 }
1165 }
1166
1167 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1168 if (!bitmap_empty_p (decomposable_context))
1169 {
1170 sbitmap sub_blocks;
1171 unsigned int i;
1172 sbitmap_iterator sbi;
1173 bitmap_iterator iter;
1174 unsigned int regno;
1175
1176 propagate_pseudo_copies ();
1177
1178 sub_blocks = sbitmap_alloc (last_basic_block);
1179 sbitmap_zero (sub_blocks);
1180
1181 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1182 decompose_register (regno);
1183
1184 FOR_EACH_BB (bb)
1185 {
1186 rtx insn;
1187
1188 FOR_BB_INSNS (bb, insn)
1189 {
1190 rtx pat;
1191
1192 if (!INSN_P (insn))
1193 continue;
1194
1195 pat = PATTERN (insn);
1196 if (GET_CODE (pat) == CLOBBER)
1197 resolve_clobber (pat, insn);
1198 else if (GET_CODE (pat) == USE)
1199 resolve_use (pat, insn);
1200 else if (DEBUG_INSN_P (insn))
1201 resolve_debug (insn);
1202 else
1203 {
1204 rtx set;
1205 int i;
1206
1207 recog_memoized (insn);
1208 extract_insn (insn);
1209
1210 set = simple_move (insn);
1211 if (set)
1212 {
1213 rtx orig_insn = insn;
1214 bool cfi = control_flow_insn_p (insn);
1215
1216 /* We can end up splitting loads to multi-word pseudos
1217 into separate loads to machine word size pseudos.
1218 When this happens, we first had one load that can
1219 throw, and after resolve_simple_move we'll have a
1220 bunch of loads (at least two). All those loads may
1221 trap if we can have non-call exceptions, so they
1222 all will end the current basic block. We split the
1223 block after the outer loop over all insns, but we
1224 make sure here that we will be able to split the
1225 basic block and still produce the correct control
1226 flow graph for it. */
1227 gcc_assert (!cfi
1228 || (cfun->can_throw_non_call_exceptions
1229 && can_throw_internal (insn)));
1230
1231 insn = resolve_simple_move (set, insn);
1232 if (insn != orig_insn)
1233 {
1234 recog_memoized (insn);
1235 extract_insn (insn);
1236
1237 if (cfi)
1238 SET_BIT (sub_blocks, bb->index);
1239 }
1240 }
1241 else
1242 {
1243 rtx decomposed_shift;
1244
1245 decomposed_shift = resolve_shift_zext (insn);
1246 if (decomposed_shift != NULL_RTX)
1247 {
1248 insn = decomposed_shift;
1249 recog_memoized (insn);
1250 extract_insn (insn);
1251 }
1252 }
1253
1254 for (i = recog_data.n_operands - 1; i >= 0; --i)
1255 for_each_rtx (recog_data.operand_loc[i],
1256 resolve_subreg_use,
1257 insn);
1258
1259 resolve_reg_notes (insn);
1260
1261 if (num_validated_changes () > 0)
1262 {
1263 for (i = recog_data.n_dups - 1; i >= 0; --i)
1264 {
1265 rtx *pl = recog_data.dup_loc[i];
1266 int dup_num = recog_data.dup_num[i];
1267 rtx *px = recog_data.operand_loc[dup_num];
1268
1269 validate_unshare_change (insn, pl, *px, 1);
1270 }
1271
1272 i = apply_change_group ();
1273 gcc_assert (i);
1274 }
1275 }
1276 }
1277 }
1278
1279 /* If we had insns to split that caused control flow insns in the middle
1280 of a basic block, split those blocks now. Note that we only handle
1281 the case where splitting a load has caused multiple possibly trapping
1282 loads to appear. */
1283 EXECUTE_IF_SET_IN_SBITMAP (sub_blocks, 0, i, sbi)
1284 {
1285 rtx insn, end;
1286 edge fallthru;
1287
1288 bb = BASIC_BLOCK (i);
1289 insn = BB_HEAD (bb);
1290 end = BB_END (bb);
1291
1292 while (insn != end)
1293 {
1294 if (control_flow_insn_p (insn))
1295 {
1296 /* Split the block after insn. There will be a fallthru
1297 edge, which is OK so we keep it. We have to create the
1298 exception edges ourselves. */
1299 fallthru = split_block (bb, insn);
1300 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1301 bb = fallthru->dest;
1302 insn = BB_HEAD (bb);
1303 }
1304 else
1305 insn = NEXT_INSN (insn);
1306 }
1307 }
1308
1309 sbitmap_free (sub_blocks);
1310 }
1311
1312 {
1313 unsigned int i;
1314 bitmap b;
1315
1316 FOR_EACH_VEC_ELT (bitmap, reg_copy_graph, i, b)
1317 if (b)
1318 BITMAP_FREE (b);
1319 }
1320
1321 VEC_free (bitmap, heap, reg_copy_graph);
1322
1323 BITMAP_FREE (decomposable_context);
1324 BITMAP_FREE (non_decomposable_context);
1325 BITMAP_FREE (subreg_context);
1326 }
1327 \f
1328 /* Gate function for lower subreg pass. */
1329
1330 static bool
1331 gate_handle_lower_subreg (void)
1332 {
1333 return flag_split_wide_types != 0;
1334 }
1335
/* Execute hook of the first lower-subreg pass (pass_lower_subreg).
   Run decompose_multiword_subregs over the current function and
   unconditionally return 0.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  const unsigned int result = 0;

  decompose_multiword_subregs ();
  return result;
}
1344
/* Execute hook of the second lower-subreg pass (pass_lower_subreg2).
   Performs the same work as the first pass's hook: run
   decompose_multiword_subregs and unconditionally return 0.  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  const unsigned int result = 0;

  decompose_multiword_subregs ();
  return result;
}
1353
1354 struct rtl_opt_pass pass_lower_subreg =
1355 {
1356 {
1357 RTL_PASS,
1358 "subreg1", /* name */
1359 gate_handle_lower_subreg, /* gate */
1360 rest_of_handle_lower_subreg, /* execute */
1361 NULL, /* sub */
1362 NULL, /* next */
1363 0, /* static_pass_number */
1364 TV_LOWER_SUBREG, /* tv_id */
1365 0, /* properties_required */
1366 0, /* properties_provided */
1367 0, /* properties_destroyed */
1368 0, /* todo_flags_start */
1369 TODO_dump_func |
1370 TODO_ggc_collect |
1371 TODO_verify_flow /* todo_flags_finish */
1372 }
1373 };
1374
1375 struct rtl_opt_pass pass_lower_subreg2 =
1376 {
1377 {
1378 RTL_PASS,
1379 "subreg2", /* name */
1380 gate_handle_lower_subreg, /* gate */
1381 rest_of_handle_lower_subreg2, /* execute */
1382 NULL, /* sub */
1383 NULL, /* next */
1384 0, /* static_pass_number */
1385 TV_LOWER_SUBREG, /* tv_id */
1386 0, /* properties_required */
1387 0, /* properties_provided */
1388 0, /* properties_destroyed */
1389 0, /* todo_flags_start */
1390 TODO_df_finish | TODO_verify_rtl_sharing |
1391 TODO_dump_func |
1392 TODO_ggc_collect |
1393 TODO_verify_flow /* todo_flags_finish */
1394 }
1395 };