Daily bump.
[gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Richard Henderson <rth@redhat.com>
5 Ian Lance Taylor <iant@google.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "machmode.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "obstack.h"
33 #include "basic-block.h"
34 #include "recog.h"
35 #include "bitmap.h"
36 #include "dce.h"
37 #include "expr.h"
38 #include "except.h"
39 #include "regs.h"
40 #include "tree-pass.h"
41 #include "df.h"
42 #include "lower-subreg.h"
43
/* Normalize STACK_GROWS_DOWNWARD to a 0/1 value so it can be compared
   directly against WORDS_BIG_ENDIAN below (some targets define the
   macro with an empty expansion).  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif
50
51
52 /* Decompose multi-word pseudo-registers into individual
53 pseudo-registers when possible and profitable. This is possible
54 when all the uses of a multi-word register are via SUBREG, or are
55 copies of the register to another location. Breaking apart the
56 register permits more CSE and permits better register allocation.
57 This is profitable if the machine does not have move instructions
58 to do this.
59
60 This pass only splits moves with modes that are wider than
61 word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
62 modes that are twice the width of word_mode. The latter could be
63 generalized if there was a need to do this, but the trend in
64 architectures is to not need this.
65
66 There are two useful preprocessor defines for use by maintainers:
67
68 #define LOG_COSTS 1
69
70 if you wish to see the actual cost estimates that are being used
71 for each mode wider than word mode and the cost estimates for zero
72 extension and the shifts. This can be useful when port maintainers
73 are tuning insn rtx costs.
74
75 #define FORCE_LOWERING 1
76
77 if you wish to test the pass with all the transformation forced on.
78 This can be useful for finding bugs in the transformations. */
79
/* Maintainer knobs described above; both off in normal builds.  */
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target cost/choice state; switchable-target builds keep one
   instance per target.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Convenience accessors for the per-target state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
111
/* RTXes used while computing costs.  These are scratch expressions
   built once in init_lower_subreg and mutated in place (codes, modes,
   operands) for each cost query; they are never emitted.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
127
128 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
129 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
130
131 static int
132 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
133 enum machine_mode mode, int op1)
134 {
135 PUT_CODE (rtxes->shift, code);
136 PUT_MODE (rtxes->shift, mode);
137 PUT_MODE (rtxes->source, mode);
138 XEXP (rtxes->shift, 1) = GEN_INT (op1);
139 return set_src_cost (rtxes->shift, speed_p);
140 }
141
142 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
143 to true if it is profitable to split a double-word CODE shift
144 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
145 for speed or size profitability.
146
147 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
148 the cost of moving zero into a word-mode register. WORD_MOVE_COST
149 is the cost of moving between word registers. */
150
151 static void
152 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
153 bool *splitting, enum rtx_code code,
154 int word_move_zero_cost, int word_move_cost)
155 {
156 int wide_cost, narrow_cost, i;
157
158 for (i = 0; i < BITS_PER_WORD; i++)
159 {
160 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
161 i + BITS_PER_WORD);
162 if (i == 0)
163 narrow_cost = word_move_cost;
164 else
165 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
166
167 if (LOG_COSTS)
168 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
169 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
170 i + BITS_PER_WORD, wide_cost, narrow_cost,
171 word_move_zero_cost);
172
173 if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
174 splitting[i] = true;
175 }
176 }
177
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading zero into a word-mode register.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every multi-word mode, decide whether a move in that mode is
     worth splitting into word-sized moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      enum machine_mode mode = (enum machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
254
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);

  /* Build the scratch RTL reused for all cost queries.  The register
     numbers only need to be distinct; these rtxes are never emitted.  */
  rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
  rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
  rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  /* Fill in the size-optimization choices, then the speed ones.  */
  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
282
283 static bool
284 simple_move_operand (rtx x)
285 {
286 if (GET_CODE (x) == SUBREG)
287 x = SUBREG_REG (x);
288
289 if (!OBJECT_P (x))
290 return false;
291
292 if (GET_CODE (x) == LABEL_REF
293 || GET_CODE (x) == SYMBOL_REF
294 || GET_CODE (x) == HIGH
295 || GET_CODE (x) == CONST)
296 return false;
297
298 if (MEM_P (x)
299 && (MEM_VOLATILE_P (x)
300 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
301 return false;
302
303 return true;
304 }
305
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx insn, bool speed_p)
{
  rtx x;
  rtx set;
  enum machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Both sides of the set must be the insn's recognized operands;
     otherwise this is more than a plain move.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_SRC (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
	  == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, the move must be in a mode the cost analysis decided
     is worth splitting.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
363
364 /* If SET is a copy from one multi-word pseudo-register to another,
365 record that in reg_copy_graph. Return whether it is such a
366 copy. */
367
368 static bool
369 find_pseudo_copy (rtx set)
370 {
371 rtx dest = SET_DEST (set);
372 rtx src = SET_SRC (set);
373 unsigned int rd, rs;
374 bitmap b;
375
376 if (!REG_P (dest) || !REG_P (src))
377 return false;
378
379 rd = REGNO (dest);
380 rs = REGNO (src);
381 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
382 return false;
383
384 b = reg_copy_graph[rs];
385 if (b == NULL)
386 {
387 b = BITMAP_ALLOC (NULL);
388 reg_copy_graph[rs] = b;
389 }
390
391 bitmap_set_bit (b, rd);
392
393 return true;
394 }
395
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  /* Standard worklist fixed point: QUEUE holds registers whose copy
     edges have not been examined yet; PROPAGATE collects the newly
     reachable registers found in each round.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    /* Add the targets of copies from register I, except those
	       already known to be non-decomposable.  */
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Only registers not already in DECOMPOSABLE_CONTEXT constitute
	 new work for the next round.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}
433
/* Classification of the insn currently being scanned.  A pointer to
   one of these values is passed to find_decomposable_subregs via
   for_each_rtx.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
446
/* This is called via for_each_rtx.  If we find a SUBREG which we
   could use to decompose a pseudo-register, set a bit in
   DECOMPOSABLE_CONTEXT.  If we find an unadorned register which is
   not a simple pseudo-register copy, DATA will point at the type of
   move, and we set a bit in DECOMPOSABLE_CONTEXT or
   NON_DECOMPOSABLE_CONTEXT as appropriate.  Returns the usual
   for_each_rtx protocol: 0 to continue, -1 to skip subexpressions.  */

static int
find_decomposable_subregs (rtx *px, void *data)
{
  enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx inner = SUBREG_REG (x);
      unsigned int regno, outer_size, inner_size, outer_words, inner_words;

      if (!REG_P (inner))
	return 0;

      regno = REGNO (inner);
      if (HARD_REGISTER_NUM_P (regno))
	/* Hard registers are never candidates; skip the inner REG
	   as well.  */
	return -1;

      outer_size = GET_MODE_SIZE (GET_MODE (x));
      inner_size = GET_MODE_SIZE (GET_MODE (inner));
      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

      /* We only try to decompose single word subregs of multi-word
	 registers.  When we find one, we return -1 to avoid iterating
	 over the inner register.

	 ??? This doesn't allow, e.g., DImode subregs of TImode values
	 on 32-bit targets.  We would need to record the way the
	 pseudo-register was used, and only decompose if all the uses
	 were the same number and size of pieces.  Hopefully this
	 doesn't happen much.  */

      if (outer_words == 1 && inner_words > 1)
	{
	  bitmap_set_bit (decomposable_context, regno);
	  return -1;
	}

      /* If this is a cast from one mode to another, where the modes
	 have the same size, and they are not tieable, then mark this
	 register as non-decomposable.  If we decompose it we are
	 likely to mess up whatever the backend is trying to do.  */
      if (outer_words > 1
	  && outer_size == inner_size
	  && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	{
	  bitmap_set_bit (non_decomposable_context, regno);
	  bitmap_set_bit (subreg_context, regno);
	  return -1;
	}
    }
  else if (REG_P (x))
    {
      unsigned int regno;

      /* We will see an outer SUBREG before we see the inner REG, so
	 when we see a plain REG here it means a direct reference to
	 the register.

	 If this is not a simple copy from one location to another,
	 then we can not decompose this register.  If this is a simple
	 copy we want to decompose, and the mode is right,
	 then we mark the register as decomposable.
	 Otherwise we don't say anything about this register --
	 it could be decomposed, but whether that would be
	 profitable depends upon how it is used elsewhere.

	 We only set bits in the bitmap for multi-word
	 pseudo-registers, since those are the only ones we care about
	 and it keeps the size of the bitmaps down.  */

      regno = REGNO (x);
      if (!HARD_REGISTER_NUM_P (regno)
	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	{
	  switch (*pcmi)
	    {
	    case NOT_SIMPLE_MOVE:
	      bitmap_set_bit (non_decomposable_context, regno);
	      break;
	    case DECOMPOSABLE_SIMPLE_MOVE:
	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		bitmap_set_bit (decomposable_context, regno);
	      break;
	    case SIMPLE_MOVE:
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
    }
  else if (MEM_P (x))
    {
      enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

      /* Any registers used in a MEM do not participate in a
	 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	 here, and return -1 to block the parent's recursion.  */
      for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
      return -1;
    }

  return 0;
}
562
563 /* Decompose REGNO into word-sized components. We smash the REG node
564 in place. This ensures that (1) something goes wrong quickly if we
565 fail to make some replacement, and (2) the debug information inside
566 the symbol table is automatically kept up to date. */
567
568 static void
569 decompose_register (unsigned int regno)
570 {
571 rtx reg;
572 unsigned int words, i;
573 rtvec v;
574
575 reg = regno_reg_rtx[regno];
576
577 regno_reg_rtx[regno] = NULL_RTX;
578
579 words = GET_MODE_SIZE (GET_MODE (reg));
580 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
581
582 v = rtvec_alloc (words);
583 for (i = 0; i < words; ++i)
584 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
585
586 PUT_CODE (reg, CONCATN);
587 XVEC (reg, 0) = v;
588
589 if (dump_file)
590 {
591 fprintf (dump_file, "; Splitting reg %u ->", regno);
592 for (i = 0; i < words; ++i)
593 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
594 fputc ('\n', dump_file);
595 }
596 }
597
/* Get a SUBREG of a CONCATN.  Returns NULL_RTX if the requested piece
   straddles two CONCATN elements.  */

static rtx
simplify_subreg_concatn (enum machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  enum machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* Locate the CONCATN element containing the requested byte.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Fail if the requested piece crosses an element boundary.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
637
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
			     enum machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Simple mode change: recurse on the CONCATN directly.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The outer SUBREG straddled a CONCATN boundary; combine the
	     two offsets and extract directly from the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
698
699 /* Return whether we should resolve X into the registers into which it
700 was decomposed. */
701
702 static bool
703 resolve_reg_p (rtx x)
704 {
705 return GET_CODE (x) == CONCATN;
706 }
707
708 /* Return whether X is a SUBREG of a register which we need to
709 resolve. */
710
711 static bool
712 resolve_subreg_p (rtx x)
713 {
714 if (GET_CODE (x) != SUBREG)
715 return false;
716 return resolve_reg_p (SUBREG_REG (x));
717 }
718
/* This is called via for_each_rtx.  Look for SUBREGs which need to be
   decomposed.  DATA is the containing insn, or NULL when scanning a
   note.  Returns 0 to keep walking, -1 to skip the replaced
   subexpression, or 1 to tell the caller the containing note must be
   removed.  */

static int
resolve_subreg_use (rtx *px, void *data)
{
  rtx insn = (rtx) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (resolve_subreg_p (x))
    {
      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				   SUBREG_BYTE (x));

      /* It is possible for a note to contain a reference which we can
	 decompose.  In this case, return 1 to the caller to indicate
	 that the note must be removed.  */
      if (!x)
	{
	  gcc_assert (!insn);
	  return 1;
	}

      /* Queue the replacement; the caller is responsible for the
	 eventual apply_change_group.  */
      validate_change (insn, px, x, 1);
      return -1;
    }

  if (resolve_reg_p (x))
    {
      /* Return 1 to the caller to indicate that we found a direct
	 reference to a register which is being decomposed.  This can
	 happen inside notes, multiword shift or zero-extend
	 instructions.  */
      return 1;
    }

  return 0;
}
760
/* This is called via for_each_rtx.  Look for SUBREGs which can be
   decomposed and decomposed REGs that need copying.  Used on debug
   insns, where replacing a reference with a copy is always safe.  */

static int
adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (resolve_subreg_p (x))
    {
      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				   SUBREG_BYTE (x));

      if (x)
	*px = x;
      else
	/* The subreg could not be simplified; fall through to the
	   resolve_reg_p test on a copy of the original expression.
	   NOTE(review): this copy is itself copied again below when
	   that test fires -- presumably to avoid sharing; confirm.  */
	x = copy_rtx (*px);
    }

  /* A direct reference to a decomposed register must not share the
     CONCATN node; substitute an unshared copy.  */
  if (resolve_reg_p (x))
    *px = copy_rtx (x);

  return 0;
}
788
/* Resolve any decomposed registers which appear in register notes on
   INSN.  */

static void
resolve_reg_notes (rtx insn)
{
  rtx *pnote, note;

  /* Rewrite any REG_EQUAL/REG_EQUIV note; if it contains a reference
     that cannot be rewritten (resolve_subreg_use returns nonzero),
     drop the note entirely.  */
  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Unlink REG_DEAD and REG_UNUSED notes that mention a decomposed
     register; they no longer describe a live object.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
832
833 /* Return whether X can be decomposed into subwords. */
834
835 static bool
836 can_decompose_p (rtx x)
837 {
838 if (REG_P (x))
839 {
840 unsigned int regno = REGNO (x);
841
842 if (HARD_REGISTER_NUM_P (regno))
843 {
844 unsigned int byte, num_bytes;
845
846 num_bytes = GET_MODE_SIZE (GET_MODE (x));
847 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
848 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
849 return false;
850 return true;
851 }
852 else
853 return !bitmap_bit_p (subreg_context, regno);
854 }
855
856 return true;
857 }
858
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx
resolve_simple_move (rtx set, rtx insn)
{
  rtx src, dest, real_dest, insns;
  enum machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, minsn, smove;

      /* Move SRC into a fresh pseudo, recursively resolving that
	 intermediate move, and continue with the pseudo as source.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
      if (MEM_P (dest))
	for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);
      emit_move_insn (reg, src);
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Switch to the same-sized integer mode so the pieces move
	     through integer registers.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Push the words so the lowest-addressed word goes first; which
	 source word that is depends on endianness versus the direction
	 of stack growth.  J walks source words; the unsigned -1
	 increment relies on defined unsigned wraparound.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the whole destination first so the word-by-word
	 stores are not seen as partial updates of a live value.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, minsn, smove;

      /* Copy the temporary into the real destination, recursively
	 resolving that final move too.  */
      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  delete_insn (insn);

  return insns;
}
1067
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx insn)
{
  rtx reg;
  enum machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Rewrite the existing CLOBBER in place to cover word 0 ...  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* ... and emit separate CLOBBERs after INSN for the remaining
     words.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1108
1109 /* A USE of a decomposed register is no longer meaningful. Return
1110 whether we changed something. */
1111
1112 static bool
1113 resolve_use (rtx pat, rtx insn)
1114 {
1115 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1116 {
1117 delete_insn (insn);
1118 return true;
1119 }
1120
1121 resolve_reg_notes (insn);
1122
1123 return false;
1124 }
1125
/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx insn)
{
  /* Replace any decomposed registers mentioned anywhere in the debug
     insn's pattern with their word-mode components.  */
  for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);

  /* The pattern changed, so tell df to rescan the insn.  */
  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1137
1138 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1139 set the decomposable_context bitmap accordingly. SPEED_P is true
1140 if we are optimizing INSN for speed rather than size. Return true
1141 if INSN is decomposable. */
1142
1143 static bool
1144 find_decomposable_shift_zext (rtx insn, bool speed_p)
1145 {
1146 rtx set;
1147 rtx op;
1148 rtx op_operand;
1149
1150 set = single_set (insn);
1151 if (!set)
1152 return false;
1153
1154 op = SET_SRC (set);
1155 if (GET_CODE (op) != ASHIFT
1156 && GET_CODE (op) != LSHIFTRT
1157 && GET_CODE (op) != ZERO_EXTEND)
1158 return false;
1159
1160 op_operand = XEXP (op, 0);
1161 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1162 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1163 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1164 || GET_MODE (op) != twice_word_mode)
1165 return false;
1166
1167 if (GET_CODE (op) == ZERO_EXTEND)
1168 {
1169 if (GET_MODE (op_operand) != word_mode
1170 || !choices[speed_p].splitting_zext)
1171 return false;
1172 }
1173 else /* left or right shift */
1174 {
1175 bool *splitting = (GET_CODE (op) == ASHIFT
1176 ? choices[speed_p].splitting_ashift
1177 : choices[speed_p].splitting_lshiftrt);
1178 if (!CONST_INT_P (XEXP (op, 1))
1179 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1180 2 * BITS_PER_WORD - 1)
1181 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1182 return false;
1183
1184 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1185 }
1186
1187 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1188
1189 return true;
1190 }
1191
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx
resolve_shift_zext (rtx insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx insns;
  rtx src_reg, dest_reg, dest_zero;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL_RTX;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL_RTX;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;

  /* On big-endian targets word 0 is the most significant word, so the
     source word index flips when the source really is double-word.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 addresses the destination word that receives data,
     offset2 the destination word that is zeroed.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					   GET_MODE (SET_DEST (set)),
					   offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by exactly BITS_PER_WORD degenerates to a plain word
	 move; only the excess over a full word needs a residual
	 word-mode shift of the surviving word.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, 1);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* The other destination word always becomes zero.  */
  emit_move_insn (dest_zero, CONST0_RTX (word_mode));
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1285
1286 /* Print to dump_file a description of what we're doing with shift code CODE.
1287 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1288
1289 static void
1290 dump_shift_choices (enum rtx_code code, bool *splitting)
1291 {
1292 int i;
1293 const char *sep;
1294
1295 fprintf (dump_file,
1296 " Splitting mode %s for %s lowering with shift amounts = ",
1297 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1298 sep = "";
1299 for (i = 0; i < BITS_PER_WORD; i++)
1300 if (splitting[i])
1301 {
1302 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1303 sep = ",";
1304 }
1305 fprintf (dump_file, "\n");
1306 }
1307
1308 /* Print to dump_file a description of what we're doing when optimizing
1309 for speed or size; SPEED_P says which. DESCRIPTION is a description
1310 of the SPEED_P choice. */
1311
1312 static void
1313 dump_choices (bool speed_p, const char *description)
1314 {
1315 unsigned int i;
1316
1317 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1318
1319 for (i = 0; i < MAX_MACHINE_MODE; i++)
1320 if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1321 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1322 choices[speed_p].move_modes_to_split[i]
1323 ? "Splitting"
1324 : "Skipping",
1325 GET_MODE_NAME ((enum machine_mode) i));
1326
1327 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1328 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1329 GET_MODE_NAME (twice_word_mode));
1330
1331 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1332 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_ashift);
1333 fprintf (dump_file, "\n");
1334 }
1335
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);

  /* Phase 1: scan every insn, classifying registers into the
     decomposable / non-decomposable bitmaps and building the
     pseudo-copy graph.  Nothing is rewritten yet.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      for_each_rtx (&recog_data.operand[i],
			    find_decomposable_subregs,
			    &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A register seen in a non-decomposable context must not be split.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      sub_blocks = sbitmap_alloc (last_basic_block);
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that refers to a decomposed
	 register, replacing whole-register references with their
	 word-mode components.  */
      FOR_EACH_BB (bb)
	{
	  rtx insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    for_each_rtx (recog_data.operand_loc[i],
				  resolve_subreg_use,
				  insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched duplicate operands in sync with the
			 operands we just rewrote before committing the
			 whole change group.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx insn, end;
	  edge fallthru;

	  bb = BASIC_BLOCK (i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-register copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1634 \f
1635 /* Gate function for lower subreg pass. */
1636
1637 static bool
1638 gate_handle_lower_subreg (void)
1639 {
1640 return flag_split_wide_types != 0;
1641 }
1642
/* Implement first lower subreg pass.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  /* First pass: do not treat pseudo-to-pseudo copies as decomposable
     (they are likely to be cleaned up by later optimizations anyway).  */
  decompose_multiword_subregs (false);
  return 0;
}
1651
/* Implement second lower subreg pass.  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  /* Second pass: additionally mark pseudo-to-pseudo copies as
     decomposable.  */
  decompose_multiword_subregs (true);
  return 0;
}
1660
/* Pass descriptor for the first lowering pass ("subreg1"), which runs
   decompose_multiword_subregs without decomposing pseudo-to-pseudo
   copies.  */

struct rtl_opt_pass pass_lower_subreg =
{
 {
  RTL_PASS,
  "subreg1",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  gate_handle_lower_subreg,		/* gate */
  rest_of_handle_lower_subreg,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_LOWER_SUBREG,			/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_ggc_collect |
  TODO_verify_flow			/* todo_flags_finish */
 }
};
1681
/* Pass descriptor for the second lowering pass ("subreg2"), which also
   decomposes pseudo-to-pseudo copies; it additionally finalizes df and
   verifies RTL sharing afterwards.  */

struct rtl_opt_pass pass_lower_subreg2 =
{
 {
  RTL_PASS,
  "subreg2",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  gate_handle_lower_subreg,		/* gate */
  rest_of_handle_lower_subreg2,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_LOWER_SUBREG,			/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_ggc_collect |
  TODO_verify_flow			/* todo_flags_finish */
 }
};