1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "vec.h"
29 #include "double-int.h"
30 #include "input.h"
31 #include "alias.h"
32 #include "symtab.h"
33 #include "wide-int.h"
34 #include "inchash.h"
35 #include "tree.h"
36 #include "rtl.h"
37 #include "tm_p.h"
38 #include "flags.h"
39 #include "insn-config.h"
40 #include "obstack.h"
41 #include "predict.h"
42 #include "hard-reg-set.h"
43 #include "function.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgrtl.h"
47 #include "cfgbuild.h"
48 #include "basic-block.h"
49 #include "recog.h"
50 #include "bitmap.h"
51 #include "dce.h"
52 #include "hashtab.h"
53 #include "statistics.h"
54 #include "real.h"
55 #include "fixed-value.h"
56 #include "expmed.h"
57 #include "dojump.h"
58 #include "explow.h"
59 #include "calls.h"
60 #include "emit-rtl.h"
61 #include "varasm.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "except.h"
65 #include "regs.h"
66 #include "tree-pass.h"
67 #include "df.h"
68 #include "lower-subreg.h"
69 #include "rtl-iter.h"
70
71
72 /* Decompose multi-word pseudo-registers into individual
73 pseudo-registers when possible and profitable. This is possible
74 when all the uses of a multi-word register are via SUBREG, or are
75 copies of the register to another location. Breaking apart the
76 register permits more CSE and permits better register allocation.
77 This is profitable if the machine does not have move instructions
78 to do this.
79
80 This pass only splits moves with modes that are wider than
81 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
82 integer modes that are twice the width of word_mode. The latter
83 could be generalized if there were a need to do this, but the trend in
84 architectures is not to need this.
85
86 There are two useful preprocessor defines for use by maintainers:
87
88 #define LOG_COSTS 1
89
90 if you wish to see the actual cost estimates that are being used
91 for each mode wider than word mode and the cost estimates for zero
92 extension and the shifts. This can be useful when port maintainers
93 are tuning insn rtx costs.
94
95 #define FORCE_LOWERING 1
96
97 if you wish to test the pass with all the transformations forced on.
98 This can be useful for finding bugs in the transformations. */
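/* A purely illustrative sketch (the 32-bit target and register numbers
   are assumptions, not taken from any particular port): with word_mode
   being SImode, a move such as

       (set (reg:DI 100) (reg:DI 101))

   whose pseudos are otherwise referenced only through word-sized
   SUBREGs may be lowered by this pass into two independent word moves,
   roughly

       (set (reg:SI 102) (reg:SI 104))
       (set (reg:SI 103) (reg:SI 105))

   where 102/103 and 104/105 are the word-sized pieces of the original
   pseudos, each of which can then be optimized and allocated
   separately.  */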
99
100 #define LOG_COSTS 0
101 #define FORCE_LOWERING 0
102
103 /* Bit N in this bitmap is set if regno N is used in a context in
104 which we can decompose it. */
105 static bitmap decomposable_context;
106
107 /* Bit N in this bitmap is set if regno N is used in a context in
109 which it cannot be decomposed. */
109 static bitmap non_decomposable_context;
110
111 /* Bit N in this bitmap is set if regno N is used in a subreg
112 which changes the mode but not the size. This typically happens
113 when the register is accessed as a floating-point value; we want to
114 avoid generating accesses to its subwords in integer modes. */
115 static bitmap subreg_context;
116
117 /* Bit N in the bitmap in element M of this array is set if there is a
118 copy from reg M to reg N. */
119 static vec<bitmap> reg_copy_graph;
120
121 struct target_lower_subreg default_target_lower_subreg;
122 #if SWITCHABLE_TARGET
123 struct target_lower_subreg *this_target_lower_subreg
124 = &default_target_lower_subreg;
125 #endif
126
127 #define twice_word_mode \
128 this_target_lower_subreg->x_twice_word_mode
129 #define choices \
130 this_target_lower_subreg->x_choices
131
132 /* RTXes used while computing costs. */
133 struct cost_rtxes {
134 /* Source and target registers. */
135 rtx source;
136 rtx target;
137
138 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
139 rtx zext;
140
141 /* A shift of SOURCE. */
142 rtx shift;
143
144 /* A SET of TARGET. */
145 rtx set;
146 };
147
148 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
149 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
150
151 static int
152 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
153 machine_mode mode, int op1)
154 {
155 PUT_CODE (rtxes->shift, code);
156 PUT_MODE (rtxes->shift, mode);
157 PUT_MODE (rtxes->source, mode);
158 XEXP (rtxes->shift, 1) = GEN_INT (op1);
159 return set_src_cost (rtxes->shift, speed_p);
160 }
161
162 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
163 to true if it is profitable to split a double-word CODE shift
164 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
165 for speed or size profitability.
166
167 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
168 the cost of moving zero into a word-mode register. WORD_MOVE_COST
169 is the cost of moving between word registers. */
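/* An illustrative example of the comparison below (assuming a 32-bit
   target, so BITS_PER_WORD is 32 and twice_word_mode is DImode): for
   an ASHIFT of DImode by 33, the split form is an SImode shift of the
   low source word by 1 into the high result word plus a move of zero
   into the low result word.  Splitting is recorded when the DImode
   shift costs at least as much as that word shift plus the zero move,
   or when FORCE_LOWERING is set.  */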
170
171 static void
172 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
173 bool *splitting, enum rtx_code code,
174 int word_move_zero_cost, int word_move_cost)
175 {
176 int wide_cost, narrow_cost, upper_cost, i;
177
178 for (i = 0; i < BITS_PER_WORD; i++)
179 {
180 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
181 i + BITS_PER_WORD);
182 if (i == 0)
183 narrow_cost = word_move_cost;
184 else
185 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
186
187 if (code != ASHIFTRT)
188 upper_cost = word_move_zero_cost;
189 else if (i == BITS_PER_WORD - 1)
190 upper_cost = word_move_cost;
191 else
192 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
193 BITS_PER_WORD - 1);
194
195 if (LOG_COSTS)
196 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
197 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
198 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
199
200 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
201 splitting[i] = true;
202 }
203 }
204
205 /* Compute what we should do when optimizing for speed or size; SPEED_P
206 selects which. Use RTXES for computing costs. */
207
208 static void
209 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
210 {
211 unsigned int i;
212 int word_move_zero_cost, word_move_cost;
213
214 PUT_MODE (rtxes->target, word_mode);
215 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
216 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
217
218 SET_SRC (rtxes->set) = rtxes->source;
219 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
220
221 if (LOG_COSTS)
222 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
223 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
224
225 for (i = 0; i < MAX_MACHINE_MODE; i++)
226 {
227 machine_mode mode = (machine_mode) i;
228 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
229 if (factor > 1)
230 {
231 int mode_move_cost;
232
233 PUT_MODE (rtxes->target, mode);
234 PUT_MODE (rtxes->source, mode);
235 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
236
237 if (LOG_COSTS)
238 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
239 GET_MODE_NAME (mode), mode_move_cost,
240 word_move_cost, factor);
241
242 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
243 {
244 choices[speed_p].move_modes_to_split[i] = true;
245 choices[speed_p].something_to_do = true;
246 }
247 }
248 }
249
250 /* For the moves and shifts, the only case that is checked is one
251 where the mode of the target is an integer mode twice the width
252 of the word_mode.
253
254 If it is not profitable to split a double word move then do not
255 even consider the shifts or the zero extension. */
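/* A hedged sketch of the zero-extend comparison below (the 32-bit
   target and register names are assumptions): the split form of

       (set (reg:DI d) (zero_extend:DI (reg:SI s)))

   is a word move of S into the low half of D plus a move of zero into
   the high half, so splitting_zext is set when the DImode zero-extend
   costs at least word_move_cost + word_move_zero_cost, or when
   FORCE_LOWERING is set.  */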
256 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
257 {
258 int zext_cost;
259
260 /* The only thing to check here is whether moving the upper part with a
261 zero is cheaper than doing the zext itself. */
262 PUT_MODE (rtxes->source, word_mode);
263 zext_cost = set_src_cost (rtxes->zext, speed_p);
264
265 if (LOG_COSTS)
266 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
267 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
268 zext_cost, word_move_cost, word_move_zero_cost);
269
270 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
271 choices[speed_p].splitting_zext = true;
272
273 compute_splitting_shift (speed_p, rtxes,
274 choices[speed_p].splitting_ashift, ASHIFT,
275 word_move_zero_cost, word_move_cost);
276 compute_splitting_shift (speed_p, rtxes,
277 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
278 word_move_zero_cost, word_move_cost);
279 compute_splitting_shift (speed_p, rtxes,
280 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
281 word_move_zero_cost, word_move_cost);
282 }
283 }
284
285 /* Do one-per-target initialisation. This involves determining
286 which operations on the machine are profitable. If none are found,
287 then the pass just returns when called. */
288
289 void
290 init_lower_subreg (void)
291 {
292 struct cost_rtxes rtxes;
293
294 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
295
296 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
297
298 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
299 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
300 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
301 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
302 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
303
304 if (LOG_COSTS)
305 fprintf (stderr, "\nSize costs\n==========\n\n");
306 compute_costs (false, &rtxes);
307
308 if (LOG_COSTS)
309 fprintf (stderr, "\nSpeed costs\n===========\n\n");
310 compute_costs (true, &rtxes);
311 }
312
313 static bool
314 simple_move_operand (rtx x)
315 {
316 if (GET_CODE (x) == SUBREG)
317 x = SUBREG_REG (x);
318
319 if (!OBJECT_P (x))
320 return false;
321
322 if (GET_CODE (x) == LABEL_REF
323 || GET_CODE (x) == SYMBOL_REF
324 || GET_CODE (x) == HIGH
325 || GET_CODE (x) == CONST)
326 return false;
327
328 if (MEM_P (x)
329 && (MEM_VOLATILE_P (x)
330 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
331 return false;
332
333 return true;
334 }
335
336 /* If INSN is a single set between two objects that we want to split,
337 return the single set. SPEED_P says whether we are optimizing
338 INSN for speed or size.
339
340 INSN should have been passed to recog and extract_insn before this
341 is called. */
342
343 static rtx
344 simple_move (rtx_insn *insn, bool speed_p)
345 {
346 rtx x;
347 rtx set;
348 machine_mode mode;
349
350 if (recog_data.n_operands != 2)
351 return NULL_RTX;
352
353 set = single_set (insn);
354 if (!set)
355 return NULL_RTX;
356
357 x = SET_DEST (set);
358 if (x != recog_data.operand[0] && x != recog_data.operand[1])
359 return NULL_RTX;
360 if (!simple_move_operand (x))
361 return NULL_RTX;
362
363 x = SET_SRC (set);
364 if (x != recog_data.operand[0] && x != recog_data.operand[1])
365 return NULL_RTX;
366 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
367 things like x86 rdtsc which returns a DImode value. */
368 if (GET_CODE (x) != ASM_OPERANDS
369 && !simple_move_operand (x))
370 return NULL_RTX;
371
372 /* We try to decompose in integer modes, to avoid generating
373 inefficient code copying between integer and floating point
374 registers. That means that we can't decompose if this is a
375 non-integer mode for which there is no integer mode of the same
376 size. */
377 mode = GET_MODE (SET_DEST (set));
378 if (!SCALAR_INT_MODE_P (mode)
379 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
380 == BLKmode))
381 return NULL_RTX;
382
383 /* Reject PARTIAL_INT modes. They are used for processor specific
384 purposes and it's probably best not to tamper with them. */
385 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
386 return NULL_RTX;
387
388 if (!choices[speed_p].move_modes_to_split[(int) mode])
389 return NULL_RTX;
390
391 return set;
392 }
393
394 /* If SET is a copy from one multi-word pseudo-register to another,
395 record that in reg_copy_graph. Return whether it is such a
396 copy. */
397
398 static bool
399 find_pseudo_copy (rtx set)
400 {
401 rtx dest = SET_DEST (set);
402 rtx src = SET_SRC (set);
403 unsigned int rd, rs;
404 bitmap b;
405
406 if (!REG_P (dest) || !REG_P (src))
407 return false;
408
409 rd = REGNO (dest);
410 rs = REGNO (src);
411 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
412 return false;
413
414 b = reg_copy_graph[rs];
415 if (b == NULL)
416 {
417 b = BITMAP_ALLOC (NULL);
418 reg_copy_graph[rs] = b;
419 }
420
421 bitmap_set_bit (b, rd);
422
423 return true;
424 }
425
426 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
427 where they are copied to another register, add the register to
428 which they are copied to DECOMPOSABLE_CONTEXT. Use
429 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
430 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
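/* A small illustrative example (the pseudo numbers are made up): a copy
   (set (reg:DI 101) (reg:DI 100)) recorded by find_pseudo_copy sets
   bit 101 in reg_copy_graph[100].  If regno 100 later ends up in
   DECOMPOSABLE_CONTEXT, the loop below also marks regno 101 as
   decomposable, unless 101 is in NON_DECOMPOSABLE_CONTEXT, and keeps
   iterating until no new registers are added.  */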
431
432 static void
433 propagate_pseudo_copies (void)
434 {
435 bitmap queue, propagate;
436
437 queue = BITMAP_ALLOC (NULL);
438 propagate = BITMAP_ALLOC (NULL);
439
440 bitmap_copy (queue, decomposable_context);
441 do
442 {
443 bitmap_iterator iter;
444 unsigned int i;
445
446 bitmap_clear (propagate);
447
448 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
449 {
450 bitmap b = reg_copy_graph[i];
451 if (b)
452 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
453 }
454
455 bitmap_and_compl (queue, propagate, decomposable_context);
456 bitmap_ior_into (decomposable_context, propagate);
457 }
458 while (!bitmap_empty_p (queue));
459
460 BITMAP_FREE (queue);
461 BITMAP_FREE (propagate);
462 }
463
464 /* A pointer to one of these values is passed to
465 find_decomposable_subregs. */
466
467 enum classify_move_insn
468 {
469 /* Not a simple move from one location to another. */
470 NOT_SIMPLE_MOVE,
471 /* A simple move we want to decompose. */
472 DECOMPOSABLE_SIMPLE_MOVE,
473 /* Any other simple move. */
474 SIMPLE_MOVE
475 };
476
477 /* If we find a SUBREG in *LOC which we could use to decompose a
478 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
479 unadorned register which is not a simple pseudo-register copy,
480 PCMI will point at the type of move, and we set a bit in
481 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
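/* For instance (an illustrative sketch on a 32-bit target, not taken
   from any particular test case): a DImode pseudo referenced only as
   (subreg:SI (reg:DI 100) 0) and (subreg:SI (reg:DI 100) 4) gets
   regno 100 set in DECOMPOSABLE_CONTEXT, whereas a direct use of
   (reg:DI 100) in an insn that is not a simple move gets it set in
   NON_DECOMPOSABLE_CONTEXT instead.  */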
482
483 static void
484 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
485 {
486 subrtx_var_iterator::array_type array;
487 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
488 {
489 rtx x = *iter;
490 if (GET_CODE (x) == SUBREG)
491 {
492 rtx inner = SUBREG_REG (x);
493 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
494
495 if (!REG_P (inner))
496 continue;
497
498 regno = REGNO (inner);
499 if (HARD_REGISTER_NUM_P (regno))
500 {
501 iter.skip_subrtxes ();
502 continue;
503 }
504
505 outer_size = GET_MODE_SIZE (GET_MODE (x));
506 inner_size = GET_MODE_SIZE (GET_MODE (inner));
507 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
508 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
509
510 /* We only try to decompose single word subregs of multi-word
511 registers. When we find one, we skip the remaining subrtxes to
512 avoid iterating over the inner register.
513
514 ??? This doesn't allow, e.g., DImode subregs of TImode values
515 on 32-bit targets. We would need to record the way the
516 pseudo-register was used, and only decompose if all the uses
517 were the same number and size of pieces. Hopefully this
518 doesn't happen much. */
519
520 if (outer_words == 1 && inner_words > 1)
521 {
522 bitmap_set_bit (decomposable_context, regno);
523 iter.skip_subrtxes ();
524 continue;
525 }
526
527 /* If this is a cast from one mode to another, where the modes
528 have the same size, and they are not tieable, then mark this
529 register as non-decomposable. If we decompose it we are
530 likely to mess up whatever the backend is trying to do. */
531 if (outer_words > 1
532 && outer_size == inner_size
533 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
534 {
535 bitmap_set_bit (non_decomposable_context, regno);
536 bitmap_set_bit (subreg_context, regno);
537 iter.skip_subrtxes ();
538 continue;
539 }
540 }
541 else if (REG_P (x))
542 {
543 unsigned int regno;
544
545 /* We will see an outer SUBREG before we see the inner REG, so
546 when we see a plain REG here it means a direct reference to
547 the register.
548
549 If this is not a simple copy from one location to another,
550 then we cannot decompose this register. If this is a simple
551 copy we want to decompose, and the mode is right,
552 then we mark the register as decomposable.
553 Otherwise we don't say anything about this register --
554 it could be decomposed, but whether that would be
555 profitable depends upon how it is used elsewhere.
556
557 We only set bits in the bitmap for multi-word
558 pseudo-registers, since those are the only ones we care about
559 and it keeps the size of the bitmaps down. */
560
561 regno = REGNO (x);
562 if (!HARD_REGISTER_NUM_P (regno)
563 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
564 {
565 switch (*pcmi)
566 {
567 case NOT_SIMPLE_MOVE:
568 bitmap_set_bit (non_decomposable_context, regno);
569 break;
570 case DECOMPOSABLE_SIMPLE_MOVE:
571 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
572 bitmap_set_bit (decomposable_context, regno);
573 break;
574 case SIMPLE_MOVE:
575 break;
576 default:
577 gcc_unreachable ();
578 }
579 }
580 }
581 else if (MEM_P (x))
582 {
583 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
584
585 /* Any registers used in a MEM do not participate in a
586 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
587 here, and skip the subrtxes to block the parent's iteration. */
588 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
589 iter.skip_subrtxes ();
590 }
591 }
592 }
593
594 /* Decompose REGNO into word-sized components. We smash the REG node
595 in place. This ensures that (1) something goes wrong quickly if we
596 fail to make some replacement, and (2) the debug information inside
597 the symbol table is automatically kept up to date. */
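/* Illustration (the register numbers are hypothetical): after this
   function runs for a DImode pseudo on a 32-bit target, the node that
   used to be (reg:DI 100) reads

       (concatn:DI [(reg:SI 102) (reg:SI 103)])

   and every existing reference to the old REG now sees the CONCATN,
   which the resolve_* routines below rewrite into word operations.  */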
598
599 static void
600 decompose_register (unsigned int regno)
601 {
602 rtx reg;
603 unsigned int words, i;
604 rtvec v;
605
606 reg = regno_reg_rtx[regno];
607
608 regno_reg_rtx[regno] = NULL_RTX;
609
610 words = GET_MODE_SIZE (GET_MODE (reg));
611 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
612
613 v = rtvec_alloc (words);
614 for (i = 0; i < words; ++i)
615 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
616
617 PUT_CODE (reg, CONCATN);
618 XVEC (reg, 0) = v;
619
620 if (dump_file)
621 {
622 fprintf (dump_file, "; Splitting reg %u ->", regno);
623 for (i = 0; i < words; ++i)
624 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
625 fputc ('\n', dump_file);
626 }
627 }
628
629 /* Get a SUBREG of a CONCATN. */
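/* For example (a sketch, assuming 32-bit words): asking for the SImode
   subreg at byte 4 of (concatn:DI [(reg:SI 102) (reg:SI 103)]) selects
   the second element and simply yields (reg:SI 103); a request that
   would straddle two elements returns NULL_RTX.  */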
630
631 static rtx
632 simplify_subreg_concatn (machine_mode outermode, rtx op,
633 unsigned int byte)
634 {
635 unsigned int inner_size;
636 machine_mode innermode, partmode;
637 rtx part;
638 unsigned int final_offset;
639
640 gcc_assert (GET_CODE (op) == CONCATN);
641 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
642
643 innermode = GET_MODE (op);
644 gcc_assert (byte < GET_MODE_SIZE (innermode));
645 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
646
647 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
648 part = XVECEXP (op, 0, byte / inner_size);
649 partmode = GET_MODE (part);
650
651 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
652 regular CONST_VECTORs. They have vector or integer modes, depending
653 on the capabilities of the target. Cope with them. */
654 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
655 partmode = GET_MODE_INNER (innermode);
656 else if (partmode == VOIDmode)
657 {
658 enum mode_class mclass = GET_MODE_CLASS (innermode);
659 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
660 }
661
662 final_offset = byte % inner_size;
663 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
664 return NULL_RTX;
665
666 return simplify_gen_subreg (outermode, part, partmode, final_offset);
667 }
668
669 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
670
671 static rtx
672 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
673 machine_mode innermode, unsigned int byte)
674 {
675 rtx ret;
676
677 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
678 If OP is a SUBREG of a CONCATN, then it must be a simple mode
679 change with the same size and offset 0, or it must extract a
680 part. We shouldn't see anything else here. */
681 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
682 {
683 rtx op2;
684
685 if ((GET_MODE_SIZE (GET_MODE (op))
686 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
687 && SUBREG_BYTE (op) == 0)
688 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
689 GET_MODE (SUBREG_REG (op)), byte);
690
691 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
692 SUBREG_BYTE (op));
693 if (op2 == NULL_RTX)
694 {
695 /* We don't handle paradoxical subregs here. */
696 gcc_assert (GET_MODE_SIZE (outermode)
697 <= GET_MODE_SIZE (GET_MODE (op)));
698 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
699 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
700 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
701 byte + SUBREG_BYTE (op));
702 gcc_assert (op2 != NULL_RTX);
703 return op2;
704 }
705
706 op = op2;
707 gcc_assert (op != NULL_RTX);
708 gcc_assert (innermode == GET_MODE (op));
709 }
710
711 if (GET_CODE (op) == CONCATN)
712 return simplify_subreg_concatn (outermode, op, byte);
713
714 ret = simplify_gen_subreg (outermode, op, innermode, byte);
715
716 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
717 resolve_simple_move will ask for the high part of the paradoxical
718 subreg, which does not have a value. Just return a zero. */
719 if (ret == NULL_RTX
720 && GET_CODE (op) == SUBREG
721 && SUBREG_BYTE (op) == 0
722 && (GET_MODE_SIZE (innermode)
723 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
724 return CONST0_RTX (outermode);
725
726 gcc_assert (ret != NULL_RTX);
727 return ret;
728 }
729
730 /* Return whether we should resolve X into the registers into which it
731 was decomposed. */
732
733 static bool
734 resolve_reg_p (rtx x)
735 {
736 return GET_CODE (x) == CONCATN;
737 }
738
739 /* Return whether X is a SUBREG of a register which we need to
740 resolve. */
741
742 static bool
743 resolve_subreg_p (rtx x)
744 {
745 if (GET_CODE (x) != SUBREG)
746 return false;
747 return resolve_reg_p (SUBREG_REG (x));
748 }
749
750 /* Look for SUBREGs in *LOC which need to be decomposed. */
751
752 static bool
753 resolve_subreg_use (rtx *loc, rtx insn)
754 {
755 subrtx_ptr_iterator::array_type array;
756 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
757 {
758 rtx *loc = *iter;
759 rtx x = *loc;
760 if (resolve_subreg_p (x))
761 {
762 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
763 SUBREG_BYTE (x));
764
765 /* It is possible for a note to contain a reference which we can
766 decompose. In this case, return true to the caller to indicate
767 that the note must be removed. */
768 if (!x)
769 {
770 gcc_assert (!insn);
771 return true;
772 }
773
774 validate_change (insn, loc, x, 1);
775 iter.skip_subrtxes ();
776 }
777 else if (resolve_reg_p (x))
778 /* Return true to the caller to indicate that we found a direct
779 reference to a register which is being decomposed. This can
780 happen inside notes, multiword shift or zero-extend
781 instructions. */
782 return true;
783 }
784
785 return false;
786 }
787
788 /* Resolve any decomposed registers which appear in register notes on
789 INSN. */
790
791 static void
792 resolve_reg_notes (rtx_insn *insn)
793 {
794 rtx *pnote, note;
795
796 note = find_reg_equal_equiv_note (insn);
797 if (note)
798 {
799 int old_count = num_validated_changes ();
800 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
801 remove_note (insn, note);
802 else
803 if (old_count != num_validated_changes ())
804 df_notes_rescan (insn);
805 }
806
807 pnote = &REG_NOTES (insn);
808 while (*pnote != NULL_RTX)
809 {
810 bool del = false;
811
812 note = *pnote;
813 switch (REG_NOTE_KIND (note))
814 {
815 case REG_DEAD:
816 case REG_UNUSED:
817 if (resolve_reg_p (XEXP (note, 0)))
818 del = true;
819 break;
820
821 default:
822 break;
823 }
824
825 if (del)
826 *pnote = XEXP (note, 1);
827 else
828 pnote = &XEXP (note, 1);
829 }
830 }
831
832 /* Return whether X can be decomposed into subwords. */
833
834 static bool
835 can_decompose_p (rtx x)
836 {
837 if (REG_P (x))
838 {
839 unsigned int regno = REGNO (x);
840
841 if (HARD_REGISTER_NUM_P (regno))
842 {
843 unsigned int byte, num_bytes;
844
845 num_bytes = GET_MODE_SIZE (GET_MODE (x));
846 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
847 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
848 return false;
849 return true;
850 }
851 else
852 return !bitmap_bit_p (subreg_context, regno);
853 }
854
855 return true;
856 }
857
858 /* Decompose the registers used in a simple move SET within INSN. If
859 we don't change anything, return INSN, otherwise return the start
860 of the sequence of moves. */
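/* A hedged sketch of the common case (the address and register numbers
   are made up): storing a decomposed DImode pseudo to memory,

       (set (mem:DI (reg:SI 200)) (reg:DI 100))

   is replaced by two word-sized stores, roughly

       (set (mem:SI (reg:SI 200)) (reg:SI 102))
       (set (mem:SI (plus:SI (reg:SI 200) (const_int 4))) (reg:SI 103))

   with the harder cases (pushes, non-integer modes, registers we
   cannot decompose) handled via temporaries as described below.  */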
861
862 static rtx_insn *
863 resolve_simple_move (rtx set, rtx_insn *insn)
864 {
865 rtx src, dest, real_dest;
866 rtx_insn *insns;
867 machine_mode orig_mode, dest_mode;
868 unsigned int words;
869 bool pushing;
870
871 src = SET_SRC (set);
872 dest = SET_DEST (set);
873 orig_mode = GET_MODE (dest);
874
875 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
876 gcc_assert (words > 1);
877
878 start_sequence ();
879
880 /* We have to handle copying from a SUBREG of a decomposed reg where
881 the SUBREG is larger than word size. Rather than assume that we
882 can take a word_mode SUBREG of the destination, we copy to a new
883 register and then copy that to the destination. */
884
885 real_dest = NULL_RTX;
886
887 if (GET_CODE (src) == SUBREG
888 && resolve_reg_p (SUBREG_REG (src))
889 && (SUBREG_BYTE (src) != 0
890 || (GET_MODE_SIZE (orig_mode)
891 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
892 {
893 real_dest = dest;
894 dest = gen_reg_rtx (orig_mode);
895 if (REG_P (real_dest))
896 REG_ATTRS (dest) = REG_ATTRS (real_dest);
897 }
898
899 /* Similarly if we are copying to a SUBREG of a decomposed reg where
900 the SUBREG is larger than word size. */
901
902 if (GET_CODE (dest) == SUBREG
903 && resolve_reg_p (SUBREG_REG (dest))
904 && (SUBREG_BYTE (dest) != 0
905 || (GET_MODE_SIZE (orig_mode)
906 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
907 {
908 rtx reg, smove;
909 rtx_insn *minsn;
910
911 reg = gen_reg_rtx (orig_mode);
912 minsn = emit_move_insn (reg, src);
913 smove = single_set (minsn);
914 gcc_assert (smove != NULL_RTX);
915 resolve_simple_move (smove, minsn);
916 src = reg;
917 }
918
919 /* If we didn't have any big SUBREGS of decomposed registers, and
920 neither side of the move is a register we are decomposing, then
921 we don't have to do anything here. */
922
923 if (src == SET_SRC (set)
924 && dest == SET_DEST (set)
925 && !resolve_reg_p (src)
926 && !resolve_subreg_p (src)
927 && !resolve_reg_p (dest)
928 && !resolve_subreg_p (dest))
929 {
930 end_sequence ();
931 return insn;
932 }
933
934 /* It's possible for the code to use a subreg of a decomposed
935 register while forming an address. We need to handle that before
936 passing the address to emit_move_insn. We pass NULL_RTX as the
937 insn parameter to resolve_subreg_use because we cannot validate
938 the insn yet. */
939 if (MEM_P (src) || MEM_P (dest))
940 {
941 int acg;
942
943 if (MEM_P (src))
944 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
945 if (MEM_P (dest))
946 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
947 acg = apply_change_group ();
948 gcc_assert (acg);
949 }
950
951 /* If SRC is a register which we can't decompose, or has side
952 effects, we need to move via a temporary register. */
953
954 if (!can_decompose_p (src)
955 || side_effects_p (src)
956 || GET_CODE (src) == ASM_OPERANDS)
957 {
958 rtx reg;
959
960 reg = gen_reg_rtx (orig_mode);
961
962 #ifdef AUTO_INC_DEC
963 {
964 rtx move = emit_move_insn (reg, src);
965 if (MEM_P (src))
966 {
967 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
968 if (note)
969 add_reg_note (move, REG_INC, XEXP (note, 0));
970 }
971 }
972 #else
973 emit_move_insn (reg, src);
974 #endif
975 src = reg;
976 }
977
978 /* If DEST is a register which we can't decompose, or has side
979 effects, we need to first move to a temporary register. We
980 handle the common case of pushing an operand directly. We also
981 go through a temporary register if it holds a floating point
982 value. This gives us better code on systems which can't move
983 data easily between integer and floating point registers. */
984
985 dest_mode = orig_mode;
986 pushing = push_operand (dest, dest_mode);
987 if (!can_decompose_p (dest)
988 || (side_effects_p (dest) && !pushing)
989 || (!SCALAR_INT_MODE_P (dest_mode)
990 && !resolve_reg_p (dest)
991 && !resolve_subreg_p (dest)))
992 {
993 if (real_dest == NULL_RTX)
994 real_dest = dest;
995 if (!SCALAR_INT_MODE_P (dest_mode))
996 {
997 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
998 MODE_INT, 0);
999 gcc_assert (dest_mode != BLKmode);
1000 }
1001 dest = gen_reg_rtx (dest_mode);
1002 if (REG_P (real_dest))
1003 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1004 }
1005
1006 if (pushing)
1007 {
1008 unsigned int i, j, jinc;
1009
1010 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1011 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1012 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1013
1014 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1015 {
1016 j = 0;
1017 jinc = 1;
1018 }
1019 else
1020 {
1021 j = words - 1;
1022 jinc = -1;
1023 }
1024
1025 for (i = 0; i < words; ++i, j += jinc)
1026 {
1027 rtx temp;
1028
1029 temp = copy_rtx (XEXP (dest, 0));
1030 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1031 j * UNITS_PER_WORD);
1032 emit_move_insn (temp,
1033 simplify_gen_subreg_concatn (word_mode, src,
1034 orig_mode,
1035 j * UNITS_PER_WORD));
1036 }
1037 }
1038 else
1039 {
1040 unsigned int i;
1041
1042 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1043 emit_clobber (dest);
1044
1045 for (i = 0; i < words; ++i)
1046 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1047 dest_mode,
1048 i * UNITS_PER_WORD),
1049 simplify_gen_subreg_concatn (word_mode, src,
1050 orig_mode,
1051 i * UNITS_PER_WORD));
1052 }
1053
1054 if (real_dest != NULL_RTX)
1055 {
1056 rtx mdest, smove;
1057 rtx_insn *minsn;
1058
1059 if (dest_mode == orig_mode)
1060 mdest = dest;
1061 else
1062 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1063 minsn = emit_move_insn (real_dest, mdest);
1064
1065 #ifdef AUTO_INC_DEC
1066 if (MEM_P (real_dest)
1067 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1068 {
1069 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1070 if (note)
1071 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1072 }
1073 #endif
1074
1075 smove = single_set (minsn);
1076 gcc_assert (smove != NULL_RTX);
1077
1078 resolve_simple_move (smove, minsn);
1079 }
1080
1081 insns = get_insns ();
1082 end_sequence ();
1083
1084 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1085
1086 emit_insn_before (insns, insn);
1087
1088 /* If we get here via self-recursion, then INSN is not yet in the insns
1089 chain and delete_insn will fail. We only want to remove INSN from the
1090 current sequence. See PR56738. */
1091 if (in_sequence_p ())
1092 remove_insn (insn);
1093 else
1094 delete_insn (insn);
1095
1096 return insns;
1097 }
1098
1099 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1100 component registers. Return whether we changed something. */
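/* For example (hypothetical pseudo numbers, 32-bit target): once
   (reg:DI 100) has been decomposed, (clobber (reg:DI 100)) is turned
   into (clobber (reg:SI 102)), with an additional
   (clobber (reg:SI 103)) emitted right after the insn.  */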
1101
1102 static bool
1103 resolve_clobber (rtx pat, rtx_insn *insn)
1104 {
1105 rtx reg;
1106 machine_mode orig_mode;
1107 unsigned int words, i;
1108 int ret;
1109
1110 reg = XEXP (pat, 0);
1111 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1112 return false;
1113
1114 orig_mode = GET_MODE (reg);
1115 words = GET_MODE_SIZE (orig_mode);
1116 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1117
1118 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1119 simplify_gen_subreg_concatn (word_mode, reg,
1120 orig_mode, 0),
1121 0);
1122 df_insn_rescan (insn);
1123 gcc_assert (ret != 0);
1124
1125 for (i = words - 1; i > 0; --i)
1126 {
1127 rtx x;
1128
1129 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1130 i * UNITS_PER_WORD);
1131 x = gen_rtx_CLOBBER (VOIDmode, x);
1132 emit_insn_after (x, insn);
1133 }
1134
1135 resolve_reg_notes (insn);
1136
1137 return true;
1138 }
1139
1140 /* A USE of a decomposed register is no longer meaningful. Return
1141 whether we changed something. */
1142
1143 static bool
1144 resolve_use (rtx pat, rtx_insn *insn)
1145 {
1146 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1147 {
1148 delete_insn (insn);
1149 return true;
1150 }
1151
1152 resolve_reg_notes (insn);
1153
1154 return false;
1155 }
1156
1157 /* Resolve decomposed registers that appear in a debug insn's VAR_LOCATION. */
1158
1159 static void
1160 resolve_debug (rtx_insn *insn)
1161 {
1162 subrtx_ptr_iterator::array_type array;
1163 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1164 {
1165 rtx *loc = *iter;
1166 rtx x = *loc;
1167 if (resolve_subreg_p (x))
1168 {
1169 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1170 SUBREG_BYTE (x));
1171
1172 if (x)
1173 *loc = x;
1174 else
1175 x = copy_rtx (*loc);
1176 }
1177 if (resolve_reg_p (x))
1178 *loc = copy_rtx (x);
1179 }
1180
1181 df_insn_rescan (insn);
1182
1183 resolve_reg_notes (insn);
1184 }
1185
1186 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1187 set the decomposable_context bitmap accordingly. SPEED_P is true
1188 if we are optimizing INSN for speed rather than size. Return true
1189 if INSN is decomposable. */
1190
1191 static bool
1192 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1193 {
1194 rtx set;
1195 rtx op;
1196 rtx op_operand;
1197
1198 set = single_set (insn);
1199 if (!set)
1200 return false;
1201
1202 op = SET_SRC (set);
1203 if (GET_CODE (op) != ASHIFT
1204 && GET_CODE (op) != LSHIFTRT
1205 && GET_CODE (op) != ASHIFTRT
1206 && GET_CODE (op) != ZERO_EXTEND)
1207 return false;
1208
1209 op_operand = XEXP (op, 0);
1210 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1211 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1212 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1213 || GET_MODE (op) != twice_word_mode)
1214 return false;
1215
1216 if (GET_CODE (op) == ZERO_EXTEND)
1217 {
1218 if (GET_MODE (op_operand) != word_mode
1219 || !choices[speed_p].splitting_zext)
1220 return false;
1221 }
1222 else /* left or right shift */
1223 {
1224 bool *splitting = (GET_CODE (op) == ASHIFT
1225 ? choices[speed_p].splitting_ashift
1226 : GET_CODE (op) == ASHIFTRT
1227 ? choices[speed_p].splitting_ashiftrt
1228 : choices[speed_p].splitting_lshiftrt);
1229 if (!CONST_INT_P (XEXP (op, 1))
1230 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1231 2 * BITS_PER_WORD - 1)
1232 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1233 return false;
1234
1235 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1236 }
1237
1238 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1239
1240 return true;
1241 }
1242
1243 /* Decompose a shift (in INSN) of a multiword pseudo whose shift count
1244 is at least a word's worth of bits, or a multiword zero-extend of a
1245 word-mode pseudo, into a move and a 'set to zero' insn. Return a
1246 pointer to the first of the new insns when a replacement was done. */
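/* As an illustration (assuming a 32-bit little-endian target and
   made-up pseudo numbers): with both pseudos already decomposed,

       (set (reg:DI 100) (ashift:DI (reg:DI 101) (const_int 40)))

   becomes roughly an SImode shift of the low word of 101 by 8 bits
   into the high word of 100 plus a move of zero into the low word of
   100; a zero_extend instead copies the source word into the low word
   and zeroes the high word.  */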
1247
1248 static rtx_insn *
1249 resolve_shift_zext (rtx_insn *insn)
1250 {
1251 rtx set;
1252 rtx op;
1253 rtx op_operand;
1254 rtx_insn *insns;
1255 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1256 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1257
1258 set = single_set (insn);
1259 if (!set)
1260 return NULL;
1261
1262 op = SET_SRC (set);
1263 if (GET_CODE (op) != ASHIFT
1264 && GET_CODE (op) != LSHIFTRT
1265 && GET_CODE (op) != ASHIFTRT
1266 && GET_CODE (op) != ZERO_EXTEND)
1267 return NULL;
1268
1269 op_operand = XEXP (op, 0);
1270
1271 /* We can tear this operation apart only if the regs were already
1272 torn apart. */
1273 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1274 return NULL;
1275
1276 /* src_reg_num is the number of the word mode register which we
1277 are operating on. For a left shift and a zero_extend on little
1278 endian machines this is register 0. */
1279 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1280 ? 1 : 0;
1281
1282 if (WORDS_BIG_ENDIAN
1283 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1284 src_reg_num = 1 - src_reg_num;
1285
1286 if (GET_CODE (op) == ZERO_EXTEND)
1287 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1288 else
1289 dest_reg_num = 1 - src_reg_num;
1290
1291 offset1 = UNITS_PER_WORD * dest_reg_num;
1292 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1293 src_offset = UNITS_PER_WORD * src_reg_num;
1294
1295 start_sequence ();
1296
1297 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1298 GET_MODE (SET_DEST (set)),
1299 offset1);
1300 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1301 GET_MODE (SET_DEST (set)),
1302 offset2);
1303 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1304 GET_MODE (op_operand),
1305 src_offset);
1306 if (GET_CODE (op) == ASHIFTRT
1307 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1308 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1309 BITS_PER_WORD - 1, NULL_RTX, 0);
1310
1311 if (GET_CODE (op) != ZERO_EXTEND)
1312 {
1313 int shift_count = INTVAL (XEXP (op, 1));
1314 if (shift_count > BITS_PER_WORD)
1315 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1316 LSHIFT_EXPR : RSHIFT_EXPR,
1317 word_mode, src_reg,
1318 shift_count - BITS_PER_WORD,
1319 dest_reg, GET_CODE (op) != ASHIFTRT);
1320 }
1321
1322 if (dest_reg != src_reg)
1323 emit_move_insn (dest_reg, src_reg);
1324 if (GET_CODE (op) != ASHIFTRT)
1325 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1326 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1327 emit_move_insn (dest_upper, copy_rtx (src_reg));
1328 else
1329 emit_move_insn (dest_upper, upper_src);
1330 insns = get_insns ();
1331
1332 end_sequence ();
1333
1334 emit_insn_before (insns, insn);
1335
1336 if (dump_file)
1337 {
1338 rtx_insn *in;
1339 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1340 for (in = insns; in != insn; in = NEXT_INSN (in))
1341 fprintf (dump_file, "%d ", INSN_UID (in));
1342 fprintf (dump_file, "\n");
1343 }
1344
1345 delete_insn (insn);
1346 return insns;
1347 }
1348
1349 /* Print to dump_file a description of what we're doing with shift code CODE.
1350 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1351
1352 static void
1353 dump_shift_choices (enum rtx_code code, bool *splitting)
1354 {
1355 int i;
1356 const char *sep;
1357
1358 fprintf (dump_file,
1359 " Splitting mode %s for %s lowering with shift amounts = ",
1360 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1361 sep = "";
1362 for (i = 0; i < BITS_PER_WORD; i++)
1363 if (splitting[i])
1364 {
1365 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1366 sep = ",";
1367 }
1368 fprintf (dump_file, "\n");
1369 }
1370
1371 /* Print to dump_file a description of what we're doing when optimizing
1372 for speed or size; SPEED_P says which. DESCRIPTION is a description
1373 of the SPEED_P choice. */
1374
1375 static void
1376 dump_choices (bool speed_p, const char *description)
1377 {
1378 unsigned int i;
1379
1380 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1381
1382 for (i = 0; i < MAX_MACHINE_MODE; i++)
1383 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1384 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1385 choices[speed_p].move_modes_to_split[i]
1386 ? "Splitting"
1387 : "Skipping",
1388 GET_MODE_NAME ((machine_mode) i));
1389
1390 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1391 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1392 GET_MODE_NAME (twice_word_mode));
1393
1394 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1395 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1396 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1397 fprintf (dump_file, "\n");
1398 }
1399
1400 /* Look for registers which are always accessed via word-sized SUBREGs
1401 or, if DECOMPOSE_COPIES is true, via copies. Decompose these
1402 registers into several word-sized pseudo-registers. */
1403
1404 static void
1405 decompose_multiword_subregs (bool decompose_copies)
1406 {
1407 unsigned int max;
1408 basic_block bb;
1409 bool speed_p;
1410
1411 if (dump_file)
1412 {
1413 dump_choices (false, "size");
1414 dump_choices (true, "speed");
1415 }
1416
1417 /* Check if this target even has any modes to consider lowering. */
1418 if (!choices[false].something_to_do && !choices[true].something_to_do)
1419 {
1420 if (dump_file)
1421 fprintf (dump_file, "Nothing to do!\n");
1422 return;
1423 }
1424
1425 max = max_reg_num ();
1426
1427 /* First see if there are any multi-word pseudo-registers. If there
1428 aren't, there is nothing we can do. This should speed up this
1429 pass in the normal case, since it should be faster than scanning
1430 all the insns. */
1431 {
1432 unsigned int i;
1433 bool useful_modes_seen = false;
1434
1435 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1436 if (regno_reg_rtx[i] != NULL)
1437 {
1438 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1439 if (choices[false].move_modes_to_split[(int) mode]
1440 || choices[true].move_modes_to_split[(int) mode])
1441 {
1442 useful_modes_seen = true;
1443 break;
1444 }
1445 }
1446
1447 if (!useful_modes_seen)
1448 {
1449 if (dump_file)
1450 fprintf (dump_file, "Nothing to lower in this function.\n");
1451 return;
1452 }
1453 }
1454
1455 if (df)
1456 {
1457 df_set_flags (DF_DEFER_INSN_RESCAN);
1458 run_word_dce ();
1459 }
1460
1461 /* FIXME: It may be possible to change this code to look for each
1462 multi-word pseudo-register and to find each insn which sets or
1463 uses that register. That should be faster than scanning all the
1464 insns. */
1465
1466 decomposable_context = BITMAP_ALLOC (NULL);
1467 non_decomposable_context = BITMAP_ALLOC (NULL);
1468 subreg_context = BITMAP_ALLOC (NULL);
1469
1470 reg_copy_graph.create (max);
1471 reg_copy_graph.safe_grow_cleared (max);
1472 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1473
1474 speed_p = optimize_function_for_speed_p (cfun);
1475 FOR_EACH_BB_FN (bb, cfun)
1476 {
1477 rtx_insn *insn;
1478
1479 FOR_BB_INSNS (bb, insn)
1480 {
1481 rtx set;
1482 enum classify_move_insn cmi;
1483 int i, n;
1484
1485 if (!INSN_P (insn)
1486 || GET_CODE (PATTERN (insn)) == CLOBBER
1487 || GET_CODE (PATTERN (insn)) == USE)
1488 continue;
1489
1490 recog_memoized (insn);
1491
1492 if (find_decomposable_shift_zext (insn, speed_p))
1493 continue;
1494
1495 extract_insn (insn);
1496
1497 set = simple_move (insn, speed_p);
1498
1499 if (!set)
1500 cmi = NOT_SIMPLE_MOVE;
1501 else
1502 {
1503 /* We mark pseudo-to-pseudo copies as decomposable during the
1504 second pass only. The first pass is so early that there is a
1505 good chance such moves will be optimized away completely by
1506 subsequent optimizations anyway.
1507
1508 However, we call find_pseudo_copy even during the first pass
1509 so as to properly set up the reg_copy_graph. */
1510 if (find_pseudo_copy (set))
1511 cmi = decompose_copies ? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1512 else
1513 cmi = SIMPLE_MOVE;
1514 }
1515
1516 n = recog_data.n_operands;
1517 for (i = 0; i < n; ++i)
1518 {
1519 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1520
1521 /* We handle ASM_OPERANDS as a special case to support
1522 things like x86 rdtsc which returns a DImode value.
1523 We can decompose the output, which will certainly be
1524 operand 0, but not the inputs. */
1525
1526 if (cmi == SIMPLE_MOVE
1527 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1528 {
1529 gcc_assert (i == 0);
1530 cmi = NOT_SIMPLE_MOVE;
1531 }
1532 }
1533 }
1534 }
1535
1536 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1537 if (!bitmap_empty_p (decomposable_context))
1538 {
1539 sbitmap sub_blocks;
1540 unsigned int i;
1541 sbitmap_iterator sbi;
1542 bitmap_iterator iter;
1543 unsigned int regno;
1544
1545 propagate_pseudo_copies ();
1546
1547 sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
1548 bitmap_clear (sub_blocks);
1549
1550 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1551 decompose_register (regno);
1552
1553 FOR_EACH_BB_FN (bb, cfun)
1554 {
1555 rtx_insn *insn;
1556
1557 FOR_BB_INSNS (bb, insn)
1558 {
1559 rtx pat;
1560
1561 if (!INSN_P (insn))
1562 continue;
1563
1564 pat = PATTERN (insn);
1565 if (GET_CODE (pat) == CLOBBER)
1566 resolve_clobber (pat, insn);
1567 else if (GET_CODE (pat) == USE)
1568 resolve_use (pat, insn);
1569 else if (DEBUG_INSN_P (insn))
1570 resolve_debug (insn);
1571 else
1572 {
1573 rtx set;
1574 int i;
1575
1576 recog_memoized (insn);
1577 extract_insn (insn);
1578
1579 set = simple_move (insn, speed_p);
1580 if (set)
1581 {
1582 rtx_insn *orig_insn = insn;
1583 bool cfi = control_flow_insn_p (insn);
1584
1585 /* We can end up splitting loads to multi-word pseudos
1586 into separate loads to machine word size pseudos.
1587 When this happens, we first had one load that can
1588 throw, and after resolve_simple_move we'll have a
1589 bunch of loads (at least two). All those loads may
1590 trap if we can have non-call exceptions, so they
1591 all will end the current basic block. We split the
1592 block after the outer loop over all insns, but we
1593 make sure here that we will be able to split the
1594 basic block and still produce the correct control
1595 flow graph for it. */
1596 gcc_assert (!cfi
1597 || (cfun->can_throw_non_call_exceptions
1598 && can_throw_internal (insn)));
1599
1600 insn = resolve_simple_move (set, insn);
1601 if (insn != orig_insn)
1602 {
1603 recog_memoized (insn);
1604 extract_insn (insn);
1605
1606 if (cfi)
1607 bitmap_set_bit (sub_blocks, bb->index);
1608 }
1609 }
1610 else
1611 {
1612 rtx_insn *decomposed_shift;
1613
1614 decomposed_shift = resolve_shift_zext (insn);
1615 if (decomposed_shift != NULL_RTX)
1616 {
1617 insn = decomposed_shift;
1618 recog_memoized (insn);
1619 extract_insn (insn);
1620 }
1621 }
1622
1623 for (i = recog_data.n_operands - 1; i >= 0; --i)
1624 resolve_subreg_use (recog_data.operand_loc[i], insn);
1625
1626 resolve_reg_notes (insn);
1627
1628 if (num_validated_changes () > 0)
1629 {
1630 for (i = recog_data.n_dups - 1; i >= 0; --i)
1631 {
1632 rtx *pl = recog_data.dup_loc[i];
1633 int dup_num = recog_data.dup_num[i];
1634 rtx *px = recog_data.operand_loc[dup_num];
1635
1636 validate_unshare_change (insn, pl, *px, 1);
1637 }
1638
1639 i = apply_change_group ();
1640 gcc_assert (i);
1641 }
1642 }
1643 }
1644 }
1645
1646 /* If we had insns to split that caused control flow insns in the middle
1647 of a basic block, split those blocks now. Note that we only handle
1648 the case where splitting a load has caused multiple possibly trapping
1649 loads to appear. */
1650 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1651 {
1652 rtx_insn *insn, *end;
1653 edge fallthru;
1654
1655 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1656 insn = BB_HEAD (bb);
1657 end = BB_END (bb);
1658
1659 while (insn != end)
1660 {
1661 if (control_flow_insn_p (insn))
1662 {
1663 /* Split the block after insn. There will be a fallthru
1664 edge, which is OK so we keep it. We have to create the
1665 exception edges ourselves. */
1666 fallthru = split_block (bb, insn);
1667 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1668 bb = fallthru->dest;
1669 insn = BB_HEAD (bb);
1670 }
1671 else
1672 insn = NEXT_INSN (insn);
1673 }
1674 }
1675
1676 sbitmap_free (sub_blocks);
1677 }
1678
1679 {
1680 unsigned int i;
1681 bitmap b;
1682
1683 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1684 if (b)
1685 BITMAP_FREE (b);
1686 }
1687
1688 reg_copy_graph.release ();
1689
1690 BITMAP_FREE (decomposable_context);
1691 BITMAP_FREE (non_decomposable_context);
1692 BITMAP_FREE (subreg_context);
1693 }
1694 \f
1695 /* Implement first lower subreg pass. */
1696
1697 namespace {
1698
1699 const pass_data pass_data_lower_subreg =
1700 {
1701 RTL_PASS, /* type */
1702 "subreg1", /* name */
1703 OPTGROUP_NONE, /* optinfo_flags */
1704 TV_LOWER_SUBREG, /* tv_id */
1705 0, /* properties_required */
1706 0, /* properties_provided */
1707 0, /* properties_destroyed */
1708 0, /* todo_flags_start */
1709 0, /* todo_flags_finish */
1710 };
1711
1712 class pass_lower_subreg : public rtl_opt_pass
1713 {
1714 public:
1715 pass_lower_subreg (gcc::context *ctxt)
1716 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1717 {}
1718
1719 /* opt_pass methods: */
1720 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1721 virtual unsigned int execute (function *)
1722 {
1723 decompose_multiword_subregs (false);
1724 return 0;
1725 }
1726
1727 }; // class pass_lower_subreg
1728
1729 } // anon namespace
1730
1731 rtl_opt_pass *
1732 make_pass_lower_subreg (gcc::context *ctxt)
1733 {
1734 return new pass_lower_subreg (ctxt);
1735 }
1736
1737 /* Implement second lower subreg pass. */
1738
1739 namespace {
1740
1741 const pass_data pass_data_lower_subreg2 =
1742 {
1743 RTL_PASS, /* type */
1744 "subreg2", /* name */
1745 OPTGROUP_NONE, /* optinfo_flags */
1746 TV_LOWER_SUBREG, /* tv_id */
1747 0, /* properties_required */
1748 0, /* properties_provided */
1749 0, /* properties_destroyed */
1750 0, /* todo_flags_start */
1751 TODO_df_finish, /* todo_flags_finish */
1752 };
1753
1754 class pass_lower_subreg2 : public rtl_opt_pass
1755 {
1756 public:
1757 pass_lower_subreg2 (gcc::context *ctxt)
1758 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1759 {}
1760
1761 /* opt_pass methods: */
1762 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1763 virtual unsigned int execute (function *)
1764 {
1765 decompose_multiword_subregs (true);
1766 return 0;
1767 }
1768
1769 }; // class pass_lower_subreg2
1770
1771 } // anon namespace
1772
1773 rtl_opt_pass *
1774 make_pass_lower_subreg2 (gcc::context *ctxt)
1775 {
1776 return new pass_lower_subreg2 (ctxt);
1777 }