re PR c++/13478 (gcc uses wrong constructor to initialize a const reference)
[gcc.git] / gcc / loop.c
1 /* Perform various loop optimizations, including strength reduction.
2 Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
3 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 /* This is the loop optimization pass of the compiler.
23 It finds invariant computations within loops and moves them
24 to the beginning of the loop. Then it identifies basic and
25 general induction variables.
26
27 Basic induction variables (BIVs) are a pseudo registers which are set within
28 a loop only by incrementing or decrementing its value. General induction
29 variables (GIVs) are pseudo registers with a value which is a linear function
30 of a basic induction variable. BIVs are recognized by `basic_induction_var';
31 GIVs by `general_induction_var'.
32
33 Once induction variables are identified, strength reduction is applied to the
34 general induction variables, and induction variable elimination is applied to
35 the basic induction variables.
36
37 It also finds cases where
38 a register is set within the loop by zero-extending a narrower value
39 and changes these to zero the entire register once before the loop
40 and merely copy the low part within the loop.
41
42 Most of the complexity is in heuristics to decide when it is worth
43 while to do these things. */
44
45 #include "config.h"
46 #include "system.h"
47 #include "coretypes.h"
48 #include "tm.h"
49 #include "rtl.h"
50 #include "tm_p.h"
51 #include "function.h"
52 #include "expr.h"
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
55 #include "insn-config.h"
56 #include "regs.h"
57 #include "recog.h"
58 #include "flags.h"
59 #include "real.h"
60 #include "loop.h"
61 #include "cselib.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "predict.h"
65 #include "insn-flags.h"
66 #include "optabs.h"
67 #include "cfgloop.h"
68
69 /* Not really meaningful values, but at least something. */
70 #ifndef SIMULTANEOUS_PREFETCHES
71 #define SIMULTANEOUS_PREFETCHES 3
72 #endif
73 #ifndef PREFETCH_BLOCK
74 #define PREFETCH_BLOCK 32
75 #endif
76 #ifndef HAVE_prefetch
77 #define HAVE_prefetch 0
78 #define CODE_FOR_prefetch 0
79 #define gen_prefetch(a,b,c) (abort(), NULL_RTX)
80 #endif
81
82 /* Give up the prefetch optimizations once we exceed a given threshold.
83 It is unlikely that we would be able to optimize something in a loop
84 with so many detected prefetches. */
85 #define MAX_PREFETCHES 100
86 /* The number of prefetch blocks that are beneficial to fetch at once before
87 a loop with a known (and low) iteration count. */
88 #define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
89 /* For very tiny loops it is not worthwhile to prefetch even before the loop,
90 since it is likely that the data are already in the cache. */
91 #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2
92
93 /* Parameterize some prefetch heuristics so they can be turned on and off
94 easily for performance testing on new architectures. These can be
95 defined in target-dependent files. */
96
97 /* Prefetch is worthwhile only when loads/stores are dense. */
98 #ifndef PREFETCH_ONLY_DENSE_MEM
99 #define PREFETCH_ONLY_DENSE_MEM 1
100 #endif
101
102 /* Define what we mean by "dense" loads and stores; This value divided by 256
103 is the minimum percentage of memory references that worth prefetching. */
104 #ifndef PREFETCH_DENSE_MEM
105 #define PREFETCH_DENSE_MEM 220
106 #endif
107
108 /* Do not prefetch for a loop whose iteration count is known to be low. */
109 #ifndef PREFETCH_NO_LOW_LOOPCNT
110 #define PREFETCH_NO_LOW_LOOPCNT 1
111 #endif
112
113 /* Define what we mean by a "low" iteration count. */
114 #ifndef PREFETCH_LOW_LOOPCNT
115 #define PREFETCH_LOW_LOOPCNT 32
116 #endif
117
118 /* Do not prefetch for a loop that contains a function call; such a loop is
119 probably not an internal loop. */
120 #ifndef PREFETCH_NO_CALL
121 #define PREFETCH_NO_CALL 1
122 #endif
123
124 /* Do not prefetch accesses with an extreme stride. */
125 #ifndef PREFETCH_NO_EXTREME_STRIDE
126 #define PREFETCH_NO_EXTREME_STRIDE 1
127 #endif
128
129 /* Define what we mean by an "extreme" stride. */
130 #ifndef PREFETCH_EXTREME_STRIDE
131 #define PREFETCH_EXTREME_STRIDE 4096
132 #endif
133
134 /* Define a limit to how far apart indices can be and still be merged
135 into a single prefetch. */
136 #ifndef PREFETCH_EXTREME_DIFFERENCE
137 #define PREFETCH_EXTREME_DIFFERENCE 4096
138 #endif
139
140 /* Issue prefetch instructions before the loop to fetch data to be used
141 in the first few loop iterations. */
142 #ifndef PREFETCH_BEFORE_LOOP
143 #define PREFETCH_BEFORE_LOOP 1
144 #endif
145
146 /* Do not handle reversed order prefetches (negative stride). */
147 #ifndef PREFETCH_NO_REVERSE_ORDER
148 #define PREFETCH_NO_REVERSE_ORDER 1
149 #endif
150
151 /* Prefetch even if the GIV is in conditional code. */
152 #ifndef PREFETCH_CONDITIONAL
153 #define PREFETCH_CONDITIONAL 1
154 #endif
155
156 #define LOOP_REG_LIFETIME(LOOP, REGNO) \
157 ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))
158
159 #define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
160 ((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
161 || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))
162
163 #define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
164 ((REGNO) < FIRST_PSEUDO_REGISTER \
165 ? (int) HARD_REGNO_NREGS ((REGNO), GET_MODE (SET_DEST)) : 1)
166
167
168 /* Vector mapping INSN_UIDs to luids.
169 The luids are like uids but increase monotonically always.
170 We use them to see whether a jump comes from outside a given loop. */
171
172 int *uid_luid;
173
174 /* Indexed by INSN_UID, contains the ordinal giving the (innermost) loop
175 number the insn is contained in. */
176
177 struct loop **uid_loop;
178
179 /* 1 + largest uid of any insn. */
180
181 int max_uid_for_loop;
182
183 /* Number of loops detected in current function. Used as index to the
184 next few tables. */
185
186 static int max_loop_num;
187
188 /* Bound on pseudo register number before loop optimization.
189 A pseudo has valid regscan info if its number is < max_reg_before_loop. */
190 unsigned int max_reg_before_loop;
191
192 /* The value to pass to the next call of reg_scan_update. */
193 static int loop_max_reg;
194 \f
195 /* During the analysis of a loop, a chain of `struct movable's
196 is made to record all the movable insns found.
197 Then the entire chain can be scanned to decide which to move. */
198
199 struct movable
200 {
201 rtx insn; /* A movable insn */
202 rtx set_src; /* The expression this reg is set from. */
203 rtx set_dest; /* The destination of this SET. */
204 rtx dependencies; /* When INSN is libcall, this is an EXPR_LIST
205 of any registers used within the LIBCALL. */
206 int consec; /* Number of consecutive following insns
207 that must be moved with this one. */
208 unsigned int regno; /* The register it sets */
209 short lifetime; /* lifetime of that register;
210 may be adjusted when matching movables
211 that load the same value are found. */
212 short savings; /* Number of insns we can move for this reg,
213 including other movables that force this
214 or match this one. */
215 ENUM_BITFIELD(machine_mode) savemode : 8; /* Nonzero means it is a mode for
216 a low part that we should avoid changing when
217 clearing the rest of the reg. */
218 unsigned int cond : 1; /* 1 if only conditionally movable */
219 unsigned int force : 1; /* 1 means MUST move this insn */
220 unsigned int global : 1; /* 1 means reg is live outside this loop */
221 /* If PARTIAL is 1, GLOBAL means something different:
222 that the reg is live outside the range from where it is set
223 to the following label. */
224 unsigned int done : 1; /* 1 inhibits further processing of this */
225
226 unsigned int partial : 1; /* 1 means this reg is used for zero-extending.
227 In particular, moving it does not make it
228 invariant. */
229 unsigned int move_insn : 1; /* 1 means that we call emit_move_insn to
230 load SRC, rather than copying INSN. */
231 unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
232 first insn of a consecutive sets group. */
233 unsigned int is_equiv : 1; /* 1 means a REG_EQUIV is present on INSN. */
234 unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace
235 the original insn with a copy from that
236 pseudo, rather than deleting it. */
237 struct movable *match; /* First entry for same value */
238 struct movable *forces; /* An insn that must be moved if this is */
239 struct movable *next;
240 };
241
242
243 FILE *loop_dump_stream;
244
245 /* Forward declarations. */
246
247 static void invalidate_loops_containing_label (rtx);
248 static void find_and_verify_loops (rtx, struct loops *);
249 static void mark_loop_jump (rtx, struct loop *);
250 static void prescan_loop (struct loop *);
251 static int reg_in_basic_block_p (rtx, rtx);
252 static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx);
253 static int labels_in_range_p (rtx, int);
254 static void count_one_set (struct loop_regs *, rtx, rtx, rtx *);
255 static void note_addr_stored (rtx, rtx, void *);
256 static void note_set_pseudo_multiple_uses (rtx, rtx, void *);
257 static int loop_reg_used_before_p (const struct loop *, rtx, rtx);
258 static rtx find_regs_nested (rtx, rtx);
259 static void scan_loop (struct loop*, int);
260 #if 0
261 static void replace_call_address (rtx, rtx, rtx);
262 #endif
263 static rtx skip_consec_insns (rtx, int);
264 static int libcall_benefit (rtx);
265 static void ignore_some_movables (struct loop_movables *);
266 static void force_movables (struct loop_movables *);
267 static void combine_movables (struct loop_movables *, struct loop_regs *);
268 static int num_unmoved_movables (const struct loop *);
269 static int regs_match_p (rtx, rtx, struct loop_movables *);
270 static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *,
271 struct loop_regs *);
272 static void add_label_notes (rtx, rtx);
273 static void move_movables (struct loop *loop, struct loop_movables *, int,
274 int);
275 static void loop_movables_add (struct loop_movables *, struct movable *);
276 static void loop_movables_free (struct loop_movables *);
277 static int count_nonfixed_reads (const struct loop *, rtx);
278 static void loop_bivs_find (struct loop *);
279 static void loop_bivs_init_find (struct loop *);
280 static void loop_bivs_check (struct loop *);
281 static void loop_givs_find (struct loop *);
282 static void loop_givs_check (struct loop *);
283 static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int);
284 static int loop_giv_reduce_benefit (struct loop *, struct iv_class *,
285 struct induction *, rtx);
286 static void loop_givs_dead_check (struct loop *, struct iv_class *);
287 static void loop_givs_reduce (struct loop *, struct iv_class *);
288 static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *);
289 static void loop_ivs_free (struct loop *);
290 static void strength_reduce (struct loop *, int);
291 static void find_single_use_in_loop (struct loop_regs *, rtx, rtx);
292 static int valid_initial_value_p (rtx, rtx, int, rtx);
293 static void find_mem_givs (const struct loop *, rtx, rtx, int, int);
294 static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx,
295 rtx, rtx *, int, int);
296 static void check_final_value (const struct loop *, struct induction *);
297 static void loop_ivs_dump (const struct loop *, FILE *, int);
298 static void loop_iv_class_dump (const struct iv_class *, FILE *, int);
299 static void loop_biv_dump (const struct induction *, FILE *, int);
300 static void loop_giv_dump (const struct induction *, FILE *, int);
301 static void record_giv (const struct loop *, struct induction *, rtx, rtx,
302 rtx, rtx, rtx, rtx, int, enum g_types, int, int,
303 rtx *);
304 static void update_giv_derive (const struct loop *, rtx);
305 static void check_ext_dependent_givs (const struct loop *, struct iv_class *);
306 static int basic_induction_var (const struct loop *, rtx, enum machine_mode,
307 rtx, rtx, rtx *, rtx *, rtx **);
308 static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *);
309 static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *,
310 rtx *, rtx *, int, int *, enum machine_mode);
311 static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *,
312 rtx *, rtx *, rtx *);
313 static int check_dbra_loop (struct loop *, int);
314 static rtx express_from_1 (rtx, rtx, rtx);
315 static rtx combine_givs_p (struct induction *, struct induction *);
316 static int cmp_combine_givs_stats (const void *, const void *);
317 static void combine_givs (struct loop_regs *, struct iv_class *);
318 static int product_cheap_p (rtx, rtx);
319 static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int,
320 int, int);
321 static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx,
322 struct iv_class *, int, basic_block, rtx);
323 static int last_use_this_basic_block (rtx, rtx);
324 static void record_initial (rtx, rtx, void *);
325 static void update_reg_last_use (rtx, rtx);
326 static rtx next_insn_in_loop (const struct loop *, rtx);
327 static void loop_regs_scan (const struct loop *, int);
328 static int count_insns_in_loop (const struct loop *);
329 static int find_mem_in_note_1 (rtx *, void *);
330 static rtx find_mem_in_note (rtx);
331 static void load_mems (const struct loop *);
332 static int insert_loop_mem (rtx *, void *);
333 static int replace_loop_mem (rtx *, void *);
334 static void replace_loop_mems (rtx, rtx, rtx, int);
335 static int replace_loop_reg (rtx *, void *);
336 static void replace_loop_regs (rtx insn, rtx, rtx);
337 static void note_reg_stored (rtx, rtx, void *);
338 static void try_copy_prop (const struct loop *, rtx, unsigned int);
339 static void try_swap_copy_prop (const struct loop *, rtx, unsigned int);
340 static rtx check_insn_for_givs (struct loop *, rtx, int, int);
341 static rtx check_insn_for_bivs (struct loop *, rtx, int, int);
342 static rtx gen_add_mult (rtx, rtx, rtx, rtx);
343 static void loop_regs_update (const struct loop *, rtx);
344 static int iv_add_mult_cost (rtx, rtx, rtx, rtx);
345
346 static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx);
347 static rtx loop_call_insn_emit_before (const struct loop *, basic_block,
348 rtx, rtx);
349 static rtx loop_call_insn_hoist (const struct loop *, rtx);
350 static rtx loop_insn_sink_or_swim (const struct loop *, rtx);
351
352 static void loop_dump_aux (const struct loop *, FILE *, int);
353 static void loop_delete_insns (rtx, rtx);
354 static HOST_WIDE_INT remove_constant_addition (rtx *);
355 static rtx gen_load_of_final_value (rtx, rtx);
356 void debug_ivs (const struct loop *);
357 void debug_iv_class (const struct iv_class *);
358 void debug_biv (const struct induction *);
359 void debug_giv (const struct induction *);
360 void debug_loop (const struct loop *);
361 void debug_loops (const struct loops *);
362
363 typedef struct loop_replace_args
364 {
365 rtx match;
366 rtx replacement;
367 rtx insn;
368 } loop_replace_args;
369
370 /* Nonzero iff INSN is between START and END, inclusive. */
371 #define INSN_IN_RANGE_P(INSN, START, END) \
372 (INSN_UID (INSN) < max_uid_for_loop \
373 && INSN_LUID (INSN) >= INSN_LUID (START) \
374 && INSN_LUID (INSN) <= INSN_LUID (END))
375
376 /* Indirect_jump_in_function is computed once per function. */
377 static int indirect_jump_in_function;
378 static int indirect_jump_in_function_p (rtx);
379
380 static int compute_luids (rtx, rtx, int);
381
382 static int biv_elimination_giv_has_0_offset (struct induction *,
383 struct induction *, rtx);
384 \f
385 /* Benefit penalty, if a giv is not replaceable, i.e. must emit an insn to
386 copy the value of the strength reduced giv to its original register. */
387 static int copy_cost;
388
389 /* Cost of using a register, to normalize the benefits of a giv. */
390 static int reg_address_cost;
391
392 void
393 init_loop (void)
394 {
395 rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
396
397 reg_address_cost = address_cost (reg, SImode);
398
399 copy_cost = COSTS_N_INSNS (1);
400 }
401 \f
402 /* Compute the mapping from uids to luids.
403 LUIDs are numbers assigned to insns, like uids,
404 except that luids increase monotonically through the code.
405 Start at insn START and stop just before END. Assign LUIDs
406 starting with PREV_LUID + 1. Return the last assigned LUID + 1. */
407 static int
408 compute_luids (rtx start, rtx end, int prev_luid)
409 {
410 int i;
411 rtx insn;
412
413 for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn))
414 {
415 if (INSN_UID (insn) >= max_uid_for_loop)
416 continue;
417 /* Don't assign luids to line-number NOTEs, so that the distance in
418 luids between two insns is not affected by -g. */
419 if (GET_CODE (insn) != NOTE
420 || NOTE_LINE_NUMBER (insn) <= 0)
421 uid_luid[INSN_UID (insn)] = ++i;
422 else
423 /* Give a line number note the same luid as preceding insn. */
424 uid_luid[INSN_UID (insn)] = i;
425 }
426 return i + 1;
427 }
428 \f
429 /* Entry point of this file. Perform loop optimization
430 on the current function. F is the first insn of the function
431 and DUMPFILE is a stream for output of a trace of actions taken
432 (or 0 if none should be output). */
433
434 void
435 loop_optimize (rtx f, FILE *dumpfile, int flags)
436 {
437 rtx insn;
438 int i;
439 struct loops loops_data;
440 struct loops *loops = &loops_data;
441 struct loop_info *loops_info;
442
443 loop_dump_stream = dumpfile;
444
445 init_recog_no_volatile ();
446
447 max_reg_before_loop = max_reg_num ();
448 loop_max_reg = max_reg_before_loop;
449
450 regs_may_share = 0;
451
452 /* Count the number of loops. */
453
454 max_loop_num = 0;
455 for (insn = f; insn; insn = NEXT_INSN (insn))
456 {
457 if (GET_CODE (insn) == NOTE
458 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
459 max_loop_num++;
460 }
461
462 /* Don't waste time if no loops. */
463 if (max_loop_num == 0)
464 return;
465
466 loops->num = max_loop_num;
467
468 /* Get size to use for tables indexed by uids.
469 Leave some space for labels allocated by find_and_verify_loops. */
470 max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;
471
472 uid_luid = xcalloc (max_uid_for_loop, sizeof (int));
473 uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *));
474
475 /* Allocate storage for array of loops. */
476 loops->array = xcalloc (loops->num, sizeof (struct loop));
477
478 /* Find and process each loop.
479 First, find them, and record them in order of their beginnings. */
480 find_and_verify_loops (f, loops);
481
482 /* Allocate and initialize auxiliary loop information. */
483 loops_info = xcalloc (loops->num, sizeof (struct loop_info));
484 for (i = 0; i < (int) loops->num; i++)
485 loops->array[i].aux = loops_info + i;
486
487 /* Now find all register lifetimes. This must be done after
488 find_and_verify_loops, because it might reorder the insns in the
489 function. */
490 reg_scan (f, max_reg_before_loop, 1);
491
492 /* This must occur after reg_scan so that registers created by gcse
493 will have entries in the register tables.
494
495 We could have added a call to reg_scan after gcse_main in toplev.c,
496 but moving this call to init_alias_analysis is more efficient. */
497 init_alias_analysis ();
498
499 /* See if we went too far. Note that get_max_uid already returns
500 one more that the maximum uid of all insn. */
501 if (get_max_uid () > max_uid_for_loop)
502 abort ();
503 /* Now reset it to the actual size we need. See above. */
504 max_uid_for_loop = get_max_uid ();
505
506 /* find_and_verify_loops has already called compute_luids, but it
507 might have rearranged code afterwards, so we need to recompute
508 the luids now. */
509 compute_luids (f, NULL_RTX, 0);
510
511 /* Don't leave gaps in uid_luid for insns that have been
512 deleted. It is possible that the first or last insn
513 using some register has been deleted by cross-jumping.
514 Make sure that uid_luid for that former insn's uid
515 points to the general area where that insn used to be. */
516 for (i = 0; i < max_uid_for_loop; i++)
517 {
518 uid_luid[0] = uid_luid[i];
519 if (uid_luid[0] != 0)
520 break;
521 }
522 for (i = 0; i < max_uid_for_loop; i++)
523 if (uid_luid[i] == 0)
524 uid_luid[i] = uid_luid[i - 1];
525
526 /* Determine if the function has indirect jump. On some systems
527 this prevents low overhead loop instructions from being used. */
528 indirect_jump_in_function = indirect_jump_in_function_p (f);
529
530 /* Now scan the loops, last ones first, since this means inner ones are done
531 before outer ones. */
532 for (i = max_loop_num - 1; i >= 0; i--)
533 {
534 struct loop *loop = &loops->array[i];
535
536 if (! loop->invalid && loop->end)
537 scan_loop (loop, flags);
538 }
539
540 end_alias_analysis ();
541
542 /* Clean up. */
543 for (i = 0; i < (int) loops->num; i++)
544 free (loops_info[i].mems);
545
546 free (uid_luid);
547 free (uid_loop);
548 free (loops_info);
549 free (loops->array);
550 }
551 \f
552 /* Returns the next insn, in execution order, after INSN. START and
553 END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
554 respectively. LOOP->TOP, if non-NULL, is the top of the loop in the
555 insn-stream; it is used with loops that are entered near the
556 bottom. */
557
558 static rtx
559 next_insn_in_loop (const struct loop *loop, rtx insn)
560 {
561 insn = NEXT_INSN (insn);
562
563 if (insn == loop->end)
564 {
565 if (loop->top)
566 /* Go to the top of the loop, and continue there. */
567 insn = loop->top;
568 else
569 /* We're done. */
570 insn = NULL_RTX;
571 }
572
573 if (insn == loop->scan_start)
574 /* We're done. */
575 insn = NULL_RTX;
576
577 return insn;
578 }
579
580 /* Find any register references hidden inside X and add them to
581 the dependency list DEPS. This is used to look inside CLOBBER (MEM
582 when checking whether a PARALLEL can be pulled out of a loop. */
583
584 static rtx
585 find_regs_nested (rtx deps, rtx x)
586 {
587 enum rtx_code code = GET_CODE (x);
588 if (code == REG)
589 deps = gen_rtx_EXPR_LIST (VOIDmode, x, deps);
590 else
591 {
592 const char *fmt = GET_RTX_FORMAT (code);
593 int i, j;
594 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
595 {
596 if (fmt[i] == 'e')
597 deps = find_regs_nested (deps, XEXP (x, i));
598 else if (fmt[i] == 'E')
599 for (j = 0; j < XVECLEN (x, i); j++)
600 deps = find_regs_nested (deps, XVECEXP (x, i, j));
601 }
602 }
603 return deps;
604 }
605
606 /* Optimize one loop described by LOOP. */
607
608 /* ??? Could also move memory writes out of loops if the destination address
609 is invariant, the source is invariant, the memory write is not volatile,
610 and if we can prove that no read inside the loop can read this address
611 before the write occurs. If there is a read of this address after the
612 write, then we can also mark the memory read as invariant. */
613
614 static void
615 scan_loop (struct loop *loop, int flags)
616 {
617 struct loop_info *loop_info = LOOP_INFO (loop);
618 struct loop_regs *regs = LOOP_REGS (loop);
619 int i;
620 rtx loop_start = loop->start;
621 rtx loop_end = loop->end;
622 rtx p;
623 /* 1 if we are scanning insns that could be executed zero times. */
624 int maybe_never = 0;
625 /* 1 if we are scanning insns that might never be executed
626 due to a subroutine call which might exit before they are reached. */
627 int call_passed = 0;
628 /* Number of insns in the loop. */
629 int insn_count;
630 int tem;
631 rtx temp, update_start, update_end;
632 /* The SET from an insn, if it is the only SET in the insn. */
633 rtx set, set1;
634 /* Chain describing insns movable in current loop. */
635 struct loop_movables *movables = LOOP_MOVABLES (loop);
636 /* Ratio of extra register life span we can justify
637 for saving an instruction. More if loop doesn't call subroutines
638 since in that case saving an insn makes more difference
639 and more registers are available. */
640 int threshold;
641 /* Nonzero if we are scanning instructions in a sub-loop. */
642 int loop_depth = 0;
643 int in_libcall;
644
645 loop->top = 0;
646
647 movables->head = 0;
648 movables->last = 0;
649
650 /* Determine whether this loop starts with a jump down to a test at
651 the end. This will occur for a small number of loops with a test
652 that is too complex to duplicate in front of the loop.
653
654 We search for the first insn or label in the loop, skipping NOTEs.
655 However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
656 (because we might have a loop executed only once that contains a
657 loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
658 (in case we have a degenerate loop).
659
660 Note that if we mistakenly think that a loop is entered at the top
661 when, in fact, it is entered at the exit test, the only effect will be
662 slightly poorer optimization. Making the opposite error can generate
663 incorrect code. Since very few loops now start with a jump to the
664 exit test, the code here to detect that case is very conservative. */
665
666 for (p = NEXT_INSN (loop_start);
667 p != loop_end
668 && GET_CODE (p) != CODE_LABEL && ! INSN_P (p)
669 && (GET_CODE (p) != NOTE
670 || (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
671 && NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
672 p = NEXT_INSN (p))
673 ;
674
675 loop->scan_start = p;
676
677 /* If loop end is the end of the current function, then emit a
678 NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
679 note insn. This is the position we use when sinking insns out of
680 the loop. */
681 if (NEXT_INSN (loop->end) != 0)
682 loop->sink = NEXT_INSN (loop->end);
683 else
684 loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);
685
686 /* Set up variables describing this loop. */
687 prescan_loop (loop);
688 threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);
689
690 /* If loop has a jump before the first label,
691 the true entry is the target of that jump.
692 Start scan from there.
693 But record in LOOP->TOP the place where the end-test jumps
694 back to so we can scan that after the end of the loop. */
695 if (GET_CODE (p) == JUMP_INSN
696 /* Loop entry must be unconditional jump (and not a RETURN) */
697 && any_uncondjump_p (p)
698 && JUMP_LABEL (p) != 0
699 /* Check to see whether the jump actually
700 jumps out of the loop (meaning it's no loop).
701 This case can happen for things like
702 do {..} while (0). If this label was generated previously
703 by loop, we can't tell anything about it and have to reject
704 the loop. */
705 && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
706 {
707 loop->top = next_label (loop->scan_start);
708 loop->scan_start = JUMP_LABEL (p);
709 }
710
711 /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
712 as required by loop_reg_used_before_p. So skip such loops. (This
713 test may never be true, but it's best to play it safe.)
714
715 Also, skip loops where we do not start scanning at a label. This
716 test also rejects loops starting with a JUMP_INSN that failed the
717 test above. */
718
719 if (INSN_UID (loop->scan_start) >= max_uid_for_loop
720 || GET_CODE (loop->scan_start) != CODE_LABEL)
721 {
722 if (loop_dump_stream)
723 fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
724 INSN_UID (loop_start), INSN_UID (loop_end));
725 return;
726 }
727
728 /* Allocate extra space for REGs that might be created by load_mems.
729 We allocate a little extra slop as well, in the hopes that we
730 won't have to reallocate the regs array. */
731 loop_regs_scan (loop, loop_info->mems_idx + 16);
732 insn_count = count_insns_in_loop (loop);
733
734 if (loop_dump_stream)
735 {
736 fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
737 INSN_UID (loop_start), INSN_UID (loop_end), insn_count);
738 if (loop->cont)
739 fprintf (loop_dump_stream, "Continue at insn %d.\n",
740 INSN_UID (loop->cont));
741 }
742
743 /* Scan through the loop finding insns that are safe to move.
744 Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
745 this reg will be considered invariant for subsequent insns.
746 We consider whether subsequent insns use the reg
747 in deciding whether it is worth actually moving.
748
749 MAYBE_NEVER is nonzero if we have passed a conditional jump insn
750 and therefore it is possible that the insns we are scanning
751 would never be executed. At such times, we must make sure
752 that it is safe to execute the insn once instead of zero times.
753 When MAYBE_NEVER is 0, all insns will be executed at least once
754 so that is not a problem. */
755
756 for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
757 p != NULL_RTX;
758 p = next_insn_in_loop (loop, p))
759 {
760 if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
761 in_libcall--;
762 if (GET_CODE (p) == INSN)
763 {
764 temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
765 if (temp)
766 in_libcall++;
767 if (! in_libcall
768 && (set = single_set (p))
769 && GET_CODE (SET_DEST (set)) == REG
770 #ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
771 && SET_DEST (set) != pic_offset_table_rtx
772 #endif
773 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
774 {
775 int tem1 = 0;
776 int tem2 = 0;
777 int move_insn = 0;
778 int insert_temp = 0;
779 rtx src = SET_SRC (set);
780 rtx dependencies = 0;
781
782 /* Figure out what to use as a source of this insn. If a
783 REG_EQUIV note is given or if a REG_EQUAL note with a
784 constant operand is specified, use it as the source and
785 mark that we should move this insn by calling
786 emit_move_insn rather that duplicating the insn.
787
788 Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
789 note is present. */
790 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
791 if (temp)
792 src = XEXP (temp, 0), move_insn = 1;
793 else
794 {
795 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
796 if (temp && CONSTANT_P (XEXP (temp, 0)))
797 src = XEXP (temp, 0), move_insn = 1;
798 if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
799 {
800 src = XEXP (temp, 0);
801 /* A libcall block can use regs that don't appear in
802 the equivalent expression. To move the libcall,
803 we must move those regs too. */
804 dependencies = libcall_other_reg (p, src);
805 }
806 }
807
808 /* For parallels, add any possible uses to the dependencies, as
809 we can't move the insn without resolving them first.
810 MEMs inside CLOBBERs may also reference registers; these
811 count as implicit uses. */
812 if (GET_CODE (PATTERN (p)) == PARALLEL)
813 {
814 for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
815 {
816 rtx x = XVECEXP (PATTERN (p), 0, i);
817 if (GET_CODE (x) == USE)
818 dependencies
819 = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
820 dependencies);
821 else if (GET_CODE (x) == CLOBBER
822 && GET_CODE (XEXP (x, 0)) == MEM)
823 dependencies = find_regs_nested (dependencies,
824 XEXP (XEXP (x, 0), 0));
825 }
826 }
827
828 if (/* The register is used in basic blocks other
829 than the one where it is set (meaning that
830 something after this point in the loop might
831 depend on its value before the set). */
832 ! reg_in_basic_block_p (p, SET_DEST (set))
833 /* And the set is not guaranteed to be executed once
834 the loop starts, or the value before the set is
835 needed before the set occurs...
836
837 ??? Note we have quadratic behavior here, mitigated
838 by the fact that the previous test will often fail for
839 large loops. Rather than re-scanning the entire loop
840 each time for register usage, we should build tables
841 of the register usage and use them here instead. */
842 && (maybe_never
843 || loop_reg_used_before_p (loop, set, p)))
844 /* It is unsafe to move the set. However, it may be OK to
845 move the source into a new pseudo, and substitute a
846 reg-to-reg copy for the original insn.
847
848 This code used to consider it OK to move a set of a variable
849 which was not created by the user and not used in an exit
850 test.
851 That behavior is incorrect and was removed. */
852 insert_temp = 1;
853
854 /* Don't try to optimize a MODE_CC set with a constant
855 source. It probably will be combined with a conditional
856 jump. */
857 if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC
858 && CONSTANT_P (src))
859 ;
860 /* Don't try to optimize a register that was made
861 by loop-optimization for an inner loop.
862 We don't know its life-span, so we can't compute
863 the benefit. */
864 else if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
865 ;
866 /* Don't move the source and add a reg-to-reg copy:
867 - with -Os (this certainly increases size),
868 - if the mode doesn't support copy operations (obviously),
869 - if the source is already a reg (the motion will gain nothing),
870 - if the source is a legitimate constant (likewise). */
871 else if (insert_temp
872 && (optimize_size
873 || ! can_copy_p (GET_MODE (SET_SRC (set)))
874 || GET_CODE (SET_SRC (set)) == REG
875 || (CONSTANT_P (SET_SRC (set))
876 && LEGITIMATE_CONSTANT_P (SET_SRC (set)))))
877 ;
878 else if ((tem = loop_invariant_p (loop, src))
879 && (dependencies == 0
880 || (tem2
881 = loop_invariant_p (loop, dependencies)) != 0)
882 && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
883 || (tem1
884 = consec_sets_invariant_p
885 (loop, SET_DEST (set),
886 regs->array[REGNO (SET_DEST (set))].set_in_loop,
887 p)))
888 /* If the insn can cause a trap (such as divide by zero),
889 can't move it unless it's guaranteed to be executed
890 once loop is entered. Even a function call might
891 prevent the trap insn from being reached
892 (since it might exit!) */
893 && ! ((maybe_never || call_passed)
894 && may_trap_p (src)))
895 {
896 struct movable *m;
897 int regno = REGNO (SET_DEST (set));
898
899 /* A potential lossage is where we have a case where two insns
900 can be combined as long as they are both in the loop, but
901 we move one of them outside the loop. For large loops,
902 this can lose. The most common case of this is the address
903 of a function being called.
904
905 Therefore, if this register is marked as being used
906 exactly once if we are in a loop with calls
907 (a "large loop"), see if we can replace the usage of
908 this register with the source of this SET. If we can,
909 delete this insn.
910
911 Don't do this if P has a REG_RETVAL note or if we have
912 SMALL_REGISTER_CLASSES and SET_SRC is a hard register. */
913
914 if (loop_info->has_call
915 && regs->array[regno].single_usage != 0
916 && regs->array[regno].single_usage != const0_rtx
917 && REGNO_FIRST_UID (regno) == INSN_UID (p)
918 && (REGNO_LAST_UID (regno)
919 == INSN_UID (regs->array[regno].single_usage))
920 && regs->array[regno].set_in_loop == 1
921 && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
922 && ! side_effects_p (SET_SRC (set))
923 && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
924 && (! SMALL_REGISTER_CLASSES
925 || (! (GET_CODE (SET_SRC (set)) == REG
926 && (REGNO (SET_SRC (set))
927 < FIRST_PSEUDO_REGISTER))))
928 /* This test is not redundant; SET_SRC (set) might be
929 a call-clobbered register and the life of REGNO
930 might span a call. */
931 && ! modified_between_p (SET_SRC (set), p,
932 regs->array[regno].single_usage)
933 && no_labels_between_p (p,
934 regs->array[regno].single_usage)
935 && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
936 regs->array[regno].single_usage))
937 {
938 /* Replace any usage in a REG_EQUAL note. Must copy
939 the new source, so that we don't get rtx sharing
940 between the SET_SOURCE and REG_NOTES of insn p. */
941 REG_NOTES (regs->array[regno].single_usage)
942 = (replace_rtx
943 (REG_NOTES (regs->array[regno].single_usage),
944 SET_DEST (set), copy_rtx (SET_SRC (set))));
945
946 delete_insn (p);
947 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
948 i++)
949 regs->array[regno+i].set_in_loop = 0;
950 continue;
951 }
952
953 m = xmalloc (sizeof (struct movable));
954 m->next = 0;
955 m->insn = p;
956 m->set_src = src;
957 m->dependencies = dependencies;
958 m->set_dest = SET_DEST (set);
959 m->force = 0;
960 m->consec
961 = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
962 m->done = 0;
963 m->forces = 0;
964 m->partial = 0;
965 m->move_insn = move_insn;
966 m->move_insn_first = 0;
967 m->insert_temp = insert_temp;
968 m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
969 m->savemode = VOIDmode;
970 m->regno = regno;
971 /* Set M->cond if either loop_invariant_p
972 or consec_sets_invariant_p returned 2
973 (only conditionally invariant). */
974 m->cond = ((tem | tem1 | tem2) > 1);
975 m->global = LOOP_REG_GLOBAL_P (loop, regno);
976 m->match = 0;
977 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
978 m->savings = regs->array[regno].n_times_set;
979 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
980 m->savings += libcall_benefit (p);
981 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
982 regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
983 /* Add M to the end of the chain MOVABLES. */
984 loop_movables_add (movables, m);
985
986 if (m->consec > 0)
987 {
988 /* It is possible for the first instruction to have a
989 REG_EQUAL note but a non-invariant SET_SRC, so we must
990 remember the status of the first instruction in case
991 the last instruction doesn't have a REG_EQUAL note. */
992 m->move_insn_first = m->move_insn;
993
994 /* Skip this insn, not checking REG_LIBCALL notes. */
995 p = next_nonnote_insn (p);
996 /* Skip the consecutive insns, if there are any. */
997 p = skip_consec_insns (p, m->consec);
998 /* Back up to the last insn of the consecutive group. */
999 p = prev_nonnote_insn (p);
1000
1001 /* We must now reset m->move_insn, m->is_equiv, and
1002 possibly m->set_src to correspond to the effects of
1003 all the insns. */
1004 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
1005 if (temp)
1006 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1007 else
1008 {
1009 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
1010 if (temp && CONSTANT_P (XEXP (temp, 0)))
1011 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1012 else
1013 m->move_insn = 0;
1014
1015 }
1016 m->is_equiv
1017 = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
1018 }
1019 }
1020 /* If this register is always set within a STRICT_LOW_PART
1021 or set to zero, then its high bytes are constant.
1022 So clear them outside the loop and within the loop
1023 just load the low bytes.
1024 We must check that the machine has an instruction to do so.
1025 Also, if the value loaded into the register
1026 depends on the same register, this cannot be done. */
1027 else if (SET_SRC (set) == const0_rtx
1028 && GET_CODE (NEXT_INSN (p)) == INSN
1029 && (set1 = single_set (NEXT_INSN (p)))
1030 && GET_CODE (set1) == SET
1031 && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
1032 && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
1033 && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
1034 == SET_DEST (set))
1035 && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
1036 {
1037 int regno = REGNO (SET_DEST (set));
1038 if (regs->array[regno].set_in_loop == 2)
1039 {
1040 struct movable *m;
1041 m = xmalloc (sizeof (struct movable));
1042 m->next = 0;
1043 m->insn = p;
1044 m->set_dest = SET_DEST (set);
1045 m->dependencies = 0;
1046 m->force = 0;
1047 m->consec = 0;
1048 m->done = 0;
1049 m->forces = 0;
1050 m->move_insn = 0;
1051 m->move_insn_first = 0;
1052 m->insert_temp = insert_temp;
1053 m->partial = 1;
1054 /* If the insn may not be executed on some cycles,
1055 we can't clear the whole reg; clear just high part.
1056 Not even if the reg is used only within this loop.
1057 Consider this:
1058 while (1)
1059 while (s != t) {
1060 if (foo ()) x = *s;
1061 use (x);
1062 }
1063 Clearing x before the inner loop could clobber a value
1064 being saved from the last time around the outer loop.
1065 However, if the reg is not used outside this loop
1066 and all uses of the register are in the same
1067 basic block as the store, there is no problem.
1068
1069 If this insn was made by loop, we don't know its
1070 INSN_LUID and hence must make a conservative
1071 assumption. */
1072 m->global = (INSN_UID (p) >= max_uid_for_loop
1073 || LOOP_REG_GLOBAL_P (loop, regno)
1074 || (labels_in_range_p
1075 (p, REGNO_FIRST_LUID (regno))));
1076 if (maybe_never && m->global)
1077 m->savemode = GET_MODE (SET_SRC (set1));
1078 else
1079 m->savemode = VOIDmode;
1080 m->regno = regno;
1081 m->cond = 0;
1082 m->match = 0;
1083 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
1084 m->savings = 1;
1085 for (i = 0;
1086 i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
1087 i++)
1088 regs->array[regno+i].set_in_loop = -1;
1089 /* Add M to the end of the chain MOVABLES. */
1090 loop_movables_add (movables, m);
1091 }
1092 }
1093 }
1094 }
1095 /* Past a call insn, we get to insns which might not be executed
1096 because the call might exit. This matters for insns that trap.
1097 Constant and pure call insns always return, so they don't count. */
1098 else if (GET_CODE (p) == CALL_INSN && ! CONST_OR_PURE_CALL_P (p))
1099 call_passed = 1;
1100 /* Past a label or a jump, we get to insns for which we
1101 can't count on whether or how many times they will be
1102 executed during each iteration. Therefore, we can
1103 only move out sets of trivial variables
1104 (those not used after the loop). */
1105 /* Similar code appears twice in strength_reduce. */
1106 else if ((GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN)
1107 /* If we enter the loop in the middle, and scan around to the
1108 beginning, don't set maybe_never for that. This must be an
1109 unconditional jump, otherwise the code at the top of the
1110 loop might never be executed. Unconditional jumps are
1111 followed by a barrier then the loop_end. */
1112 && ! (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == loop->top
1113 && NEXT_INSN (NEXT_INSN (p)) == loop_end
1114 && any_uncondjump_p (p)))
1115 maybe_never = 1;
1116 else if (GET_CODE (p) == NOTE)
1117 {
1118 /* At the virtual top of a converted loop, insns are again known to
1119 be executed: logically, the loop begins here even though the exit
1120 code has been duplicated. */
1121 if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP && loop_depth == 0)
1122 maybe_never = call_passed = 0;
1123 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
1124 loop_depth++;
1125 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
1126 loop_depth--;
1127 }
1128 }
1129
1130 /* If one movable subsumes another, ignore that other. */
1131
1132 ignore_some_movables (movables);
1133
1134 /* For each movable insn, see if the reg that it loads
1135 leads when it dies right into another conditionally movable insn.
1136 If so, record that the second insn "forces" the first one,
1137 since the second can be moved only if the first is. */
1138
1139 force_movables (movables);
1140
1141 /* See if there are multiple movable insns that load the same value.
1142 If there are, make all but the first point at the first one
1143 through the `match' field, and add the priorities of them
1144 all together as the priority of the first. */
1145
1146 combine_movables (movables, regs);
1147
1148 /* Now consider each movable insn to decide whether it is worth moving.
1149 Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
1150
1151 For machines with few registers this increases code size, so do not
1152 move moveables when optimizing for code size on such machines.
1153 (The 18 below is the value for i386.) */
1154
1155 if (!optimize_size
1156 || (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
1157 {
1158 move_movables (loop, movables, threshold, insn_count);
1159
1160 /* Recalculate regs->array if move_movables has created new
1161 registers. */
1162 if (max_reg_num () > regs->num)
1163 {
1164 loop_regs_scan (loop, 0);
1165 for (update_start = loop_start;
1166 PREV_INSN (update_start)
1167 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1168 update_start = PREV_INSN (update_start))
1169 ;
1170 update_end = NEXT_INSN (loop_end);
1171
1172 reg_scan_update (update_start, update_end, loop_max_reg);
1173 loop_max_reg = max_reg_num ();
1174 }
1175 }
1176
1177 /* Now candidates that still are negative are those not moved.
1178 Change regs->array[I].set_in_loop to indicate that those are not actually
1179 invariant. */
1180 for (i = 0; i < regs->num; i++)
1181 if (regs->array[i].set_in_loop < 0)
1182 regs->array[i].set_in_loop = regs->array[i].n_times_set;
1183
1184 /* Now that we've moved some things out of the loop, we might be able to
1185 hoist even more memory references. */
1186 load_mems (loop);
1187
1188 /* Recalculate regs->array if load_mems has created new registers. */
1189 if (max_reg_num () > regs->num)
1190 loop_regs_scan (loop, 0);
1191
1192 for (update_start = loop_start;
1193 PREV_INSN (update_start)
1194 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1195 update_start = PREV_INSN (update_start))
1196 ;
1197 update_end = NEXT_INSN (loop_end);
1198
1199 reg_scan_update (update_start, update_end, loop_max_reg);
1200 loop_max_reg = max_reg_num ();
1201
1202 if (flag_strength_reduce)
1203 {
1204 if (update_end && GET_CODE (update_end) == CODE_LABEL)
1205 /* Ensure our label doesn't go away. */
1206 LABEL_NUSES (update_end)++;
1207
1208 strength_reduce (loop, flags);
1209
1210 reg_scan_update (update_start, update_end, loop_max_reg);
1211 loop_max_reg = max_reg_num ();
1212
1213 if (update_end && GET_CODE (update_end) == CODE_LABEL
1214 && --LABEL_NUSES (update_end) == 0)
1215 delete_related_insns (update_end);
1216 }
1217
1218
1219 /* The movable information is required for strength reduction. */
1220 loop_movables_free (movables);
1221
1222 free (regs->array);
1223 regs->array = 0;
1224 regs->num = 0;
1225 }
1226 \f
1227 /* Add elements to *OUTPUT to record all the pseudo-regs
1228 mentioned in IN_THIS but not mentioned in NOT_IN_THIS. */
1229
1230 void
1231 record_excess_regs (rtx in_this, rtx not_in_this, rtx *output)
1232 {
1233 enum rtx_code code;
1234 const char *fmt;
1235 int i;
1236
1237 code = GET_CODE (in_this);
1238
1239 switch (code)
1240 {
1241 case PC:
1242 case CC0:
1243 case CONST_INT:
1244 case CONST_DOUBLE:
1245 case CONST:
1246 case SYMBOL_REF:
1247 case LABEL_REF:
1248 return;
1249
1250 case REG:
1251 if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
1252 && ! reg_mentioned_p (in_this, not_in_this))
1253 *output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
1254 return;
1255
1256 default:
1257 break;
1258 }
1259
1260 fmt = GET_RTX_FORMAT (code);
1261 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1262 {
1263 int j;
1264
1265 switch (fmt[i])
1266 {
1267 case 'E':
1268 for (j = 0; j < XVECLEN (in_this, i); j++)
1269 record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output);
1270 break;
1271
1272 case 'e':
1273 record_excess_regs (XEXP (in_this, i), not_in_this, output);
1274 break;
1275 }
1276 }
1277 }
1278 \f
1279 /* Check what regs are referred to in the libcall block ending with INSN,
1280 aside from those mentioned in the equivalent value.
1281 If there are none, return 0.
1282 If there are one or more, return an EXPR_LIST containing all of them. */
1283
1284 rtx
1285 libcall_other_reg (rtx insn, rtx equiv)
1286 {
1287 rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
1288 rtx p = XEXP (note, 0);
1289 rtx output = 0;
1290
1291 /* First, find all the regs used in the libcall block
1292 that are not mentioned as inputs to the result. */
1293
1294 while (p != insn)
1295 {
1296 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
1297 || GET_CODE (p) == CALL_INSN)
1298 record_excess_regs (PATTERN (p), equiv, &output);
1299 p = NEXT_INSN (p);
1300 }
1301
1302 return output;
1303 }
1304 \f
1305 /* Return 1 if all uses of REG
1306 are between INSN and the end of the basic block. */
1307
1308 static int
1309 reg_in_basic_block_p (rtx insn, rtx reg)
1310 {
1311 int regno = REGNO (reg);
1312 rtx p;
1313
1314 if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
1315 return 0;
1316
1317 /* Search this basic block for the already recorded last use of the reg. */
1318 for (p = insn; p; p = NEXT_INSN (p))
1319 {
1320 switch (GET_CODE (p))
1321 {
1322 case NOTE:
1323 break;
1324
1325 case INSN:
1326 case CALL_INSN:
1327 /* Ordinary insn: if this is the last use, we win. */
1328 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1329 return 1;
1330 break;
1331
1332 case JUMP_INSN:
1333 /* Jump insn: if this is the last use, we win. */
1334 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1335 return 1;
1336 /* Otherwise, it's the end of the basic block, so we lose. */
1337 return 0;
1338
1339 case CODE_LABEL:
1340 case BARRIER:
1341 /* It's the end of the basic block, so we lose. */
1342 return 0;
1343
1344 default:
1345 break;
1346 }
1347 }
1348
1349 /* The "last use" that was recorded can't be found after the first
1350 use. This can happen when the last use was deleted while
1351 processing an inner loop, this inner loop was then completely
1352 unrolled, and the outer loop is always exited after the inner loop,
1353 so that everything after the first use becomes a single basic block. */
1354 return 1;
1355 }
1356 \f
1357 /* Compute the benefit of eliminating the insns in the block whose
1358 last insn is LAST. This may be a group of insns used to compute a
1359 value directly or can contain a library call. */
1360
1361 static int
1362 libcall_benefit (rtx last)
1363 {
1364 rtx insn;
1365 int benefit = 0;
1366
1367 for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
1368 insn != last; insn = NEXT_INSN (insn))
1369 {
1370 if (GET_CODE (insn) == CALL_INSN)
1371 benefit += 10; /* Assume at least this many insns in a library
1372 routine. */
1373 else if (GET_CODE (insn) == INSN
1374 && GET_CODE (PATTERN (insn)) != USE
1375 && GET_CODE (PATTERN (insn)) != CLOBBER)
1376 benefit++;
1377 }
1378
1379 return benefit;
1380 }
1381 \f
1382 /* Skip COUNT insns from INSN, counting library calls as 1 insn. */
1383
1384 static rtx
1385 skip_consec_insns (rtx insn, int count)
1386 {
1387 for (; count > 0; count--)
1388 {
1389 rtx temp;
1390
1391 /* If first insn of libcall sequence, skip to end. */
1392 /* Do this at start of loop, since INSN is guaranteed to
1393 be an insn here. */
1394 if (GET_CODE (insn) != NOTE
1395 && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX)))
1396 insn = XEXP (temp, 0);
1397
1398 do
1399 insn = NEXT_INSN (insn);
1400 while (GET_CODE (insn) == NOTE);
1401 }
1402
1403 return insn;
1404 }
1405
1406 /* Ignore any movable whose insn falls within a libcall
1407 which is part of another movable.
1408 We make use of the fact that the movable for the libcall value
1409 was made later and so appears later on the chain. */
1410
1411 static void
1412 ignore_some_movables (struct loop_movables *movables)
1413 {
1414 struct movable *m, *m1;
1415
1416 for (m = movables->head; m; m = m->next)
1417 {
1418 /* Is this a movable for the value of a libcall? */
1419 rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX);
1420 if (note)
1421 {
1422 rtx insn;
1423 /* Check for earlier movables inside that range,
1424 and mark them invalid. We cannot use LUIDs here because
1425 insns created by loop.c for prior loops don't have LUIDs.
1426 Rather than reject all such insns from movables, we just
1427 explicitly check each insn in the libcall (since invariant
1428 libcalls aren't that common). */
1429 for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn))
1430 for (m1 = movables->head; m1 != m; m1 = m1->next)
1431 if (m1->insn == insn)
1432 m1->done = 1;
1433 }
1434 }
1435 }
1436
1437 /* For each movable insn, see if the reg that it loads
1438 leads when it dies right into another conditionally movable insn.
1439 If so, record that the second insn "forces" the first one,
1440 since the second can be moved only if the first is. */
1441
1442 static void
1443 force_movables (struct loop_movables *movables)
1444 {
1445 struct movable *m, *m1;
1446
1447 for (m1 = movables->head; m1; m1 = m1->next)
1448 /* Omit this if moving just the (SET (REG) 0) of a zero-extend. */
1449 if (!m1->partial && !m1->done)
1450 {
1451 int regno = m1->regno;
1452 for (m = m1->next; m; m = m->next)
1453 /* ??? Could this be a bug? What if CSE caused the
1454 register of M1 to be used after this insn?
1455 Since CSE does not update regno_last_uid,
1456 this insn M->insn might not be where it dies.
1457 But very likely this doesn't matter; what matters is
1458 that M's reg is computed from M1's reg. */
1459 if (INSN_UID (m->insn) == REGNO_LAST_UID (regno)
1460 && !m->done)
1461 break;
1462 if (m != 0 && m->set_src == m1->set_dest
1463 /* If m->consec, m->set_src isn't valid. */
1464 && m->consec == 0)
1465 m = 0;
1466
1467 /* Increase the priority of the moving the first insn
1468 since it permits the second to be moved as well. */
1469 if (m != 0)
1470 {
1471 m->forces = m1;
1472 m1->lifetime += m->lifetime;
1473 m1->savings += m->savings;
1474 }
1475 }
1476 }
1477 \f
1478 /* Find invariant expressions that are equal and can be combined into
1479 one register. */
1480
1481 static void
1482 combine_movables (struct loop_movables *movables, struct loop_regs *regs)
1483 {
1484 struct movable *m;
1485 char *matched_regs = xmalloc (regs->num);
1486 enum machine_mode mode;
1487
1488 /* Regs that are set more than once are not allowed to match
1489 or be matched. I'm no longer sure why not. */
1490 /* Only pseudo registers are allowed to match or be matched,
1491 since move_movables does not validate the change. */
1492 /* Perhaps testing m->consec_sets would be more appropriate here? */
1493
1494 for (m = movables->head; m; m = m->next)
1495 if (m->match == 0 && regs->array[m->regno].n_times_set == 1
1496 && m->regno >= FIRST_PSEUDO_REGISTER
1497 && !m->insert_temp
1498 && !m->partial)
1499 {
1500 struct movable *m1;
1501 int regno = m->regno;
1502
1503 memset (matched_regs, 0, regs->num);
1504 matched_regs[regno] = 1;
1505
1506 /* We want later insns to match the first one. Don't make the first
1507 one match any later ones. So start this loop at m->next. */
1508 for (m1 = m->next; m1; m1 = m1->next)
1509 if (m != m1 && m1->match == 0
1510 && !m1->insert_temp
1511 && regs->array[m1->regno].n_times_set == 1
1512 && m1->regno >= FIRST_PSEUDO_REGISTER
1513 /* A reg used outside the loop mustn't be eliminated. */
1514 && !m1->global
1515 /* A reg used for zero-extending mustn't be eliminated. */
1516 && !m1->partial
1517 && (matched_regs[m1->regno]
1518 ||
1519 (
1520 /* Can combine regs with different modes loaded from the
1521 same constant only if the modes are the same or
1522 if both are integer modes with M wider or the same
1523 width as M1. The check for integer is redundant, but
1524 safe, since the only case of differing destination
1525 modes with equal sources is when both sources are
1526 VOIDmode, i.e., CONST_INT. */
1527 (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
1528 || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT
1529 && GET_MODE_CLASS (GET_MODE (m1->set_dest)) == MODE_INT
1530 && (GET_MODE_BITSIZE (GET_MODE (m->set_dest))
1531 >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest)))))
1532 /* See if the source of M1 says it matches M. */
1533 && ((GET_CODE (m1->set_src) == REG
1534 && matched_regs[REGNO (m1->set_src)])
1535 || rtx_equal_for_loop_p (m->set_src, m1->set_src,
1536 movables, regs))))
1537 && ((m->dependencies == m1->dependencies)
1538 || rtx_equal_p (m->dependencies, m1->dependencies)))
1539 {
1540 m->lifetime += m1->lifetime;
1541 m->savings += m1->savings;
1542 m1->done = 1;
1543 m1->match = m;
1544 matched_regs[m1->regno] = 1;
1545 }
1546 }
1547
1548 /* Now combine the regs used for zero-extension.
1549 This can be done for those not marked `global'
1550 provided their lives don't overlap. */
1551
1552 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1553 mode = GET_MODE_WIDER_MODE (mode))
1554 {
1555 struct movable *m0 = 0;
1556
1557 /* Combine all the registers for extension from mode MODE.
1558 Don't combine any that are used outside this loop. */
1559 for (m = movables->head; m; m = m->next)
1560 if (m->partial && ! m->global
1561 && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
1562 {
1563 struct movable *m1;
1564
1565 int first = REGNO_FIRST_LUID (m->regno);
1566 int last = REGNO_LAST_LUID (m->regno);
1567
1568 if (m0 == 0)
1569 {
1570 /* First one: don't check for overlap, just record it. */
1571 m0 = m;
1572 continue;
1573 }
1574
1575 /* Make sure they extend to the same mode.
1576 (Almost always true.) */
1577 if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
1578 continue;
1579
1580 /* We already have one: check for overlap with those
1581 already combined together. */
1582 for (m1 = movables->head; m1 != m; m1 = m1->next)
1583 if (m1 == m0 || (m1->partial && m1->match == m0))
1584 if (! (REGNO_FIRST_LUID (m1->regno) > last
1585 || REGNO_LAST_LUID (m1->regno) < first))
1586 goto overlap;
1587
1588 /* No overlap: we can combine this with the others. */
1589 m0->lifetime += m->lifetime;
1590 m0->savings += m->savings;
1591 m->done = 1;
1592 m->match = m0;
1593
1594 overlap:
1595 ;
1596 }
1597 }
1598
1599 /* Clean up. */
1600 free (matched_regs);
1601 }
1602
1603 /* Returns the number of movable instructions in LOOP that were not
1604 moved outside the loop. */
1605
1606 static int
1607 num_unmoved_movables (const struct loop *loop)
1608 {
1609 int num = 0;
1610 struct movable *m;
1611
1612 for (m = LOOP_MOVABLES (loop)->head; m; m = m->next)
1613 if (!m->done)
1614 ++num;
1615
1616 return num;
1617 }
1618
1619 \f
1620 /* Return 1 if regs X and Y will become the same if moved. */
1621
1622 static int
1623 regs_match_p (rtx x, rtx y, struct loop_movables *movables)
1624 {
1625 unsigned int xn = REGNO (x);
1626 unsigned int yn = REGNO (y);
1627 struct movable *mx, *my;
1628
1629 for (mx = movables->head; mx; mx = mx->next)
1630 if (mx->regno == xn)
1631 break;
1632
1633 for (my = movables->head; my; my = my->next)
1634 if (my->regno == yn)
1635 break;
1636
1637 return (mx && my
1638 && ((mx->match == my->match && mx->match != 0)
1639 || mx->match == my
1640 || mx == my->match));
1641 }
1642
1643 /* Return 1 if X and Y are identical-looking rtx's.
1644 This is the Lisp function EQUAL for rtx arguments.
1645
1646 If two registers are matching movables or a movable register and an
1647 equivalent constant, consider them equal. */
1648
1649 static int
1650 rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables,
1651 struct loop_regs *regs)
1652 {
1653 int i;
1654 int j;
1655 struct movable *m;
1656 enum rtx_code code;
1657 const char *fmt;
1658
1659 if (x == y)
1660 return 1;
1661 if (x == 0 || y == 0)
1662 return 0;
1663
1664 code = GET_CODE (x);
1665
1666 /* If we have a register and a constant, they may sometimes be
1667 equal. */
1668 if (GET_CODE (x) == REG && regs->array[REGNO (x)].set_in_loop == -2
1669 && CONSTANT_P (y))
1670 {
1671 for (m = movables->head; m; m = m->next)
1672 if (m->move_insn && m->regno == REGNO (x)
1673 && rtx_equal_p (m->set_src, y))
1674 return 1;
1675 }
1676 else if (GET_CODE (y) == REG && regs->array[REGNO (y)].set_in_loop == -2
1677 && CONSTANT_P (x))
1678 {
1679 for (m = movables->head; m; m = m->next)
1680 if (m->move_insn && m->regno == REGNO (y)
1681 && rtx_equal_p (m->set_src, x))
1682 return 1;
1683 }
1684
1685 /* Otherwise, rtx's of different codes cannot be equal. */
1686 if (code != GET_CODE (y))
1687 return 0;
1688
1689 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
1690 (REG:SI x) and (REG:HI x) are NOT equivalent. */
1691
1692 if (GET_MODE (x) != GET_MODE (y))
1693 return 0;
1694
1695 /* These three types of rtx's can be compared nonrecursively. */
1696 if (code == REG)
1697 return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));
1698
1699 if (code == LABEL_REF)
1700 return XEXP (x, 0) == XEXP (y, 0);
1701 if (code == SYMBOL_REF)
1702 return XSTR (x, 0) == XSTR (y, 0);
1703
1704 /* Compare the elements. If any pair of corresponding elements
1705 fail to match, return 0 for the whole things. */
1706
1707 fmt = GET_RTX_FORMAT (code);
1708 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1709 {
1710 switch (fmt[i])
1711 {
1712 case 'w':
1713 if (XWINT (x, i) != XWINT (y, i))
1714 return 0;
1715 break;
1716
1717 case 'i':
1718 if (XINT (x, i) != XINT (y, i))
1719 return 0;
1720 break;
1721
1722 case 'E':
1723 /* Two vectors must have the same length. */
1724 if (XVECLEN (x, i) != XVECLEN (y, i))
1725 return 0;
1726
1727 /* And the corresponding elements must match. */
1728 for (j = 0; j < XVECLEN (x, i); j++)
1729 if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j),
1730 movables, regs) == 0)
1731 return 0;
1732 break;
1733
1734 case 'e':
1735 if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs)
1736 == 0)
1737 return 0;
1738 break;
1739
1740 case 's':
1741 if (strcmp (XSTR (x, i), XSTR (y, i)))
1742 return 0;
1743 break;
1744
1745 case 'u':
1746 /* These are just backpointers, so they don't matter. */
1747 break;
1748
1749 case '0':
1750 break;
1751
1752 /* It is believed that rtx's at this level will never
1753 contain anything but integers and other rtx's,
1754 except for within LABEL_REFs and SYMBOL_REFs. */
1755 default:
1756 abort ();
1757 }
1758 }
1759 return 1;
1760 }
1761 \f
1762 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to all
1763 insns in INSNS which use the reference. LABEL_NUSES for CODE_LABEL
1764 references is incremented once for each added note. */
1765
1766 static void
1767 add_label_notes (rtx x, rtx insns)
1768 {
1769 enum rtx_code code = GET_CODE (x);
1770 int i, j;
1771 const char *fmt;
1772 rtx insn;
1773
1774 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
1775 {
1776 /* This code used to ignore labels that referred to dispatch tables to
1777 avoid flow generating (slightly) worse code.
1778
1779 We no longer ignore such label references (see LABEL_REF handling in
1780 mark_jump_label for additional information). */
1781 for (insn = insns; insn; insn = NEXT_INSN (insn))
1782 if (reg_mentioned_p (XEXP (x, 0), insn))
1783 {
1784 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
1785 REG_NOTES (insn));
1786 if (LABEL_P (XEXP (x, 0)))
1787 LABEL_NUSES (XEXP (x, 0))++;
1788 }
1789 }
1790
1791 fmt = GET_RTX_FORMAT (code);
1792 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1793 {
1794 if (fmt[i] == 'e')
1795 add_label_notes (XEXP (x, i), insns);
1796 else if (fmt[i] == 'E')
1797 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1798 add_label_notes (XVECEXP (x, i, j), insns);
1799 }
1800 }
1801 \f
1802 /* Scan MOVABLES, and move the insns that deserve to be moved.
1803 If two matching movables are combined, replace one reg with the
1804 other throughout. */
1805
1806 static void
1807 move_movables (struct loop *loop, struct loop_movables *movables,
1808 int threshold, int insn_count)
1809 {
1810 struct loop_regs *regs = LOOP_REGS (loop);
1811 int nregs = regs->num;
1812 rtx new_start = 0;
1813 struct movable *m;
1814 rtx p;
1815 rtx loop_start = loop->start;
1816 rtx loop_end = loop->end;
1817 /* Map of pseudo-register replacements to handle combining
1818 when we move several insns that load the same value
1819 into different pseudo-registers. */
1820 rtx *reg_map = xcalloc (nregs, sizeof (rtx));
1821 char *already_moved = xcalloc (nregs, sizeof (char));
1822
1823 for (m = movables->head; m; m = m->next)
1824 {
1825 /* Describe this movable insn. */
1826
1827 if (loop_dump_stream)
1828 {
1829 fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
1830 INSN_UID (m->insn), m->regno, m->lifetime);
1831 if (m->consec > 0)
1832 fprintf (loop_dump_stream, "consec %d, ", m->consec);
1833 if (m->cond)
1834 fprintf (loop_dump_stream, "cond ");
1835 if (m->force)
1836 fprintf (loop_dump_stream, "force ");
1837 if (m->global)
1838 fprintf (loop_dump_stream, "global ");
1839 if (m->done)
1840 fprintf (loop_dump_stream, "done ");
1841 if (m->move_insn)
1842 fprintf (loop_dump_stream, "move-insn ");
1843 if (m->match)
1844 fprintf (loop_dump_stream, "matches %d ",
1845 INSN_UID (m->match->insn));
1846 if (m->forces)
1847 fprintf (loop_dump_stream, "forces %d ",
1848 INSN_UID (m->forces->insn));
1849 }
1850
1851 /* Ignore the insn if it's already done (it matched something else).
1852 Otherwise, see if it is now safe to move. */
1853
1854 if (!m->done
1855 && (! m->cond
1856 || (1 == loop_invariant_p (loop, m->set_src)
1857 && (m->dependencies == 0
1858 || 1 == loop_invariant_p (loop, m->dependencies))
1859 && (m->consec == 0
1860 || 1 == consec_sets_invariant_p (loop, m->set_dest,
1861 m->consec + 1,
1862 m->insn))))
1863 && (! m->forces || m->forces->done))
1864 {
1865 int regno;
1866 rtx p;
1867 int savings = m->savings;
1868
1869 /* We have an insn that is safe to move.
1870 Compute its desirability. */
1871
1872 p = m->insn;
1873 regno = m->regno;
1874
1875 if (loop_dump_stream)
1876 fprintf (loop_dump_stream, "savings %d ", savings);
1877
1878 if (regs->array[regno].moved_once && loop_dump_stream)
1879 fprintf (loop_dump_stream, "halved since already moved ");
1880
1881 /* An insn MUST be moved if we already moved something else
1882 which is safe only if this one is moved too: that is,
1883 if already_moved[REGNO] is nonzero. */
1884
1885 /* An insn is desirable to move if the new lifetime of the
1886 register is no more than THRESHOLD times the old lifetime.
1887 If it's not desirable, it means the loop is so big
1888 that moving won't speed things up much,
1889 and it is liable to make register usage worse. */
1890
1891 /* It is also desirable to move if it can be moved at no
1892 extra cost because something else was already moved. */
1893
1894 if (already_moved[regno]
1895 || flag_move_all_movables
1896 || (threshold * savings * m->lifetime) >=
1897 (regs->array[regno].moved_once ? insn_count * 2 : insn_count)
1898 || (m->forces && m->forces->done
1899 && regs->array[m->forces->regno].n_times_set == 1))
1900 {
1901 int count;
1902 struct movable *m1;
1903 rtx first = NULL_RTX;
1904 rtx newreg = NULL_RTX;
1905
1906 if (m->insert_temp)
1907 newreg = gen_reg_rtx (GET_MODE (m->set_dest));
1908
1909 /* Now move the insns that set the reg. */
1910
1911 if (m->partial && m->match)
1912 {
1913 rtx newpat, i1;
1914 rtx r1, r2;
1915 /* Find the end of this chain of matching regs.
1916 Thus, we load each reg in the chain from that one reg.
1917 And that reg is loaded with 0 directly,
1918 since it has ->match == 0. */
1919 for (m1 = m; m1->match; m1 = m1->match);
1920 newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
1921 SET_DEST (PATTERN (m1->insn)));
1922 i1 = loop_insn_hoist (loop, newpat);
1923
1924 /* Mark the moved, invariant reg as being allowed to
1925 share a hard reg with the other matching invariant. */
1926 REG_NOTES (i1) = REG_NOTES (m->insn);
1927 r1 = SET_DEST (PATTERN (m->insn));
1928 r2 = SET_DEST (PATTERN (m1->insn));
1929 regs_may_share
1930 = gen_rtx_EXPR_LIST (VOIDmode, r1,
1931 gen_rtx_EXPR_LIST (VOIDmode, r2,
1932 regs_may_share));
1933 delete_insn (m->insn);
1934
1935 if (new_start == 0)
1936 new_start = i1;
1937
1938 if (loop_dump_stream)
1939 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
1940 }
1941 /* If we are to re-generate the item being moved with a
1942 new move insn, first delete what we have and then emit
1943 the move insn before the loop. */
1944 else if (m->move_insn)
1945 {
1946 rtx i1, temp, seq;
1947
1948 for (count = m->consec; count >= 0; count--)
1949 {
1950 /* If this is the first insn of a library call sequence,
1951 something is very wrong. */
1952 if (GET_CODE (p) != NOTE
1953 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
1954 abort ();
1955
1956 /* If this is the last insn of a libcall sequence, then
1957 delete every insn in the sequence except the last.
1958 The last insn is handled in the normal manner. */
1959 if (GET_CODE (p) != NOTE
1960 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
1961 {
1962 temp = XEXP (temp, 0);
1963 while (temp != p)
1964 temp = delete_insn (temp);
1965 }
1966
1967 temp = p;
1968 p = delete_insn (p);
1969
1970 /* simplify_giv_expr expects that it can walk the insns
1971 at m->insn forwards and see this old sequence we are
1972 tossing here. delete_insn does preserve the next
1973 pointers, but when we skip over a NOTE we must fix
1974 it up. Otherwise that code walks into the non-deleted
1975 insn stream. */
1976 while (p && GET_CODE (p) == NOTE)
1977 p = NEXT_INSN (temp) = NEXT_INSN (p);
1978
1979 if (m->insert_temp)
1980 {
1981 /* Replace the original insn with a move from
1982 our newly created temp. */
1983 start_sequence ();
1984 emit_move_insn (m->set_dest, newreg);
1985 seq = get_insns ();
1986 end_sequence ();
1987 emit_insn_before (seq, p);
1988 }
1989 }
1990
1991 start_sequence ();
1992 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
1993 m->set_src);
1994 seq = get_insns ();
1995 end_sequence ();
1996
1997 add_label_notes (m->set_src, seq);
1998
1999 i1 = loop_insn_hoist (loop, seq);
2000 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2001 set_unique_reg_note (i1,
2002 m->is_equiv ? REG_EQUIV : REG_EQUAL,
2003 m->set_src);
2004
2005 if (loop_dump_stream)
2006 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
2007
2008 /* The more regs we move, the less we like moving them. */
2009 threshold -= 3;
2010 }
2011 else
2012 {
2013 for (count = m->consec; count >= 0; count--)
2014 {
2015 rtx i1, temp;
2016
2017 /* If first insn of libcall sequence, skip to end. */
2018 /* Do this at start of loop, since p is guaranteed to
2019 be an insn here. */
2020 if (GET_CODE (p) != NOTE
2021 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
2022 p = XEXP (temp, 0);
2023
2024 /* If last insn of libcall sequence, move all
2025 insns except the last before the loop. The last
2026 insn is handled in the normal manner. */
2027 if (GET_CODE (p) != NOTE
2028 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
2029 {
2030 rtx fn_address = 0;
2031 rtx fn_reg = 0;
2032 rtx fn_address_insn = 0;
2033
2034 first = 0;
2035 for (temp = XEXP (temp, 0); temp != p;
2036 temp = NEXT_INSN (temp))
2037 {
2038 rtx body;
2039 rtx n;
2040 rtx next;
2041
2042 if (GET_CODE (temp) == NOTE)
2043 continue;
2044
2045 body = PATTERN (temp);
2046
2047 /* Find the next insn after TEMP,
2048 not counting USE or NOTE insns. */
2049 for (next = NEXT_INSN (temp); next != p;
2050 next = NEXT_INSN (next))
2051 if (! (GET_CODE (next) == INSN
2052 && GET_CODE (PATTERN (next)) == USE)
2053 && GET_CODE (next) != NOTE)
2054 break;
2055
2056 /* If that is the call, this may be the insn
2057 that loads the function address.
2058
2059 Extract the function address from the insn
2060 that loads it into a register.
2061 If this insn was cse'd, we get incorrect code.
2062
2063 So emit a new move insn that copies the
2064 function address into the register that the
2065 call insn will use. flow.c will delete any
2066 redundant stores that we have created. */
2067 if (GET_CODE (next) == CALL_INSN
2068 && GET_CODE (body) == SET
2069 && GET_CODE (SET_DEST (body)) == REG
2070 && (n = find_reg_note (temp, REG_EQUAL,
2071 NULL_RTX)))
2072 {
2073 fn_reg = SET_SRC (body);
2074 if (GET_CODE (fn_reg) != REG)
2075 fn_reg = SET_DEST (body);
2076 fn_address = XEXP (n, 0);
2077 fn_address_insn = temp;
2078 }
2079 /* We have the call insn.
2080 If it uses the register we suspect it might,
2081 load it with the correct address directly. */
2082 if (GET_CODE (temp) == CALL_INSN
2083 && fn_address != 0
2084 && reg_referenced_p (fn_reg, body))
2085 loop_insn_emit_after (loop, 0, fn_address_insn,
2086 gen_move_insn
2087 (fn_reg, fn_address));
2088
2089 if (GET_CODE (temp) == CALL_INSN)
2090 {
2091 i1 = loop_call_insn_hoist (loop, body);
2092 /* Because the USAGE information potentially
2093 contains objects other than hard registers
2094 we need to copy it. */
2095 if (CALL_INSN_FUNCTION_USAGE (temp))
2096 CALL_INSN_FUNCTION_USAGE (i1)
2097 = copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
2098 }
2099 else
2100 i1 = loop_insn_hoist (loop, body);
2101 if (first == 0)
2102 first = i1;
2103 if (temp == fn_address_insn)
2104 fn_address_insn = i1;
2105 REG_NOTES (i1) = REG_NOTES (temp);
2106 REG_NOTES (temp) = NULL;
2107 delete_insn (temp);
2108 }
2109 if (new_start == 0)
2110 new_start = first;
2111 }
2112 if (m->savemode != VOIDmode)
2113 {
2114 /* P sets REG to zero; but we should clear only
2115 the bits that are not covered by the mode
2116 m->savemode. */
2117 rtx reg = m->set_dest;
2118 rtx sequence;
2119 rtx tem;
2120
2121 start_sequence ();
2122 tem = expand_simple_binop
2123 (GET_MODE (reg), AND, reg,
2124 GEN_INT ((((HOST_WIDE_INT) 1
2125 << GET_MODE_BITSIZE (m->savemode)))
2126 - 1),
2127 reg, 1, OPTAB_LIB_WIDEN);
2128 if (tem == 0)
2129 abort ();
2130 if (tem != reg)
2131 emit_move_insn (reg, tem);
2132 sequence = get_insns ();
2133 end_sequence ();
2134 i1 = loop_insn_hoist (loop, sequence);
2135 }
2136 else if (GET_CODE (p) == CALL_INSN)
2137 {
2138 i1 = loop_call_insn_hoist (loop, PATTERN (p));
2139 /* Because the USAGE information potentially
2140 contains objects other than hard registers
2141 we need to copy it. */
2142 if (CALL_INSN_FUNCTION_USAGE (p))
2143 CALL_INSN_FUNCTION_USAGE (i1)
2144 = copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
2145 }
2146 else if (count == m->consec && m->move_insn_first)
2147 {
2148 rtx seq;
2149 /* The SET_SRC might not be invariant, so we must
2150 use the REG_EQUAL note. */
2151 start_sequence ();
2152 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2153 m->set_src);
2154 seq = get_insns ();
2155 end_sequence ();
2156
2157 add_label_notes (m->set_src, seq);
2158
2159 i1 = loop_insn_hoist (loop, seq);
2160 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2161 set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
2162 : REG_EQUAL, m->set_src);
2163 }
2164 else if (m->insert_temp)
2165 {
2166 rtx *reg_map2 = xcalloc (REGNO (newreg),
2167 sizeof(rtx));
2168 reg_map2 [m->regno] = newreg;
2169
2170 i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
2171 replace_regs (i1, reg_map2, REGNO (newreg), 1);
2172 free (reg_map2);
2173 }
2174 else
2175 i1 = loop_insn_hoist (loop, PATTERN (p));
2176
2177 if (REG_NOTES (i1) == 0)
2178 {
2179 REG_NOTES (i1) = REG_NOTES (p);
2180 REG_NOTES (p) = NULL;
2181
2182 /* If there is a REG_EQUAL note present whose value
2183 is not loop invariant, then delete it, since it
2184 may cause problems with later optimization passes.
2185 It is possible for cse to create such notes
2186 like this as a result of record_jump_cond. */
2187
2188 if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
2189 && ! loop_invariant_p (loop, XEXP (temp, 0)))
2190 remove_note (i1, temp);
2191 }
2192
2193 if (new_start == 0)
2194 new_start = i1;
2195
2196 if (loop_dump_stream)
2197 fprintf (loop_dump_stream, " moved to %d",
2198 INSN_UID (i1));
2199
2200 /* If library call, now fix the REG_NOTES that contain
2201 insn pointers, namely REG_LIBCALL on FIRST
2202 and REG_RETVAL on I1. */
2203 if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
2204 {
2205 XEXP (temp, 0) = first;
2206 temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
2207 XEXP (temp, 0) = i1;
2208 }
2209
2210 temp = p;
2211 delete_insn (p);
2212 p = NEXT_INSN (p);
2213
2214 /* simplify_giv_expr expects that it can walk the insns
2215 at m->insn forwards and see this old sequence we are
2216 tossing here. delete_insn does preserve the next
2217 pointers, but when we skip over a NOTE we must fix
2218 it up. Otherwise that code walks into the non-deleted
2219 insn stream. */
2220 while (p && GET_CODE (p) == NOTE)
2221 p = NEXT_INSN (temp) = NEXT_INSN (p);
2222
2223 if (m->insert_temp)
2224 {
2225 rtx seq;
2226 /* Replace the original insn with a move from
2227 our newly created temp. */
2228 start_sequence ();
2229 emit_move_insn (m->set_dest, newreg);
2230 seq = get_insns ();
2231 end_sequence ();
2232 emit_insn_before (seq, p);
2233 }
2234 }
2235
2236 /* The more regs we move, the less we like moving them. */
2237 threshold -= 3;
2238 }
2239
2240 m->done = 1;
2241
2242 if (!m->insert_temp)
2243 {
2244 /* Any other movable that loads the same register
2245 MUST be moved. */
2246 already_moved[regno] = 1;
2247
2248 /* This reg has been moved out of one loop. */
2249 regs->array[regno].moved_once = 1;
2250
2251 /* The reg set here is now invariant. */
2252 if (! m->partial)
2253 {
2254 int i;
2255 for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
2256 regs->array[regno+i].set_in_loop = 0;
2257 }
2258
2259 /* Change the length-of-life info for the register
2260 to say it lives at least the full length of this loop.
2261 This will help guide optimizations in outer loops. */
2262
2263 if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
2264 /* This is the old insn before all the moved insns.
2265 We can't use the moved insn because it is out of range
2266 in uid_luid. Only the old insns have luids. */
2267 REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
2268 if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
2269 REGNO_LAST_UID (regno) = INSN_UID (loop_end);
2270 }
2271
2272 /* Combine with this moved insn any other matching movables. */
2273
2274 if (! m->partial)
2275 for (m1 = movables->head; m1; m1 = m1->next)
2276 if (m1->match == m)
2277 {
2278 rtx temp;
2279
2280 /* Schedule the reg loaded by M1
2281 for replacement so that shares the reg of M.
2282 If the modes differ (only possible in restricted
2283 circumstances, make a SUBREG.
2284
2285 Note this assumes that the target dependent files
2286 treat REG and SUBREG equally, including within
2287 GO_IF_LEGITIMATE_ADDRESS and in all the
2288 predicates since we never verify that replacing the
2289 original register with a SUBREG results in a
2290 recognizable insn. */
2291 if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest))
2292 reg_map[m1->regno] = m->set_dest;
2293 else
2294 reg_map[m1->regno]
2295 = gen_lowpart_common (GET_MODE (m1->set_dest),
2296 m->set_dest);
2297
2298 /* Get rid of the matching insn
2299 and prevent further processing of it. */
2300 m1->done = 1;
2301
2302 /* If library call, delete all insns. */
2303 if ((temp = find_reg_note (m1->insn, REG_RETVAL,
2304 NULL_RTX)))
2305 delete_insn_chain (XEXP (temp, 0), m1->insn);
2306 else
2307 delete_insn (m1->insn);
2308
2309 /* Any other movable that loads the same register
2310 MUST be moved. */
2311 already_moved[m1->regno] = 1;
2312
2313 /* The reg merged here is now invariant,
2314 if the reg it matches is invariant. */
2315 if (! m->partial)
2316 {
2317 int i;
2318 for (i = 0;
2319 i < LOOP_REGNO_NREGS (regno, m1->set_dest);
2320 i++)
2321 regs->array[m1->regno+i].set_in_loop = 0;
2322 }
2323 }
2324 }
2325 else if (loop_dump_stream)
2326 fprintf (loop_dump_stream, "not desirable");
2327 }
2328 else if (loop_dump_stream && !m->match)
2329 fprintf (loop_dump_stream, "not safe");
2330
2331 if (loop_dump_stream)
2332 fprintf (loop_dump_stream, "\n");
2333 }
2334
2335 if (new_start == 0)
2336 new_start = loop_start;
2337
2338 /* Go through all the instructions in the loop, making
2339 all the register substitutions scheduled in REG_MAP. */
2340 for (p = new_start; p != loop_end; p = NEXT_INSN (p))
2341 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
2342 || GET_CODE (p) == CALL_INSN)
2343 {
2344 replace_regs (PATTERN (p), reg_map, nregs, 0);
2345 replace_regs (REG_NOTES (p), reg_map, nregs, 0);
2346 INSN_CODE (p) = -1;
2347 }
2348
2349 /* Clean up. */
2350 free (reg_map);
2351 free (already_moved);
2352 }
2353
2354
2355 static void
2356 loop_movables_add (struct loop_movables *movables, struct movable *m)
2357 {
2358 if (movables->head == 0)
2359 movables->head = m;
2360 else
2361 movables->last->next = m;
2362 movables->last = m;
2363 }
2364
2365
2366 static void
2367 loop_movables_free (struct loop_movables *movables)
2368 {
2369 struct movable *m;
2370 struct movable *m_next;
2371
2372 for (m = movables->head; m; m = m_next)
2373 {
2374 m_next = m->next;
2375 free (m);
2376 }
2377 }
2378 \f
2379 #if 0
2380 /* Scan X and replace the address of any MEM in it with ADDR.
2381 REG is the address that MEM should have before the replacement. */
2382
2383 static void
2384 replace_call_address (rtx x, rtx reg, rtx addr)
2385 {
2386 enum rtx_code code;
2387 int i;
2388 const char *fmt;
2389
2390 if (x == 0)
2391 return;
2392 code = GET_CODE (x);
2393 switch (code)
2394 {
2395 case PC:
2396 case CC0:
2397 case CONST_INT:
2398 case CONST_DOUBLE:
2399 case CONST:
2400 case SYMBOL_REF:
2401 case LABEL_REF:
2402 case REG:
2403 return;
2404
2405 case SET:
2406 /* Short cut for very common case. */
2407 replace_call_address (XEXP (x, 1), reg, addr);
2408 return;
2409
2410 case CALL:
2411 /* Short cut for very common case. */
2412 replace_call_address (XEXP (x, 0), reg, addr);
2413 return;
2414
2415 case MEM:
2416 /* If this MEM uses a reg other than the one we expected,
2417 something is wrong. */
2418 if (XEXP (x, 0) != reg)
2419 abort ();
2420 XEXP (x, 0) = addr;
2421 return;
2422
2423 default:
2424 break;
2425 }
2426
2427 fmt = GET_RTX_FORMAT (code);
2428 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2429 {
2430 if (fmt[i] == 'e')
2431 replace_call_address (XEXP (x, i), reg, addr);
2432 else if (fmt[i] == 'E')
2433 {
2434 int j;
2435 for (j = 0; j < XVECLEN (x, i); j++)
2436 replace_call_address (XVECEXP (x, i, j), reg, addr);
2437 }
2438 }
2439 }
2440 #endif
2441 \f
2442 /* Return the number of memory refs to addresses that vary
2443 in the rtx X. */
2444
2445 static int
2446 count_nonfixed_reads (const struct loop *loop, rtx x)
2447 {
2448 enum rtx_code code;
2449 int i;
2450 const char *fmt;
2451 int value;
2452
2453 if (x == 0)
2454 return 0;
2455
2456 code = GET_CODE (x);
2457 switch (code)
2458 {
2459 case PC:
2460 case CC0:
2461 case CONST_INT:
2462 case CONST_DOUBLE:
2463 case CONST:
2464 case SYMBOL_REF:
2465 case LABEL_REF:
2466 case REG:
2467 return 0;
2468
2469 case MEM:
2470 return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
2471 + count_nonfixed_reads (loop, XEXP (x, 0)));
2472
2473 default:
2474 break;
2475 }
2476
2477 value = 0;
2478 fmt = GET_RTX_FORMAT (code);
2479 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2480 {
2481 if (fmt[i] == 'e')
2482 value += count_nonfixed_reads (loop, XEXP (x, i));
2483 if (fmt[i] == 'E')
2484 {
2485 int j;
2486 for (j = 0; j < XVECLEN (x, i); j++)
2487 value += count_nonfixed_reads (loop, XVECEXP (x, i, j));
2488 }
2489 }
2490 return value;
2491 }
2492 \f
2493 /* Scan a loop setting the elements `cont', `vtop', `loops_enclosed',
2494 `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
2495 `unknown_address_altered', `unknown_constant_address_altered', and
2496 `num_mem_sets' in LOOP. Also, fill in the array `mems' and the
2497 list `store_mems' in LOOP. */
2498
2499 static void
2500 prescan_loop (struct loop *loop)
2501 {
2502 int level = 1;
2503 rtx insn;
2504 struct loop_info *loop_info = LOOP_INFO (loop);
2505 rtx start = loop->start;
2506 rtx end = loop->end;
2507 /* The label after END. Jumping here is just like falling off the
2508 end of the loop. We use next_nonnote_insn instead of next_label
2509 as a hedge against the (pathological) case where some actual insn
2510 might end up between the two. */
2511 rtx exit_target = next_nonnote_insn (end);
2512
2513 loop_info->has_indirect_jump = indirect_jump_in_function;
2514 loop_info->pre_header_has_call = 0;
2515 loop_info->has_call = 0;
2516 loop_info->has_nonconst_call = 0;
2517 loop_info->has_prefetch = 0;
2518 loop_info->has_volatile = 0;
2519 loop_info->has_tablejump = 0;
2520 loop_info->has_multiple_exit_targets = 0;
2521 loop->level = 1;
2522
2523 loop_info->unknown_address_altered = 0;
2524 loop_info->unknown_constant_address_altered = 0;
2525 loop_info->store_mems = NULL_RTX;
2526 loop_info->first_loop_store_insn = NULL_RTX;
2527 loop_info->mems_idx = 0;
2528 loop_info->num_mem_sets = 0;
2529 /* If loop opts run twice, this was set on 1st pass for 2nd. */
2530 loop_info->preconditioned = NOTE_PRECONDITIONED (end);
2531
2532 for (insn = start; insn && GET_CODE (insn) != CODE_LABEL;
2533 insn = PREV_INSN (insn))
2534 {
2535 if (GET_CODE (insn) == CALL_INSN)
2536 {
2537 loop_info->pre_header_has_call = 1;
2538 break;
2539 }
2540 }
2541
2542 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2543 insn = NEXT_INSN (insn))
2544 {
2545 switch (GET_CODE (insn))
2546 {
2547 case NOTE:
2548 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
2549 {
2550 ++level;
2551 /* Count number of loops contained in this one. */
2552 loop->level++;
2553 }
2554 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
2555 --level;
2556 break;
2557
2558 case CALL_INSN:
2559 if (! CONST_OR_PURE_CALL_P (insn))
2560 {
2561 loop_info->unknown_address_altered = 1;
2562 loop_info->has_nonconst_call = 1;
2563 }
2564 else if (pure_call_p (insn))
2565 loop_info->has_nonconst_call = 1;
2566 loop_info->has_call = 1;
2567 if (can_throw_internal (insn))
2568 loop_info->has_multiple_exit_targets = 1;
2569
2570 /* Calls initializing constant objects have CLOBBER of MEM /u in the
2571 attached FUNCTION_USAGE expression list, not accounted for by the
2572 code above. We should note these to avoid missing dependencies in
2573 later references. */
2574 {
2575 rtx fusage_entry;
2576
2577 for (fusage_entry = CALL_INSN_FUNCTION_USAGE (insn);
2578 fusage_entry; fusage_entry = XEXP (fusage_entry, 1))
2579 {
2580 rtx fusage = XEXP (fusage_entry, 0);
2581
2582 if (GET_CODE (fusage) == CLOBBER
2583 && GET_CODE (XEXP (fusage, 0)) == MEM
2584 && RTX_UNCHANGING_P (XEXP (fusage, 0)))
2585 {
2586 note_stores (fusage, note_addr_stored, loop_info);
2587 if (! loop_info->first_loop_store_insn
2588 && loop_info->store_mems)
2589 loop_info->first_loop_store_insn = insn;
2590 }
2591 }
2592 }
2593 break;
2594
2595 case JUMP_INSN:
2596 if (! loop_info->has_multiple_exit_targets)
2597 {
2598 rtx set = pc_set (insn);
2599
2600 if (set)
2601 {
2602 rtx src = SET_SRC (set);
2603 rtx label1, label2;
2604
2605 if (GET_CODE (src) == IF_THEN_ELSE)
2606 {
2607 label1 = XEXP (src, 1);
2608 label2 = XEXP (src, 2);
2609 }
2610 else
2611 {
2612 label1 = src;
2613 label2 = NULL_RTX;
2614 }
2615
2616 do
2617 {
2618 if (label1 && label1 != pc_rtx)
2619 {
2620 if (GET_CODE (label1) != LABEL_REF)
2621 {
2622 /* Something tricky. */
2623 loop_info->has_multiple_exit_targets = 1;
2624 break;
2625 }
2626 else if (XEXP (label1, 0) != exit_target
2627 && LABEL_OUTSIDE_LOOP_P (label1))
2628 {
2629 /* A jump outside the current loop. */
2630 loop_info->has_multiple_exit_targets = 1;
2631 break;
2632 }
2633 }
2634
2635 label1 = label2;
2636 label2 = NULL_RTX;
2637 }
2638 while (label1);
2639 }
2640 else
2641 {
2642 /* A return, or something tricky. */
2643 loop_info->has_multiple_exit_targets = 1;
2644 }
2645 }
2646 /* Fall through. */
2647
2648 case INSN:
2649 if (volatile_refs_p (PATTERN (insn)))
2650 loop_info->has_volatile = 1;
2651
2652 if (GET_CODE (insn) == JUMP_INSN
2653 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2654 || GET_CODE (PATTERN (insn)) == ADDR_VEC))
2655 loop_info->has_tablejump = 1;
2656
2657 note_stores (PATTERN (insn), note_addr_stored, loop_info);
2658 if (! loop_info->first_loop_store_insn && loop_info->store_mems)
2659 loop_info->first_loop_store_insn = insn;
2660
2661 if (flag_non_call_exceptions && can_throw_internal (insn))
2662 loop_info->has_multiple_exit_targets = 1;
2663 break;
2664
2665 default:
2666 break;
2667 }
2668 }
2669
2670 /* Now, rescan the loop, setting up the LOOP_MEMS array. */
2671 if (/* An exception thrown by a called function might land us
2672 anywhere. */
2673 ! loop_info->has_nonconst_call
2674 /* We don't want loads for MEMs moved to a location before the
2675 one at which their stack memory becomes allocated. (Note
2676 that this is not a problem for malloc, etc., since those
2677 require actual function calls. */
2678 && ! current_function_calls_alloca
2679 /* There are ways to leave the loop other than falling off the
2680 end. */
2681 && ! loop_info->has_multiple_exit_targets)
2682 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2683 insn = NEXT_INSN (insn))
2684 for_each_rtx (&insn, insert_loop_mem, loop_info);
2685
2686 /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
2687 that loop_invariant_p and load_mems can use true_dependence
2688 to determine what is really clobbered. */
2689 if (loop_info->unknown_address_altered)
2690 {
2691 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2692
2693 loop_info->store_mems
2694 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2695 }
2696 if (loop_info->unknown_constant_address_altered)
2697 {
2698 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2699
2700 RTX_UNCHANGING_P (mem) = 1;
2701 loop_info->store_mems
2702 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2703 }
2704 }
2705 \f
2706 /* Invalidate all loops containing LABEL. */
2707
2708 static void
2709 invalidate_loops_containing_label (rtx label)
2710 {
2711 struct loop *loop;
2712 for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer)
2713 loop->invalid = 1;
2714 }
2715
2716 /* Scan the function looking for loops. Record the start and end of each loop.
2717 Also mark as invalid loops any loops that contain a setjmp or are branched
2718 to from outside the loop. */
2719
2720 static void
2721 find_and_verify_loops (rtx f, struct loops *loops)
2722 {
2723 rtx insn;
2724 rtx label;
2725 int num_loops;
2726 struct loop *current_loop;
2727 struct loop *next_loop;
2728 struct loop *loop;
2729
2730 num_loops = loops->num;
2731
2732 compute_luids (f, NULL_RTX, 0);
2733
2734 /* If there are jumps to undefined labels,
2735 treat them as jumps out of any/all loops.
2736 This also avoids writing past end of tables when there are no loops. */
2737 uid_loop[0] = NULL;
2738
2739 /* Find boundaries of loops, mark which loops are contained within
2740 loops, and invalidate loops that have setjmp. */
2741
2742 num_loops = 0;
2743 current_loop = NULL;
2744 for (insn = f; insn; insn = NEXT_INSN (insn))
2745 {
2746 if (GET_CODE (insn) == NOTE)
2747 switch (NOTE_LINE_NUMBER (insn))
2748 {
2749 case NOTE_INSN_LOOP_BEG:
2750 next_loop = loops->array + num_loops;
2751 next_loop->num = num_loops;
2752 num_loops++;
2753 next_loop->start = insn;
2754 next_loop->outer = current_loop;
2755 current_loop = next_loop;
2756 break;
2757
2758 case NOTE_INSN_LOOP_CONT:
2759 current_loop->cont = insn;
2760 break;
2761
2762 case NOTE_INSN_LOOP_VTOP:
2763 current_loop->vtop = insn;
2764 break;
2765
2766 case NOTE_INSN_LOOP_END:
2767 if (! current_loop)
2768 abort ();
2769
2770 current_loop->end = insn;
2771 current_loop = current_loop->outer;
2772 break;
2773
2774 default:
2775 break;
2776 }
2777
2778 if (GET_CODE (insn) == CALL_INSN
2779 && find_reg_note (insn, REG_SETJMP, NULL))
2780 {
2781 /* In this case, we must invalidate our current loop and any
2782 enclosing loop. */
2783 for (loop = current_loop; loop; loop = loop->outer)
2784 {
2785 loop->invalid = 1;
2786 if (loop_dump_stream)
2787 fprintf (loop_dump_stream,
2788 "\nLoop at %d ignored due to setjmp.\n",
2789 INSN_UID (loop->start));
2790 }
2791 }
2792
2793 /* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
2794 enclosing loop, but this doesn't matter. */
2795 uid_loop[INSN_UID (insn)] = current_loop;
2796 }
2797
2798 /* Any loop containing a label used in an initializer must be invalidated,
2799 because it can be jumped into from anywhere. */
2800 for (label = forced_labels; label; label = XEXP (label, 1))
2801 invalidate_loops_containing_label (XEXP (label, 0));
2802
2803 /* Any loop containing a label used for an exception handler must be
2804 invalidated, because it can be jumped into from anywhere. */
2805 for_each_eh_label (invalidate_loops_containing_label);
2806
2807 /* Now scan all insn's in the function. If any JUMP_INSN branches into a
2808 loop that it is not contained within, that loop is marked invalid.
2809 If any INSN or CALL_INSN uses a label's address, then the loop containing
2810 that label is marked invalid, because it could be jumped into from
2811 anywhere.
2812
2813 Also look for blocks of code ending in an unconditional branch that
2814 exits the loop. If such a block is surrounded by a conditional
2815 branch around the block, move the block elsewhere (see below) and
2816 invert the jump to point to the code block. This may eliminate a
2817 label in our loop and will simplify processing by both us and a
2818 possible second cse pass. */
2819
2820 for (insn = f; insn; insn = NEXT_INSN (insn))
2821 if (INSN_P (insn))
2822 {
2823 struct loop *this_loop = uid_loop[INSN_UID (insn)];
2824
2825 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
2826 {
2827 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
2828 if (note)
2829 invalidate_loops_containing_label (XEXP (note, 0));
2830 }
2831
2832 if (GET_CODE (insn) != JUMP_INSN)
2833 continue;
2834
2835 mark_loop_jump (PATTERN (insn), this_loop);
2836
2837 /* See if this is an unconditional branch outside the loop. */
2838 if (this_loop
2839 && (GET_CODE (PATTERN (insn)) == RETURN
2840 || (any_uncondjump_p (insn)
2841 && onlyjump_p (insn)
2842 && (uid_loop[INSN_UID (JUMP_LABEL (insn))]
2843 != this_loop)))
2844 && get_max_uid () < max_uid_for_loop)
2845 {
2846 rtx p;
2847 rtx our_next = next_real_insn (insn);
2848 rtx last_insn_to_move = NEXT_INSN (insn);
2849 struct loop *dest_loop;
2850 struct loop *outer_loop = NULL;
2851
2852 /* Go backwards until we reach the start of the loop, a label,
2853 or a JUMP_INSN. */
2854 for (p = PREV_INSN (insn);
2855 GET_CODE (p) != CODE_LABEL
2856 && ! (GET_CODE (p) == NOTE
2857 && NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
2858 && GET_CODE (p) != JUMP_INSN;
2859 p = PREV_INSN (p))
2860 ;
2861
2862 /* Check for the case where we have a jump to an inner nested
2863 loop, and do not perform the optimization in that case. */
2864
2865 if (JUMP_LABEL (insn))
2866 {
2867 dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
2868 if (dest_loop)
2869 {
2870 for (outer_loop = dest_loop; outer_loop;
2871 outer_loop = outer_loop->outer)
2872 if (outer_loop == this_loop)
2873 break;
2874 }
2875 }
2876
2877 /* Make sure that the target of P is within the current loop. */
2878
2879 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
2880 && uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
2881 outer_loop = this_loop;
2882
2883 /* If we stopped on a JUMP_INSN to the next insn after INSN,
2884 we have a block of code to try to move.
2885
2886 We look backward and then forward from the target of INSN
2887 to find a BARRIER at the same loop depth as the target.
2888 If we find such a BARRIER, we make a new label for the start
2889 of the block, invert the jump in P and point it to that label,
2890 and move the block of code to the spot we found. */
2891
2892 if (! outer_loop
2893 && GET_CODE (p) == JUMP_INSN
2894 && JUMP_LABEL (p) != 0
2895 /* Just ignore jumps to labels that were never emitted.
2896 These always indicate compilation errors. */
2897 && INSN_UID (JUMP_LABEL (p)) != 0
2898 && any_condjump_p (p) && onlyjump_p (p)
2899 && next_real_insn (JUMP_LABEL (p)) == our_next
2900 /* If it's not safe to move the sequence, then we
2901 mustn't try. */
2902 && insns_safe_to_move_p (p, NEXT_INSN (insn),
2903 &last_insn_to_move))
2904 {
2905 rtx target
2906 = JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
2907 struct loop *target_loop = uid_loop[INSN_UID (target)];
2908 rtx loc, loc2;
2909 rtx tmp;
2910
2911 /* Search for possible garbage past the conditional jumps
2912 and look for the last barrier. */
2913 for (tmp = last_insn_to_move;
2914 tmp && GET_CODE (tmp) != CODE_LABEL; tmp = NEXT_INSN (tmp))
2915 if (GET_CODE (tmp) == BARRIER)
2916 last_insn_to_move = tmp;
2917
2918 for (loc = target; loc; loc = PREV_INSN (loc))
2919 if (GET_CODE (loc) == BARRIER
2920 /* Don't move things inside a tablejump. */
2921 && ((loc2 = next_nonnote_insn (loc)) == 0
2922 || GET_CODE (loc2) != CODE_LABEL
2923 || (loc2 = next_nonnote_insn (loc2)) == 0
2924 || GET_CODE (loc2) != JUMP_INSN
2925 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2926 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2927 && uid_loop[INSN_UID (loc)] == target_loop)
2928 break;
2929
2930 if (loc == 0)
2931 for (loc = target; loc; loc = NEXT_INSN (loc))
2932 if (GET_CODE (loc) == BARRIER
2933 /* Don't move things inside a tablejump. */
2934 && ((loc2 = next_nonnote_insn (loc)) == 0
2935 || GET_CODE (loc2) != CODE_LABEL
2936 || (loc2 = next_nonnote_insn (loc2)) == 0
2937 || GET_CODE (loc2) != JUMP_INSN
2938 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2939 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2940 && uid_loop[INSN_UID (loc)] == target_loop)
2941 break;
2942
2943 if (loc)
2944 {
2945 rtx cond_label = JUMP_LABEL (p);
2946 rtx new_label = get_label_after (p);
2947
2948 /* Ensure our label doesn't go away. */
2949 LABEL_NUSES (cond_label)++;
2950
2951 /* Verify that uid_loop is large enough and that
2952 we can invert P. */
2953 if (invert_jump (p, new_label, 1))
2954 {
2955 rtx q, r;
2956
2957 /* If no suitable BARRIER was found, create a suitable
2958 one before TARGET. Since TARGET is a fall through
2959 path, we'll need to insert a jump around our block
2960 and add a BARRIER before TARGET.
2961
2962 This creates an extra unconditional jump outside
2963 the loop. However, the benefits of removing rarely
2964 executed instructions from inside the loop usually
2965 outweighs the cost of the extra unconditional jump
2966 outside the loop. */
2967 if (loc == 0)
2968 {
2969 rtx temp;
2970
2971 temp = gen_jump (JUMP_LABEL (insn));
2972 temp = emit_jump_insn_before (temp, target);
2973 JUMP_LABEL (temp) = JUMP_LABEL (insn);
2974 LABEL_NUSES (JUMP_LABEL (insn))++;
2975 loc = emit_barrier_before (target);
2976 }
2977
2978 /* Include the BARRIER after INSN and copy the
2979 block after LOC. */
2980 if (squeeze_notes (&new_label, &last_insn_to_move))
2981 abort ();
2982 reorder_insns (new_label, last_insn_to_move, loc);
2983
2984 /* All those insns are now in TARGET_LOOP. */
2985 for (q = new_label;
2986 q != NEXT_INSN (last_insn_to_move);
2987 q = NEXT_INSN (q))
2988 uid_loop[INSN_UID (q)] = target_loop;
2989
2990 /* The label jumped to by INSN is no longer a loop
2991 exit. Unless INSN does not have a label (e.g.,
2992 it is a RETURN insn), search loop->exit_labels
2993 to find its label_ref, and remove it. Also turn
2994 off LABEL_OUTSIDE_LOOP_P bit. */
2995 if (JUMP_LABEL (insn))
2996 {
2997 for (q = 0, r = this_loop->exit_labels;
2998 r;
2999 q = r, r = LABEL_NEXTREF (r))
3000 if (XEXP (r, 0) == JUMP_LABEL (insn))
3001 {
3002 LABEL_OUTSIDE_LOOP_P (r) = 0;
3003 if (q)
3004 LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
3005 else
3006 this_loop->exit_labels = LABEL_NEXTREF (r);
3007 break;
3008 }
3009
3010 for (loop = this_loop; loop && loop != target_loop;
3011 loop = loop->outer)
3012 loop->exit_count--;
3013
3014 /* If we didn't find it, then something is
3015 wrong. */
3016 if (! r)
3017 abort ();
3018 }
3019
3020 /* P is now a jump outside the loop, so it must be put
3021 in loop->exit_labels, and marked as such.
3022 The easiest way to do this is to just call
3023 mark_loop_jump again for P. */
3024 mark_loop_jump (PATTERN (p), this_loop);
3025
3026 /* If INSN now jumps to the insn after it,
3027 delete INSN. */
3028 if (JUMP_LABEL (insn) != 0
3029 && (next_real_insn (JUMP_LABEL (insn))
3030 == next_real_insn (insn)))
3031 delete_related_insns (insn);
3032 }
3033
3034 /* Continue the loop after where the conditional
3035 branch used to jump, since the only branch insn
3036 in the block (if it still remains) is an inter-loop
3037 branch and hence needs no processing. */
3038 insn = NEXT_INSN (cond_label);
3039
3040 if (--LABEL_NUSES (cond_label) == 0)
3041 delete_related_insns (cond_label);
3042
3043 /* This loop will be continued with NEXT_INSN (insn). */
3044 insn = PREV_INSN (insn);
3045 }
3046 }
3047 }
3048 }
3049 }
3050
3051 /* If any label in X jumps to a loop different from LOOP_NUM and any of the
3052 loops it is contained in, mark the target loop invalid.
3053
3054 For speed, we assume that X is part of a pattern of a JUMP_INSN. */
3055
3056 static void
3057 mark_loop_jump (rtx x, struct loop *loop)
3058 {
3059 struct loop *dest_loop;
3060 struct loop *outer_loop;
3061 int i;
3062
3063 switch (GET_CODE (x))
3064 {
3065 case PC:
3066 case USE:
3067 case CLOBBER:
3068 case REG:
3069 case MEM:
3070 case CONST_INT:
3071 case CONST_DOUBLE:
3072 case RETURN:
3073 return;
3074
3075 case CONST:
3076 /* There could be a label reference in here. */
3077 mark_loop_jump (XEXP (x, 0), loop);
3078 return;
3079
3080 case PLUS:
3081 case MINUS:
3082 case MULT:
3083 mark_loop_jump (XEXP (x, 0), loop);
3084 mark_loop_jump (XEXP (x, 1), loop);
3085 return;
3086
3087 case LO_SUM:
3088 /* This may refer to a LABEL_REF or SYMBOL_REF. */
3089 mark_loop_jump (XEXP (x, 1), loop);
3090 return;
3091
3092 case SIGN_EXTEND:
3093 case ZERO_EXTEND:
3094 mark_loop_jump (XEXP (x, 0), loop);
3095 return;
3096
3097 case LABEL_REF:
3098 dest_loop = uid_loop[INSN_UID (XEXP (x, 0))];
3099
3100 /* Link together all labels that branch outside the loop. This
3101 is used by final_[bg]iv_value and the loop unrolling code. Also
3102 mark this LABEL_REF so we know that this branch should predict
3103 false. */
3104
3105 /* A check to make sure the label is not in an inner nested loop,
3106 since this does not count as a loop exit. */
3107 if (dest_loop)
3108 {
3109 for (outer_loop = dest_loop; outer_loop;
3110 outer_loop = outer_loop->outer)
3111 if (outer_loop == loop)
3112 break;
3113 }
3114 else
3115 outer_loop = NULL;
3116
3117 if (loop && ! outer_loop)
3118 {
3119 LABEL_OUTSIDE_LOOP_P (x) = 1;
3120 LABEL_NEXTREF (x) = loop->exit_labels;
3121 loop->exit_labels = x;
3122
3123 for (outer_loop = loop;
3124 outer_loop && outer_loop != dest_loop;
3125 outer_loop = outer_loop->outer)
3126 outer_loop->exit_count++;
3127 }
3128
3129 /* If this is inside a loop, but not in the current loop or one enclosed
3130 by it, it invalidates at least one loop. */
3131
3132 if (! dest_loop)
3133 return;
3134
3135 /* We must invalidate every nested loop containing the target of this
3136 label, except those that also contain the jump insn. */
3137
3138 for (; dest_loop; dest_loop = dest_loop->outer)
3139 {
3140 /* Stop when we reach a loop that also contains the jump insn. */
3141 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3142 if (dest_loop == outer_loop)
3143 return;
3144
3145 /* If we get here, we know we need to invalidate a loop. */
3146 if (loop_dump_stream && ! dest_loop->invalid)
3147 fprintf (loop_dump_stream,
3148 "\nLoop at %d ignored due to multiple entry points.\n",
3149 INSN_UID (dest_loop->start));
3150
3151 dest_loop->invalid = 1;
3152 }
3153 return;
3154
3155 case SET:
3156 /* If this is not setting pc, ignore. */
3157 if (SET_DEST (x) == pc_rtx)
3158 mark_loop_jump (SET_SRC (x), loop);
3159 return;
3160
3161 case IF_THEN_ELSE:
3162 mark_loop_jump (XEXP (x, 1), loop);
3163 mark_loop_jump (XEXP (x, 2), loop);
3164 return;
3165
3166 case PARALLEL:
3167 case ADDR_VEC:
3168 for (i = 0; i < XVECLEN (x, 0); i++)
3169 mark_loop_jump (XVECEXP (x, 0, i), loop);
3170 return;
3171
3172 case ADDR_DIFF_VEC:
3173 for (i = 0; i < XVECLEN (x, 1); i++)
3174 mark_loop_jump (XVECEXP (x, 1, i), loop);
3175 return;
3176
3177 default:
3178 /* Strictly speaking this is not a jump into the loop, only a possible
3179 jump out of the loop. However, we have no way to link the destination
3180 of this jump onto the list of exit labels. To be safe we mark this
3181 loop and any containing loops as invalid. */
3182 if (loop)
3183 {
3184 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3185 {
3186 if (loop_dump_stream && ! outer_loop->invalid)
3187 fprintf (loop_dump_stream,
3188 "\nLoop at %d ignored due to unknown exit jump.\n",
3189 INSN_UID (outer_loop->start));
3190 outer_loop->invalid = 1;
3191 }
3192 }
3193 return;
3194 }
3195 }
3196 \f
3197 /* Return nonzero if there is a label in the range from
3198 insn INSN to and including the insn whose luid is END
3199 INSN must have an assigned luid (i.e., it must not have
3200 been previously created by loop.c). */
3201
3202 static int
3203 labels_in_range_p (rtx insn, int end)
3204 {
3205 while (insn && INSN_LUID (insn) <= end)
3206 {
3207 if (GET_CODE (insn) == CODE_LABEL)
3208 return 1;
3209 insn = NEXT_INSN (insn);
3210 }
3211
3212 return 0;
3213 }
3214
3215 /* Record that a memory reference X is being set. */
3216
3217 static void
3218 note_addr_stored (rtx x, rtx y ATTRIBUTE_UNUSED,
3219 void *data ATTRIBUTE_UNUSED)
3220 {
3221 struct loop_info *loop_info = data;
3222
3223 if (x == 0 || GET_CODE (x) != MEM)
3224 return;
3225
3226 /* Count number of memory writes.
3227 This affects heuristics in strength_reduce. */
3228 loop_info->num_mem_sets++;
3229
3230 /* BLKmode MEM means all memory is clobbered. */
3231 if (GET_MODE (x) == BLKmode)
3232 {
3233 if (RTX_UNCHANGING_P (x))
3234 loop_info->unknown_constant_address_altered = 1;
3235 else
3236 loop_info->unknown_address_altered = 1;
3237
3238 return;
3239 }
3240
3241 loop_info->store_mems = gen_rtx_EXPR_LIST (VOIDmode, x,
3242 loop_info->store_mems);
3243 }
3244
3245 /* X is a value modified by an INSN that references a biv inside a loop
3246 exit test (ie, X is somehow related to the value of the biv). If X
3247 is a pseudo that is used more than once, then the biv is (effectively)
3248 used more than once. DATA is a pointer to a loop_regs structure. */
3249
3250 static void
3251 note_set_pseudo_multiple_uses (rtx x, rtx y ATTRIBUTE_UNUSED, void *data)
3252 {
3253 struct loop_regs *regs = (struct loop_regs *) data;
3254
3255 if (x == 0)
3256 return;
3257
3258 while (GET_CODE (x) == STRICT_LOW_PART
3259 || GET_CODE (x) == SIGN_EXTRACT
3260 || GET_CODE (x) == ZERO_EXTRACT
3261 || GET_CODE (x) == SUBREG)
3262 x = XEXP (x, 0);
3263
3264 if (GET_CODE (x) != REG || REGNO (x) < FIRST_PSEUDO_REGISTER)
3265 return;
3266
3267 /* If we do not have usage information, or if we know the register
3268 is used more than once, note that fact for check_dbra_loop. */
3269 if (REGNO (x) >= max_reg_before_loop
3270 || ! regs->array[REGNO (x)].single_usage
3271 || regs->array[REGNO (x)].single_usage == const0_rtx)
3272 regs->multiple_uses = 1;
3273 }
3274 \f
3275 /* Return nonzero if the rtx X is invariant over the current loop.
3276
3277 The value is 2 if we refer to something only conditionally invariant.
3278
3279 A memory ref is invariant if it is not volatile and does not conflict
3280 with anything stored in `loop_info->store_mems'. */
3281
3282 int
3283 loop_invariant_p (const struct loop *loop, rtx x)
3284 {
3285 struct loop_info *loop_info = LOOP_INFO (loop);
3286 struct loop_regs *regs = LOOP_REGS (loop);
3287 int i;
3288 enum rtx_code code;
3289 const char *fmt;
3290 int conditional = 0;
3291 rtx mem_list_entry;
3292
3293 if (x == 0)
3294 return 1;
3295 code = GET_CODE (x);
3296 switch (code)
3297 {
3298 case CONST_INT:
3299 case CONST_DOUBLE:
3300 case SYMBOL_REF:
3301 case CONST:
3302 return 1;
3303
3304 case LABEL_REF:
3305 /* A LABEL_REF is normally invariant, however, if we are unrolling
3306 loops, and this label is inside the loop, then it isn't invariant.
3307 This is because each unrolled copy of the loop body will have
3308 a copy of this label. If this was invariant, then an insn loading
3309 the address of this label into a register might get moved outside
3310 the loop, and then each loop body would end up using the same label.
3311
3312 We don't know the loop bounds here though, so just fail for all
3313 labels. */
3314 if (flag_old_unroll_loops)
3315 return 0;
3316 else
3317 return 1;
3318
3319 case PC:
3320 case CC0:
3321 case UNSPEC_VOLATILE:
3322 return 0;
3323
3324 case REG:
3325 /* We used to check RTX_UNCHANGING_P (x) here, but that is invalid
3326 since the reg might be set by initialization within the loop. */
3327
3328 if ((x == frame_pointer_rtx || x == hard_frame_pointer_rtx
3329 || x == arg_pointer_rtx || x == pic_offset_table_rtx)
3330 && ! current_function_has_nonlocal_goto)
3331 return 1;
3332
3333 if (LOOP_INFO (loop)->has_call
3334 && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
3335 return 0;
3336
3337 /* Out-of-range regs can occur when we are called from unrolling.
3338 These registers created by the unroller are set in the loop,
3339 hence are never invariant.
3340 Other out-of-range regs can be generated by load_mems; those that
3341 are written to in the loop are not invariant, while those that are
3342 not written to are invariant. It would be easy for load_mems
3343 to set n_times_set correctly for these registers, however, there
3344 is no easy way to distinguish them from registers created by the
3345 unroller. */
3346
3347 if (REGNO (x) >= (unsigned) regs->num)
3348 return 0;
3349
3350 if (regs->array[REGNO (x)].set_in_loop < 0)
3351 return 2;
3352
3353 return regs->array[REGNO (x)].set_in_loop == 0;
3354
3355 case MEM:
3356 /* Volatile memory references must be rejected. Do this before
3357 checking for read-only items, so that volatile read-only items
3358 will be rejected also. */
3359 if (MEM_VOLATILE_P (x))
3360 return 0;
3361
3362 /* See if there is any dependence between a store and this load. */
3363 mem_list_entry = loop_info->store_mems;
3364 while (mem_list_entry)
3365 {
3366 if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
3367 x, rtx_varies_p))
3368 return 0;
3369
3370 mem_list_entry = XEXP (mem_list_entry, 1);
3371 }
3372
3373 /* It's not invalidated by a store in memory
3374 but we must still verify the address is invariant. */
3375 break;
3376
3377 case ASM_OPERANDS:
3378 /* Don't mess with insns declared volatile. */
3379 if (MEM_VOLATILE_P (x))
3380 return 0;
3381 break;
3382
3383 default:
3384 break;
3385 }
3386
3387 fmt = GET_RTX_FORMAT (code);
3388 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3389 {
3390 if (fmt[i] == 'e')
3391 {
3392 int tem = loop_invariant_p (loop, XEXP (x, i));
3393 if (tem == 0)
3394 return 0;
3395 if (tem == 2)
3396 conditional = 1;
3397 }
3398 else if (fmt[i] == 'E')
3399 {
3400 int j;
3401 for (j = 0; j < XVECLEN (x, i); j++)
3402 {
3403 int tem = loop_invariant_p (loop, XVECEXP (x, i, j));
3404 if (tem == 0)
3405 return 0;
3406 if (tem == 2)
3407 conditional = 1;
3408 }
3409
3410 }
3411 }
3412
3413 return 1 + conditional;
3414 }
3415 \f
3416 /* Return nonzero if all the insns in the loop that set REG
3417 are INSN and the immediately following insns,
3418 and if each of those insns sets REG in an invariant way
3419 (not counting uses of REG in them).
3420
3421 The value is 2 if some of these insns are only conditionally invariant.
3422
3423 We assume that INSN itself is the first set of REG
3424 and that its source is invariant. */
3425
3426 static int
3427 consec_sets_invariant_p (const struct loop *loop, rtx reg, int n_sets,
3428 rtx insn)
3429 {
3430 struct loop_regs *regs = LOOP_REGS (loop);
3431 rtx p = insn;
3432 unsigned int regno = REGNO (reg);
3433 rtx temp;
3434 /* Number of sets we have to insist on finding after INSN. */
3435 int count = n_sets - 1;
3436 int old = regs->array[regno].set_in_loop;
3437 int value = 0;
3438 int this;
3439
3440 /* If N_SETS hit the limit, we can't rely on its value. */
3441 if (n_sets == 127)
3442 return 0;
3443
3444 regs->array[regno].set_in_loop = 0;
3445
3446 while (count > 0)
3447 {
3448 enum rtx_code code;
3449 rtx set;
3450
3451 p = NEXT_INSN (p);
3452 code = GET_CODE (p);
3453
3454 /* If library call, skip to end of it. */
3455 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
3456 p = XEXP (temp, 0);
3457
3458 this = 0;
3459 if (code == INSN
3460 && (set = single_set (p))
3461 && GET_CODE (SET_DEST (set)) == REG
3462 && REGNO (SET_DEST (set)) == regno)
3463 {
3464 this = loop_invariant_p (loop, SET_SRC (set));
3465 if (this != 0)
3466 value |= this;
3467 else if ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX)))
3468 {
3469 /* If this is a libcall, then any invariant REG_EQUAL note is OK.
3470 If this is an ordinary insn, then only CONSTANT_P REG_EQUAL
3471 notes are OK. */
3472 this = (CONSTANT_P (XEXP (temp, 0))
3473 || (find_reg_note (p, REG_RETVAL, NULL_RTX)
3474 && loop_invariant_p (loop, XEXP (temp, 0))));
3475 if (this != 0)
3476 value |= this;
3477 }
3478 }
3479 if (this != 0)
3480 count--;
3481 else if (code != NOTE)
3482 {
3483 regs->array[regno].set_in_loop = old;
3484 return 0;
3485 }
3486 }
3487
3488 regs->array[regno].set_in_loop = old;
3489 /* If loop_invariant_p ever returned 2, we return 2. */
3490 return 1 + (value & 2);
3491 }
3492
3493 #if 0
3494 /* I don't think this condition is sufficient to allow INSN
3495 to be moved, so we no longer test it. */
3496
3497 /* Return 1 if all insns in the basic block of INSN and following INSN
3498 that set REG are invariant according to TABLE. */
3499
3500 static int
3501 all_sets_invariant_p (rtx reg, rtx insn, short *table)
3502 {
3503 rtx p = insn;
3504 int regno = REGNO (reg);
3505
3506 while (1)
3507 {
3508 enum rtx_code code;
3509 p = NEXT_INSN (p);
3510 code = GET_CODE (p);
3511 if (code == CODE_LABEL || code == JUMP_INSN)
3512 return 1;
3513 if (code == INSN && GET_CODE (PATTERN (p)) == SET
3514 && GET_CODE (SET_DEST (PATTERN (p))) == REG
3515 && REGNO (SET_DEST (PATTERN (p))) == regno)
3516 {
3517 if (! loop_invariant_p (loop, SET_SRC (PATTERN (p)), table))
3518 return 0;
3519 }
3520 }
3521 }
3522 #endif /* 0 */
3523 \f
3524 /* Look at all uses (not sets) of registers in X. For each, if it is
3525 the single use, set USAGE[REGNO] to INSN; if there was a previous use in
3526 a different insn, set USAGE[REGNO] to const0_rtx. */
3527
3528 static void
3529 find_single_use_in_loop (struct loop_regs *regs, rtx insn, rtx x)
3530 {
3531 enum rtx_code code = GET_CODE (x);
3532 const char *fmt = GET_RTX_FORMAT (code);
3533 int i, j;
3534
3535 if (code == REG)
3536 regs->array[REGNO (x)].single_usage
3537 = (regs->array[REGNO (x)].single_usage != 0
3538 && regs->array[REGNO (x)].single_usage != insn)
3539 ? const0_rtx : insn;
3540
3541 else if (code == SET)
3542 {
3543 /* Don't count SET_DEST if it is a REG; otherwise count things
3544 in SET_DEST because if a register is partially modified, it won't
3545 show up as a potential movable so we don't care how USAGE is set
3546 for it. */
3547 if (GET_CODE (SET_DEST (x)) != REG)
3548 find_single_use_in_loop (regs, insn, SET_DEST (x));
3549 find_single_use_in_loop (regs, insn, SET_SRC (x));
3550 }
3551 else
3552 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3553 {
3554 if (fmt[i] == 'e' && XEXP (x, i) != 0)
3555 find_single_use_in_loop (regs, insn, XEXP (x, i));
3556 else if (fmt[i] == 'E')
3557 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3558 find_single_use_in_loop (regs, insn, XVECEXP (x, i, j));
3559 }
3560 }
3561 \f
3562 /* Count and record any set in X which is contained in INSN. Update
3563 REGS->array[I].MAY_NOT_OPTIMIZE and LAST_SET for any register I set
3564 in X. */
3565
3566 static void
3567 count_one_set (struct loop_regs *regs, rtx insn, rtx x, rtx *last_set)
3568 {
3569 if (GET_CODE (x) == CLOBBER && GET_CODE (XEXP (x, 0)) == REG)
3570 /* Don't move a reg that has an explicit clobber.
3571 It's not worth the pain to try to do it correctly. */
3572 regs->array[REGNO (XEXP (x, 0))].may_not_optimize = 1;
3573
3574 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
3575 {
3576 rtx dest = SET_DEST (x);
3577 while (GET_CODE (dest) == SUBREG
3578 || GET_CODE (dest) == ZERO_EXTRACT
3579 || GET_CODE (dest) == SIGN_EXTRACT
3580 || GET_CODE (dest) == STRICT_LOW_PART)
3581 dest = XEXP (dest, 0);
3582 if (GET_CODE (dest) == REG)
3583 {
3584 int i;
3585 int regno = REGNO (dest);
3586 for (i = 0; i < LOOP_REGNO_NREGS (regno, dest); i++)
3587 {
3588 /* If this is the first setting of this reg
3589 in current basic block, and it was set before,
3590 it must be set in two basic blocks, so it cannot
3591 be moved out of the loop. */
3592 if (regs->array[regno].set_in_loop > 0
3593 && last_set[regno] == 0)
3594 regs->array[regno+i].may_not_optimize = 1;
3595 /* If this is not first setting in current basic block,
3596 see if reg was used in between previous one and this.
3597 If so, neither one can be moved. */
3598 if (last_set[regno] != 0
3599 && reg_used_between_p (dest, last_set[regno], insn))
3600 regs->array[regno+i].may_not_optimize = 1;
3601 if (regs->array[regno+i].set_in_loop < 127)
3602 ++regs->array[regno+i].set_in_loop;
3603 last_set[regno+i] = insn;
3604 }
3605 }
3606 }
3607 }
3608 \f
3609 /* Given a loop that is bounded by LOOP->START and LOOP->END and that
3610 is entered at LOOP->SCAN_START, return 1 if the register set in SET
3611 contained in insn INSN is used by any insn that precedes INSN in
3612 cyclic order starting from the loop entry point.
3613
3614 We don't want to use INSN_LUID here because if we restrict INSN to those
3615 that have a valid INSN_LUID, it means we cannot move an invariant out
3616 from an inner loop past two loops. */
3617
3618 static int
3619 loop_reg_used_before_p (const struct loop *loop, rtx set, rtx insn)
3620 {
3621 rtx reg = SET_DEST (set);
3622 rtx p;
3623
3624 /* Scan forward checking for register usage. If we hit INSN, we
3625 are done. Otherwise, if we hit LOOP->END, wrap around to LOOP->START. */
3626 for (p = loop->scan_start; p != insn; p = NEXT_INSN (p))
3627 {
3628 if (INSN_P (p) && reg_overlap_mentioned_p (reg, PATTERN (p)))
3629 return 1;
3630
3631 if (p == loop->end)
3632 p = loop->start;
3633 }
3634
3635 return 0;
3636 }
3637 \f
3638
3639 /* Information we collect about arrays that we might want to prefetch. */
3640 struct prefetch_info
3641 {
3642 struct iv_class *class; /* Class this prefetch is based on. */
3643 struct induction *giv; /* GIV this prefetch is based on. */
3644 rtx base_address; /* Start prefetching from this address plus
3645 index. */
3646 HOST_WIDE_INT index;
3647 HOST_WIDE_INT stride; /* Prefetch stride in bytes in each
3648 iteration. */
3649 unsigned int bytes_accessed; /* Sum of sizes of all accesses to this
3650 prefetch area in one iteration. */
3651 unsigned int total_bytes; /* Total bytes loop will access in this block.
3652 This is set only for loops with known
3653 iteration counts and is 0xffffffff
3654 otherwise. */
3655 int prefetch_in_loop; /* Number of prefetch insns in loop. */
3656 int prefetch_before_loop; /* Number of prefetch insns before loop. */
3657 unsigned int write : 1; /* 1 for read/write prefetches. */
3658 };
3659
3660 /* Data used by check_store function. */
3661 struct check_store_data
3662 {
3663 rtx mem_address;
3664 int mem_write;
3665 };
3666
3667 static void check_store (rtx, rtx, void *);
3668 static void emit_prefetch_instructions (struct loop *);
3669 static int rtx_equal_for_prefetch_p (rtx, rtx);
3670
3671 /* Set mem_write when mem_address is found. Used as callback to
3672 note_stores. */
3673 static void
3674 check_store (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
3675 {
3676 struct check_store_data *d = (struct check_store_data *) data;
3677
3678 if ((GET_CODE (x) == MEM) && rtx_equal_p (d->mem_address, XEXP (x, 0)))
3679 d->mem_write = 1;
3680 }
3681 \f
3682 /* Like rtx_equal_p, but attempts to swap commutative operands. This is
3683 important to get some addresses combined. Later more sophisticated
3684 transformations can be added when necessary.
3685
3686 ??? Same trick with swapping operand is done at several other places.
3687 It can be nice to develop some common way to handle this. */
3688
3689 static int
3690 rtx_equal_for_prefetch_p (rtx x, rtx y)
3691 {
3692 int i;
3693 int j;
3694 enum rtx_code code = GET_CODE (x);
3695 const char *fmt;
3696
3697 if (x == y)
3698 return 1;
3699 if (code != GET_CODE (y))
3700 return 0;
3701
3702 code = GET_CODE (x);
3703
3704 if (GET_RTX_CLASS (code) == 'c')
3705 {
3706 return ((rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 0))
3707 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 1)))
3708 || (rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 1))
3709 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 0))));
3710 }
3711 /* Compare the elements. If any pair of corresponding elements fails to
3712 match, return 0 for the whole thing. */
3713
3714 fmt = GET_RTX_FORMAT (code);
3715 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3716 {
3717 switch (fmt[i])
3718 {
3719 case 'w':
3720 if (XWINT (x, i) != XWINT (y, i))
3721 return 0;
3722 break;
3723
3724 case 'i':
3725 if (XINT (x, i) != XINT (y, i))
3726 return 0;
3727 break;
3728
3729 case 'E':
3730 /* Two vectors must have the same length. */
3731 if (XVECLEN (x, i) != XVECLEN (y, i))
3732 return 0;
3733
3734 /* And the corresponding elements must match. */
3735 for (j = 0; j < XVECLEN (x, i); j++)
3736 if (rtx_equal_for_prefetch_p (XVECEXP (x, i, j),
3737 XVECEXP (y, i, j)) == 0)
3738 return 0;
3739 break;
3740
3741 case 'e':
3742 if (rtx_equal_for_prefetch_p (XEXP (x, i), XEXP (y, i)) == 0)
3743 return 0;
3744 break;
3745
3746 case 's':
3747 if (strcmp (XSTR (x, i), XSTR (y, i)))
3748 return 0;
3749 break;
3750
3751 case 'u':
3752 /* These are just backpointers, so they don't matter. */
3753 break;
3754
3755 case '0':
3756 break;
3757
3758 /* It is believed that rtx's at this level will never
3759 contain anything but integers and other rtx's,
3760 except for within LABEL_REFs and SYMBOL_REFs. */
3761 default:
3762 abort ();
3763 }
3764 }
3765 return 1;
3766 }
3767 \f
3768 /* Remove constant addition value from the expression X (when present)
3769 and return it. */
3770
3771 static HOST_WIDE_INT
3772 remove_constant_addition (rtx *x)
3773 {
3774 HOST_WIDE_INT addval = 0;
3775 rtx exp = *x;
3776
3777 /* Avoid clobbering a shared CONST expression. */
3778 if (GET_CODE (exp) == CONST)
3779 {
3780 if (GET_CODE (XEXP (exp, 0)) == PLUS
3781 && GET_CODE (XEXP (XEXP (exp, 0), 0)) == SYMBOL_REF
3782 && GET_CODE (XEXP (XEXP (exp, 0), 1)) == CONST_INT)
3783 {
3784 *x = XEXP (XEXP (exp, 0), 0);
3785 return INTVAL (XEXP (XEXP (exp, 0), 1));
3786 }
3787 return 0;
3788 }
3789
3790 if (GET_CODE (exp) == CONST_INT)
3791 {
3792 addval = INTVAL (exp);
3793 *x = const0_rtx;
3794 }
3795
3796 /* For plus expression recurse on ourself. */
3797 else if (GET_CODE (exp) == PLUS)
3798 {
3799 addval += remove_constant_addition (&XEXP (exp, 0));
3800 addval += remove_constant_addition (&XEXP (exp, 1));
3801
3802 /* In case our parameter was constant, remove extra zero from the
3803 expression. */
3804 if (XEXP (exp, 0) == const0_rtx)
3805 *x = XEXP (exp, 1);
3806 else if (XEXP (exp, 1) == const0_rtx)
3807 *x = XEXP (exp, 0);
3808 }
3809
3810 return addval;
3811 }
3812
3813 /* Attempt to identify accesses to arrays that are most likely to cause cache
3814 misses, and emit prefetch instructions a few prefetch blocks forward.
3815
3816 To detect the arrays we use the GIV information that was collected by the
3817 strength reduction pass.
3818
3819 The prefetch instructions are generated after the GIV information is done
3820 and before the strength reduction process. The new GIVs are injected into
3821 the strength reduction tables, so the prefetch addresses are optimized as
3822 well.
3823
3824 GIVs are split into base address, stride, and constant addition values.
3825 GIVs with the same address, stride and close addition values are combined
3826 into a single prefetch. Also writes to GIVs are detected, so that prefetch
3827 for write instructions can be used for the block we write to, on machines
3828 that support write prefetches.
3829
3830 Several heuristics are used to determine when to prefetch. They are
3831 controlled by defined symbols that can be overridden for each target. */
3832
3833 static void
3834 emit_prefetch_instructions (struct loop *loop)
3835 {
3836 int num_prefetches = 0;
3837 int num_real_prefetches = 0;
3838 int num_real_write_prefetches = 0;
3839 int num_prefetches_before = 0;
3840 int num_write_prefetches_before = 0;
3841 int ahead = 0;
3842 int i;
3843 struct iv_class *bl;
3844 struct induction *iv;
3845 struct prefetch_info info[MAX_PREFETCHES];
3846 struct loop_ivs *ivs = LOOP_IVS (loop);
3847
3848 if (!HAVE_prefetch)
3849 return;
3850
3851 /* Consider only loops w/o calls. When a call is done, the loop is probably
3852 slow enough to read the memory. */
3853 if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call)
3854 {
3855 if (loop_dump_stream)
3856 fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n");
3857
3858 return;
3859 }
3860
3861 /* Don't prefetch in loops known to have few iterations. */
3862 if (PREFETCH_NO_LOW_LOOPCNT
3863 && LOOP_INFO (loop)->n_iterations
3864 && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT)
3865 {
3866 if (loop_dump_stream)
3867 fprintf (loop_dump_stream,
3868 "Prefetch: ignoring loop: not enough iterations.\n");
3869 return;
3870 }
3871
3872 /* Search all induction variables and pick those interesting for the prefetch
3873 machinery. */
3874 for (bl = ivs->list; bl; bl = bl->next)
3875 {
3876 struct induction *biv = bl->biv, *biv1;
3877 int basestride = 0;
3878
3879 biv1 = biv;
3880
3881 /* Expect all BIVs to be executed in each iteration. This makes our
3882 analysis more conservative. */
3883 while (biv1)
3884 {
3885 /* Discard non-constant additions that we can't handle well yet, and
3886 BIVs that are executed multiple times; such BIVs ought to be
3887 handled in the nested loop. We accept not_every_iteration BIVs,
3888 since these only result in larger strides and make our
3889 heuristics more conservative. */
3890 if (GET_CODE (biv->add_val) != CONST_INT)
3891 {
3892 if (loop_dump_stream)
3893 {
3894 fprintf (loop_dump_stream,
3895 "Prefetch: ignoring biv %d: non-constant addition at insn %d:",
3896 REGNO (biv->src_reg), INSN_UID (biv->insn));
3897 print_rtl (loop_dump_stream, biv->add_val);
3898 fprintf (loop_dump_stream, "\n");
3899 }
3900 break;
3901 }
3902
3903 if (biv->maybe_multiple)
3904 {
3905 if (loop_dump_stream)
3906 {
3907 fprintf (loop_dump_stream,
3908 "Prefetch: ignoring biv %d: maybe_multiple at insn %i:",
3909 REGNO (biv->src_reg), INSN_UID (biv->insn));
3910 print_rtl (loop_dump_stream, biv->add_val);
3911 fprintf (loop_dump_stream, "\n");
3912 }
3913 break;
3914 }
3915
3916 basestride += INTVAL (biv1->add_val);
3917 biv1 = biv1->next_iv;
3918 }
3919
3920 if (biv1 || !basestride)
3921 continue;
3922
3923 for (iv = bl->giv; iv; iv = iv->next_iv)
3924 {
3925 rtx address;
3926 rtx temp;
3927 HOST_WIDE_INT index = 0;
3928 int add = 1;
3929 HOST_WIDE_INT stride = 0;
3930 int stride_sign = 1;
3931 struct check_store_data d;
3932 const char *ignore_reason = NULL;
3933 int size = GET_MODE_SIZE (GET_MODE (iv));
3934
3935 /* See whether an induction variable is interesting to us and if
3936 not, report the reason. */
3937 if (iv->giv_type != DEST_ADDR)
3938 ignore_reason = "giv is not a destination address";
3939
3940 /* We are interested only in constant stride memory references
3941 in order to be able to compute density easily. */
3942 else if (GET_CODE (iv->mult_val) != CONST_INT)
3943 ignore_reason = "stride is not constant";
3944
3945 else
3946 {
3947 stride = INTVAL (iv->mult_val) * basestride;
3948 if (stride < 0)
3949 {
3950 stride = -stride;
3951 stride_sign = -1;
3952 }
3953
3954 /* On some targets, reversed order prefetches are not
3955 worthwhile. */
3956 if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0)
3957 ignore_reason = "reversed order stride";
3958
3959 /* Prefetch of accesses with an extreme stride might not be
3960 worthwhile, either. */
3961 else if (PREFETCH_NO_EXTREME_STRIDE
3962 && stride > PREFETCH_EXTREME_STRIDE)
3963 ignore_reason = "extreme stride";
3964
3965 /* Ignore GIVs with varying add values; we can't predict the
3966 value for the next iteration. */
3967 else if (!loop_invariant_p (loop, iv->add_val))
3968 ignore_reason = "giv has varying add value";
3969
3970 /* Ignore GIVs in the nested loops; they ought to have been
3971 handled already. */
3972 else if (iv->maybe_multiple)
3973 ignore_reason = "giv is in nested loop";
3974 }
3975
3976 if (ignore_reason != NULL)
3977 {
3978 if (loop_dump_stream)
3979 fprintf (loop_dump_stream,
3980 "Prefetch: ignoring giv at %d: %s.\n",
3981 INSN_UID (iv->insn), ignore_reason);
3982 continue;
3983 }
3984
3985 /* Determine the pointer to the basic array we are examining. It is
3986 the sum of the BIV's initial value and the GIV's add_val. */
3987 address = copy_rtx (iv->add_val);
3988 temp = copy_rtx (bl->initial_value);
3989
3990 address = simplify_gen_binary (PLUS, Pmode, temp, address);
3991 index = remove_constant_addition (&address);
3992
3993 d.mem_write = 0;
3994 d.mem_address = *iv->location;
3995
3996 /* When the GIV is not always executed, we might be better off by
3997 not dirtying the cache pages. */
3998 if (PREFETCH_CONDITIONAL || iv->always_executed)
3999 note_stores (PATTERN (iv->insn), check_store, &d);
4000 else
4001 {
4002 if (loop_dump_stream)
4003 fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %d: %s\n",
4004 INSN_UID (iv->insn), "in conditional code.");
4005 continue;
4006 }
4007
4008 /* Attempt to find another prefetch to the same array and see if we
4009 can merge this one. */
4010 for (i = 0; i < num_prefetches; i++)
4011 if (rtx_equal_for_prefetch_p (address, info[i].base_address)
4012 && stride == info[i].stride)
4013 {
4014 /* In case both access same array (same location
4015 just with small difference in constant indexes), merge
4016 the prefetches. Just do the later and the earlier will
4017 get prefetched from previous iteration.
4018 The artificial threshold should not be too small,
4019 but also not bigger than small portion of memory usually
4020 traversed by single loop. */
4021 if (index >= info[i].index
4022 && index - info[i].index < PREFETCH_EXTREME_DIFFERENCE)
4023 {
4024 info[i].write |= d.mem_write;
4025 info[i].bytes_accessed += size;
4026 info[i].index = index;
4027 info[i].giv = iv;
4028 info[i].class = bl;
4029 info[num_prefetches].base_address = address;
4030 add = 0;
4031 break;
4032 }
4033
4034 if (index < info[i].index
4035 && info[i].index - index < PREFETCH_EXTREME_DIFFERENCE)
4036 {
4037 info[i].write |= d.mem_write;
4038 info[i].bytes_accessed += size;
4039 add = 0;
4040 break;
4041 }
4042 }
4043
4044 /* Merging failed. */
4045 if (add)
4046 {
4047 info[num_prefetches].giv = iv;
4048 info[num_prefetches].class = bl;
4049 info[num_prefetches].index = index;
4050 info[num_prefetches].stride = stride;
4051 info[num_prefetches].base_address = address;
4052 info[num_prefetches].write = d.mem_write;
4053 info[num_prefetches].bytes_accessed = size;
4054 num_prefetches++;
4055 if (num_prefetches >= MAX_PREFETCHES)
4056 {
4057 if (loop_dump_stream)
4058 fprintf (loop_dump_stream,
4059 "Maximal number of prefetches exceeded.\n");
4060 return;
4061 }
4062 }
4063 }
4064 }
4065
4066 for (i = 0; i < num_prefetches; i++)
4067 {
4068 int density;
4069
4070 /* Attempt to calculate the total number of bytes fetched by all
4071 iterations of the loop. Avoid overflow. */
4072 if (LOOP_INFO (loop)->n_iterations
4073 && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
4074 >= LOOP_INFO (loop)->n_iterations))
4075 info[i].total_bytes = info[i].stride * LOOP_INFO (loop)->n_iterations;
4076 else
4077 info[i].total_bytes = 0xffffffff;
4078
4079 density = info[i].bytes_accessed * 100 / info[i].stride;
4080
4081 /* Prefetch might be worthwhile only when the loads/stores are dense. */
4082 if (PREFETCH_ONLY_DENSE_MEM)
4083 if (density * 256 > PREFETCH_DENSE_MEM * 100
4084 && (info[i].total_bytes / PREFETCH_BLOCK
4085 >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
4086 {
4087 info[i].prefetch_before_loop = 1;
4088 info[i].prefetch_in_loop
4089 = (info[i].total_bytes / PREFETCH_BLOCK
4090 > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
4091 }
4092 else
4093 {
4094 info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0;
4095 if (loop_dump_stream)
4096 fprintf (loop_dump_stream,
4097 "Prefetch: ignoring giv at %d: %d%% density is too low.\n",
4098 INSN_UID (info[i].giv->insn), density);
4099 }
4100 else
4101 info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1;
4102
4103 /* Find how many prefetch instructions we'll use within the loop. */
4104 if (info[i].prefetch_in_loop != 0)
4105 {
4106 info[i].prefetch_in_loop = ((info[i].stride + PREFETCH_BLOCK - 1)
4107 / PREFETCH_BLOCK);
4108 num_real_prefetches += info[i].prefetch_in_loop;
4109 if (info[i].write)
4110 num_real_write_prefetches += info[i].prefetch_in_loop;
4111 }
4112 }
4113
4114 /* Determine how many iterations ahead to prefetch within the loop, based
4115 on how many prefetches we currently expect to do within the loop. */
4116 if (num_real_prefetches != 0)
4117 {
4118 if ((ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches) == 0)
4119 {
4120 if (loop_dump_stream)
4121 fprintf (loop_dump_stream,
4122 "Prefetch: ignoring prefetches within loop: ahead is zero; %d < %d\n",
4123 SIMULTANEOUS_PREFETCHES, num_real_prefetches);
4124 num_real_prefetches = 0, num_real_write_prefetches = 0;
4125 }
4126 }
4127 /* We'll also use AHEAD to determine how many prefetch instructions to
4128 emit before a loop, so don't leave it zero. */
4129 if (ahead == 0)
4130 ahead = PREFETCH_BLOCKS_BEFORE_LOOP_MAX;
4131
4132 for (i = 0; i < num_prefetches; i++)
4133 {
4134 /* Update if we've decided not to prefetch anything within the loop. */
4135 if (num_real_prefetches == 0)
4136 info[i].prefetch_in_loop = 0;
4137
4138 /* Find how many prefetch instructions we'll use before the loop. */
4139 if (info[i].prefetch_before_loop != 0)
4140 {
4141 int n = info[i].total_bytes / PREFETCH_BLOCK;
4142 if (n > ahead)
4143 n = ahead;
4144 info[i].prefetch_before_loop = n;
4145 num_prefetches_before += n;
4146 if (info[i].write)
4147 num_write_prefetches_before += n;
4148 }
4149
4150 if (loop_dump_stream)
4151 {
4152 if (info[i].prefetch_in_loop == 0
4153 && info[i].prefetch_before_loop == 0)
4154 continue;
4155 fprintf (loop_dump_stream, "Prefetch insn: %d",
4156 INSN_UID (info[i].giv->insn));
4157 fprintf (loop_dump_stream,
4158 "; in loop: %d; before: %d; %s\n",
4159 info[i].prefetch_in_loop,
4160 info[i].prefetch_before_loop,
4161 info[i].write ? "read/write" : "read only");
4162 fprintf (loop_dump_stream,
4163 " density: %d%%; bytes_accessed: %u; total_bytes: %u\n",
4164 (int) (info[i].bytes_accessed * 100 / info[i].stride),
4165 info[i].bytes_accessed, info[i].total_bytes);
4166 fprintf (loop_dump_stream, " index: " HOST_WIDE_INT_PRINT_DEC
4167 "; stride: " HOST_WIDE_INT_PRINT_DEC "; address: ",
4168 info[i].index, info[i].stride);
4169 print_rtl (loop_dump_stream, info[i].base_address);
4170 fprintf (loop_dump_stream, "\n");
4171 }
4172 }
4173
4174 if (num_real_prefetches + num_prefetches_before > 0)
4175 {
4176 /* Record that this loop uses prefetch instructions. */
4177 LOOP_INFO (loop)->has_prefetch = 1;
4178
4179 if (loop_dump_stream)
4180 {
4181 fprintf (loop_dump_stream, "Real prefetches needed within loop: %d (write: %d)\n",
4182 num_real_prefetches, num_real_write_prefetches);
4183 fprintf (loop_dump_stream, "Real prefetches needed before loop: %d (write: %d)\n",
4184 num_prefetches_before, num_write_prefetches_before);
4185 }
4186 }
4187
4188 for (i = 0; i < num_prefetches; i++)
4189 {
4190 int y;
4191
4192 for (y = 0; y < info[i].prefetch_in_loop; y++)
4193 {
4194 rtx loc = copy_rtx (*info[i].giv->location);
4195 rtx insn;
4196 int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
4197 rtx before_insn = info[i].giv->insn;
4198 rtx prev_insn = PREV_INSN (info[i].giv->insn);
4199 rtx seq;
4200
4201 /* We can save some effort by offsetting the address on
4202 architectures with offsettable memory references. */
4203 if (offsettable_address_p (0, VOIDmode, loc))
4204 loc = plus_constant (loc, bytes_ahead);
4205 else
4206 {
4207 rtx reg = gen_reg_rtx (Pmode);
4208 loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
4209 GEN_INT (bytes_ahead), reg,
4210 0, before_insn);
4211 loc = reg;
4212 }
4213
4214 start_sequence ();
4215 /* Make sure the address operand is valid for prefetch. */
4216 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
4217 (loc, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
4218 loc = force_reg (Pmode, loc);
4219 emit_insn (gen_prefetch (loc, GEN_INT (info[i].write),
4220 GEN_INT (3)));
4221 seq = get_insns ();
4222 end_sequence ();
4223 emit_insn_before (seq, before_insn);
4224
4225 /* Check all insns emitted and record the new GIV
4226 information. */
4227 insn = NEXT_INSN (prev_insn);
4228 while (insn != before_insn)
4229 {
4230 insn = check_insn_for_givs (loop, insn,
4231 info[i].giv->always_executed,
4232 info[i].giv->maybe_multiple);
4233 insn = NEXT_INSN (insn);
4234 }
4235 }
4236
4237 if (PREFETCH_BEFORE_LOOP)
4238 {
4239 /* Emit insns before the loop to fetch the first cache lines or,
4240 if we're not prefetching within the loop, everything we expect
4241 to need. */
4242 for (y = 0; y < info[i].prefetch_before_loop; y++)
4243 {
4244 rtx reg = gen_reg_rtx (Pmode);
4245 rtx loop_start = loop->start;
4246 rtx init_val = info[i].class->initial_value;
4247 rtx add_val = simplify_gen_binary (PLUS, Pmode,
4248 info[i].giv->add_val,
4249 GEN_INT (y * PREFETCH_BLOCK));
4250
4251 /* Functions called by LOOP_IV_ADD_EMIT_BEFORE expect a
4252 non-constant INIT_VAL to have the same mode as REG, which
4253 in this case we know to be Pmode. */
4254 if (GET_MODE (init_val) != Pmode && !CONSTANT_P (init_val))
4255 {
4256 rtx seq;
4257
4258 start_sequence ();
4259 init_val = convert_to_mode (Pmode, init_val, 0);
4260 seq = get_insns ();
4261 end_sequence ();
4262 loop_insn_emit_before (loop, 0, loop_start, seq);
4263 }
4264 loop_iv_add_mult_emit_before (loop, init_val,
4265 info[i].giv->mult_val,
4266 add_val, reg, 0, loop_start);
4267 emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write),
4268 GEN_INT (3)),
4269 loop_start);
4270 }
4271 }
4272 }
4273
4274 return;
4275 }
4276 \f
4277 /* Communication with routines called via `note_stores'. */
4278
4279 static rtx note_insn;
4280
4281 /* Dummy register to have nonzero DEST_REG for DEST_ADDR type givs. */
4282
4283 static rtx addr_placeholder;
4284
4285 /* ??? Unfinished optimizations, and possible future optimizations,
4286 for the strength reduction code. */
4287
4288 /* ??? The interaction of biv elimination, and recognition of 'constant'
4289 bivs, may cause problems. */
4290
4291 /* ??? Add heuristics so that DEST_ADDR strength reduction does not cause
4292 performance problems.
4293
4294 Perhaps don't eliminate things that can be combined with an addressing
4295 mode. Find all givs that have the same biv, mult_val, and add_val;
4296 then for each giv, check to see if its only use dies in a following
4297 memory address. If so, generate a new memory address and check to see
4298 if it is valid. If it is valid, then store the modified memory address,
4299 otherwise, mark the giv as not done so that it will get its own iv. */
4300
4301 /* ??? Could try to optimize branches when it is known that a biv is always
4302 positive. */
4303
4304 /* ??? When replace a biv in a compare insn, we should replace with closest
4305 giv so that an optimized branch can still be recognized by the combiner,
4306 e.g. the VAX acb insn. */
4307
4308 /* ??? Many of the checks involving uid_luid could be simplified if regscan
4309 was rerun in loop_optimize whenever a register was added or moved.
4310 Also, some of the optimizations could be a little less conservative. */
4311 \f
4312 /* Scan the loop body and call FNCALL for each insn. In the addition to the
4313 LOOP and INSN parameters pass MAYBE_MULTIPLE and NOT_EVERY_ITERATION to the
4314 callback.
4315
4316 NOT_EVERY_ITERATION is 1 if current insn is not known to be executed at
4317 least once for every loop iteration except for the last one.
4318
4319 MAYBE_MULTIPLE is 1 if current insn may be executed more than once for every
4320 loop iteration.
4321 */
4322 void
4323 for_each_insn_in_loop (struct loop *loop, loop_insn_callback fncall)
4324 {
4325 int not_every_iteration = 0;
4326 int maybe_multiple = 0;
4327 int past_loop_latch = 0;
4328 int loop_depth = 0;
4329 rtx p;
4330
4331 /* If loop_scan_start points to the loop exit test, we have to be wary of
4332 subversive use of gotos inside expression statements. */
4333 if (prev_nonnote_insn (loop->scan_start) != prev_nonnote_insn (loop->start))
4334 maybe_multiple = back_branch_in_range_p (loop, loop->scan_start);
4335
4336 /* Scan through loop and update NOT_EVERY_ITERATION and MAYBE_MULTIPLE. */
4337 for (p = next_insn_in_loop (loop, loop->scan_start);
4338 p != NULL_RTX;
4339 p = next_insn_in_loop (loop, p))
4340 {
4341 p = fncall (loop, p, not_every_iteration, maybe_multiple);
4342
4343 /* Past CODE_LABEL, we get to insns that may be executed multiple
4344 times. The only way we can be sure that they can't is if every
4345 jump insn between here and the end of the loop either
4346 returns, exits the loop, is a jump to a location that is still
4347 behind the label, or is a jump to the loop start. */
4348
4349 if (GET_CODE (p) == CODE_LABEL)
4350 {
4351 rtx insn = p;
4352
4353 maybe_multiple = 0;
4354
4355 while (1)
4356 {
4357 insn = NEXT_INSN (insn);
4358 if (insn == loop->scan_start)
4359 break;
4360 if (insn == loop->end)
4361 {
4362 if (loop->top != 0)
4363 insn = loop->top;
4364 else
4365 break;
4366 if (insn == loop->scan_start)
4367 break;
4368 }
4369
4370 if (GET_CODE (insn) == JUMP_INSN
4371 && GET_CODE (PATTERN (insn)) != RETURN
4372 && (!any_condjump_p (insn)
4373 || (JUMP_LABEL (insn) != 0
4374 && JUMP_LABEL (insn) != loop->scan_start
4375 && !loop_insn_first_p (p, JUMP_LABEL (insn)))))
4376 {
4377 maybe_multiple = 1;
4378 break;
4379 }
4380 }
4381 }
4382
4383 /* Past a jump, we get to insns for which we can't count
4384 on whether they will be executed during each iteration. */
4385 /* This code appears twice in strength_reduce. There is also similar
4386 code in scan_loop. */
4387 if (GET_CODE (p) == JUMP_INSN
4388 /* If we enter the loop in the middle, and scan around to the
4389 beginning, don't set not_every_iteration for that.
4390 This can be any kind of jump, since we want to know if insns
4391 will be executed if the loop is executed. */
4392 && !(JUMP_LABEL (p) == loop->top
4393 && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
4394 && any_uncondjump_p (p))
4395 || (NEXT_INSN (p) == loop->end && any_condjump_p (p)))))
4396 {
4397 rtx label = 0;
4398
4399 /* If this is a jump outside the loop, then it also doesn't
4400 matter. Check to see if the target of this branch is on the
4401 loop->exits_labels list. */
4402
4403 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
4404 if (XEXP (label, 0) == JUMP_LABEL (p))
4405 break;
4406
4407 if (!label)
4408 not_every_iteration = 1;
4409 }
4410
4411 else if (GET_CODE (p) == NOTE)
4412 {
4413 /* At the virtual top of a converted loop, insns are again known to
4414 be executed each iteration: logically, the loop begins here
4415 even though the exit code has been duplicated.
4416
4417 Insns are also again known to be executed each iteration at
4418 the LOOP_CONT note. */
4419 if ((NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP
4420 || NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_CONT)
4421 && loop_depth == 0)
4422 not_every_iteration = 0;
4423 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
4424 loop_depth++;
4425 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
4426 loop_depth--;
4427 }
4428
4429 /* Note if we pass a loop latch. If we do, then we can not clear
4430 NOT_EVERY_ITERATION below when we pass the last CODE_LABEL in
4431 a loop since a jump before the last CODE_LABEL may have started
4432 a new loop iteration.
4433
4434 Note that LOOP_TOP is only set for rotated loops and we need
4435 this check for all loops, so compare against the CODE_LABEL
4436 which immediately follows LOOP_START. */
4437 if (GET_CODE (p) == JUMP_INSN
4438 && JUMP_LABEL (p) == NEXT_INSN (loop->start))
4439 past_loop_latch = 1;
4440
4441 /* Unlike in the code motion pass where MAYBE_NEVER indicates that
4442 an insn may never be executed, NOT_EVERY_ITERATION indicates whether
4443 or not an insn is known to be executed each iteration of the
4444 loop, whether or not any iterations are known to occur.
4445
4446 Therefore, if we have just passed a label and have no more labels
4447 between here and the test insn of the loop, and we have not passed
4448 a jump to the top of the loop, then we know these insns will be
4449 executed each iteration. */
4450
4451 if (not_every_iteration
4452 && !past_loop_latch
4453 && GET_CODE (p) == CODE_LABEL
4454 && no_labels_between_p (p, loop->end)
4455 && loop_insn_first_p (p, loop->cont))
4456 not_every_iteration = 0;
4457 }
4458 }
4459 \f
4460 static void
4461 loop_bivs_find (struct loop *loop)
4462 {
4463 struct loop_regs *regs = LOOP_REGS (loop);
4464 struct loop_ivs *ivs = LOOP_IVS (loop);
4465 /* Temporary list pointers for traversing ivs->list. */
4466 struct iv_class *bl, **backbl;
4467
4468 ivs->list = 0;
4469
4470 for_each_insn_in_loop (loop, check_insn_for_bivs);
4471
4472 /* Scan ivs->list to remove all regs that proved not to be bivs.
4473 Make a sanity check against regs->n_times_set. */
4474 for (backbl = &ivs->list, bl = *backbl; bl; bl = bl->next)
4475 {
4476 if (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4477 /* Above happens if register modified by subreg, etc. */
4478 /* Make sure it is not recognized as a basic induction var: */
4479 || regs->array[bl->regno].n_times_set != bl->biv_count
4480 /* If never incremented, it is invariant that we decided not to
4481 move. So leave it alone. */
4482 || ! bl->incremented)
4483 {
4484 if (loop_dump_stream)
4485 fprintf (loop_dump_stream, "Biv %d: discarded, %s\n",
4486 bl->regno,
4487 (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4488 ? "not induction variable"
4489 : (! bl->incremented ? "never incremented"
4490 : "count error")));
4491
4492 REG_IV_TYPE (ivs, bl->regno) = NOT_BASIC_INDUCT;
4493 *backbl = bl->next;
4494 }
4495 else
4496 {
4497 backbl = &bl->next;
4498
4499 if (loop_dump_stream)
4500 fprintf (loop_dump_stream, "Biv %d: verified\n", bl->regno);
4501 }
4502 }
4503 }
4504
4505
4506 /* Determine how BIVS are initialized by looking through pre-header
4507 extended basic block. */
4508 static void
4509 loop_bivs_init_find (struct loop *loop)
4510 {
4511 struct loop_ivs *ivs = LOOP_IVS (loop);
4512 /* Temporary list pointers for traversing ivs->list. */
4513 struct iv_class *bl;
4514 int call_seen;
4515 rtx p;
4516
4517 /* Find initial value for each biv by searching backwards from loop_start,
4518 halting at first label. Also record any test condition. */
4519
4520 call_seen = 0;
4521 for (p = loop->start; p && GET_CODE (p) != CODE_LABEL; p = PREV_INSN (p))
4522 {
4523 rtx test;
4524
4525 note_insn = p;
4526
4527 if (GET_CODE (p) == CALL_INSN)
4528 call_seen = 1;
4529
4530 if (INSN_P (p))
4531 note_stores (PATTERN (p), record_initial, ivs);
4532
4533 /* Record any test of a biv that branches around the loop if no store
4534 between it and the start of loop. We only care about tests with
4535 constants and registers and only certain of those. */
4536 if (GET_CODE (p) == JUMP_INSN
4537 && JUMP_LABEL (p) != 0
4538 && next_real_insn (JUMP_LABEL (p)) == next_real_insn (loop->end)
4539 && (test = get_condition_for_loop (loop, p)) != 0
4540 && GET_CODE (XEXP (test, 0)) == REG
4541 && REGNO (XEXP (test, 0)) < max_reg_before_loop
4542 && (bl = REG_IV_CLASS (ivs, REGNO (XEXP (test, 0)))) != 0
4543 && valid_initial_value_p (XEXP (test, 1), p, call_seen, loop->start)
4544 && bl->init_insn == 0)
4545 {
4546 /* If an NE test, we have an initial value! */
4547 if (GET_CODE (test) == NE)
4548 {
4549 bl->init_insn = p;
4550 bl->init_set = gen_rtx_SET (VOIDmode,
4551 XEXP (test, 0), XEXP (test, 1));
4552 }
4553 else
4554 bl->initial_test = test;
4555 }
4556 }
4557 }
4558
4559
4560 /* Look at the each biv and see if we can say anything better about its
4561 initial value from any initializing insns set up above. (This is done
4562 in two passes to avoid missing SETs in a PARALLEL.) */
4563 static void
4564 loop_bivs_check (struct loop *loop)
4565 {
4566 struct loop_ivs *ivs = LOOP_IVS (loop);
4567 /* Temporary list pointers for traversing ivs->list. */
4568 struct iv_class *bl;
4569 struct iv_class **backbl;
4570
4571 for (backbl = &ivs->list; (bl = *backbl); backbl = &bl->next)
4572 {
4573 rtx src;
4574 rtx note;
4575
4576 if (! bl->init_insn)
4577 continue;
4578
4579 /* IF INIT_INSN has a REG_EQUAL or REG_EQUIV note and the value
4580 is a constant, use the value of that. */
4581 if (((note = find_reg_note (bl->init_insn, REG_EQUAL, 0)) != NULL
4582 && CONSTANT_P (XEXP (note, 0)))
4583 || ((note = find_reg_note (bl->init_insn, REG_EQUIV, 0)) != NULL
4584 && CONSTANT_P (XEXP (note, 0))))
4585 src = XEXP (note, 0);
4586 else
4587 src = SET_SRC (bl->init_set);
4588
4589 if (loop_dump_stream)
4590 fprintf (loop_dump_stream,
4591 "Biv %d: initialized at insn %d: initial value ",
4592 bl->regno, INSN_UID (bl->init_insn));
4593
4594 if ((GET_MODE (src) == GET_MODE (regno_reg_rtx[bl->regno])
4595 || GET_MODE (src) == VOIDmode)
4596 && valid_initial_value_p (src, bl->init_insn,
4597 LOOP_INFO (loop)->pre_header_has_call,
4598 loop->start))
4599 {
4600 bl->initial_value = src;
4601
4602 if (loop_dump_stream)
4603 {
4604 print_simple_rtl (loop_dump_stream, src);
4605 fputc ('\n', loop_dump_stream);
4606 }
4607 }
4608 /* If we can't make it a giv,
4609 let biv keep initial value of "itself". */
4610 else if (loop_dump_stream)
4611 fprintf (loop_dump_stream, "is complex\n");
4612 }
4613 }
4614
4615
4616 /* Search the loop for general induction variables. */
4617
4618 static void
4619 loop_givs_find (struct loop* loop)
4620 {
4621 for_each_insn_in_loop (loop, check_insn_for_givs);
4622 }
4623
4624
4625 /* For each giv for which we still don't know whether or not it is
4626 replaceable, check to see if it is replaceable because its final value
4627 can be calculated. */
4628
4629 static void
4630 loop_givs_check (struct loop *loop)
4631 {
4632 struct loop_ivs *ivs = LOOP_IVS (loop);
4633 struct iv_class *bl;
4634
4635 for (bl = ivs->list; bl; bl = bl->next)
4636 {
4637 struct induction *v;
4638
4639 for (v = bl->giv; v; v = v->next_iv)
4640 if (! v->replaceable && ! v->not_replaceable)
4641 check_final_value (loop, v);
4642 }
4643 }
4644
4645
4646 /* Return nonzero if it is possible to eliminate the biv BL provided
4647 all givs are reduced. This is possible if either the reg is not
4648 used outside the loop, or we can compute what its final value will
4649 be. */
4650
4651 static int
4652 loop_biv_eliminable_p (struct loop *loop, struct iv_class *bl,
4653 int threshold, int insn_count)
4654 {
4655 /* For architectures with a decrement_and_branch_until_zero insn,
4656 don't do this if we put a REG_NONNEG note on the endtest for this
4657 biv. */
4658
4659 #ifdef HAVE_decrement_and_branch_until_zero
4660 if (bl->nonneg)
4661 {
4662 if (loop_dump_stream)
4663 fprintf (loop_dump_stream,
4664 "Cannot eliminate nonneg biv %d.\n", bl->regno);
4665 return 0;
4666 }
4667 #endif
4668
4669 /* Check that biv is used outside loop or if it has a final value.
4670 Compare against bl->init_insn rather than loop->start. We aren't
4671 concerned with any uses of the biv between init_insn and
4672 loop->start since these won't be affected by the value of the biv
4673 elsewhere in the function, so long as init_insn doesn't use the
4674 biv itself. */
4675
4676 if ((REGNO_LAST_LUID (bl->regno) < INSN_LUID (loop->end)
4677 && bl->init_insn
4678 && INSN_UID (bl->init_insn) < max_uid_for_loop
4679 && REGNO_FIRST_LUID (bl->regno) >= INSN_LUID (bl->init_insn)
4680 && ! reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
4681 || (bl->final_value = final_biv_value (loop, bl)))
4682 return maybe_eliminate_biv (loop, bl, 0, threshold, insn_count);
4683
4684 if (loop_dump_stream)
4685 {
4686 fprintf (loop_dump_stream,
4687 "Cannot eliminate biv %d.\n",
4688 bl->regno);
4689 fprintf (loop_dump_stream,
4690 "First use: insn %d, last use: insn %d.\n",
4691 REGNO_FIRST_UID (bl->regno),
4692 REGNO_LAST_UID (bl->regno));
4693 }
4694 return 0;
4695 }
4696
4697
4698 /* Reduce each giv of BL that we have decided to reduce. */
4699
4700 static void
4701 loop_givs_reduce (struct loop *loop, struct iv_class *bl)
4702 {
4703 struct induction *v;
4704
4705 for (v = bl->giv; v; v = v->next_iv)
4706 {
4707 struct induction *tv;
4708 if (! v->ignore && v->same == 0)
4709 {
4710 int auto_inc_opt = 0;
4711
4712 /* If the code for derived givs immediately below has already
4713 allocated a new_reg, we must keep it. */
4714 if (! v->new_reg)
4715 v->new_reg = gen_reg_rtx (v->mode);
4716
4717 #ifdef AUTO_INC_DEC
4718 /* If the target has auto-increment addressing modes, and
4719 this is an address giv, then try to put the increment
4720 immediately after its use, so that flow can create an
4721 auto-increment addressing mode. */
4722 if (v->giv_type == DEST_ADDR && bl->biv_count == 1
4723 && bl->biv->always_executed && ! bl->biv->maybe_multiple
4724 /* We don't handle reversed biv's because bl->biv->insn
4725 does not have a valid INSN_LUID. */
4726 && ! bl->reversed
4727 && v->always_executed && ! v->maybe_multiple
4728 && INSN_UID (v->insn) < max_uid_for_loop)
4729 {
4730 /* If other giv's have been combined with this one, then
4731 this will work only if all uses of the other giv's occur
4732 before this giv's insn. This is difficult to check.
4733
4734 We simplify this by looking for the common case where
4735 there is one DEST_REG giv, and this giv's insn is the
4736 last use of the dest_reg of that DEST_REG giv. If the
4737 increment occurs after the address giv, then we can
4738 perform the optimization. (Otherwise, the increment
4739 would have to go before other_giv, and we would not be
4740 able to combine it with the address giv to get an
4741 auto-inc address.) */
4742 if (v->combined_with)
4743 {
4744 struct induction *other_giv = 0;
4745
4746 for (tv = bl->giv; tv; tv = tv->next_iv)
4747 if (tv->same == v)
4748 {
4749 if (other_giv)
4750 break;
4751 else
4752 other_giv = tv;
4753 }
4754 if (! tv && other_giv
4755 && REGNO (other_giv->dest_reg) < max_reg_before_loop
4756 && (REGNO_LAST_UID (REGNO (other_giv->dest_reg))
4757 == INSN_UID (v->insn))
4758 && INSN_LUID (v->insn) < INSN_LUID (bl->biv->insn))
4759 auto_inc_opt = 1;
4760 }
4761 /* Check for case where increment is before the address
4762 giv. Do this test in "loop order". */
4763 else if ((INSN_LUID (v->insn) > INSN_LUID (bl->biv->insn)
4764 && (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4765 || (INSN_LUID (bl->biv->insn)
4766 > INSN_LUID (loop->scan_start))))
4767 || (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4768 && (INSN_LUID (loop->scan_start)
4769 < INSN_LUID (bl->biv->insn))))
4770 auto_inc_opt = -1;
4771 else
4772 auto_inc_opt = 1;
4773
4774 #ifdef HAVE_cc0
4775 {
4776 rtx prev;
4777
4778 /* We can't put an insn immediately after one setting
4779 cc0, or immediately before one using cc0. */
4780 if ((auto_inc_opt == 1 && sets_cc0_p (PATTERN (v->insn)))
4781 || (auto_inc_opt == -1
4782 && (prev = prev_nonnote_insn (v->insn)) != 0
4783 && INSN_P (prev)
4784 && sets_cc0_p (PATTERN (prev))))
4785 auto_inc_opt = 0;
4786 }
4787 #endif
4788
4789 if (auto_inc_opt)
4790 v->auto_inc_opt = 1;
4791 }
4792 #endif
4793
4794 /* For each place where the biv is incremented, add an insn
4795 to increment the new, reduced reg for the giv. */
4796 for (tv = bl->biv; tv; tv = tv->next_iv)
4797 {
4798 rtx insert_before;
4799
4800 /* Skip if location is the same as a previous one. */
4801 if (tv->same)
4802 continue;
4803 if (! auto_inc_opt)
4804 insert_before = NEXT_INSN (tv->insn);
4805 else if (auto_inc_opt == 1)
4806 insert_before = NEXT_INSN (v->insn);
4807 else
4808 insert_before = v->insn;
4809
4810 if (tv->mult_val == const1_rtx)
4811 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4812 v->new_reg, v->new_reg,
4813 0, insert_before);
4814 else /* tv->mult_val == const0_rtx */
4815 /* A multiply is acceptable here
4816 since this is presumed to be seldom executed. */
4817 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4818 v->add_val, v->new_reg,
4819 0, insert_before);
4820 }
4821
4822 /* Add code at loop start to initialize giv's reduced reg. */
4823
4824 loop_iv_add_mult_hoist (loop,
4825 extend_value_for_giv (v, bl->initial_value),
4826 v->mult_val, v->add_val, v->new_reg);
4827 }
4828 }
4829 }
4830
4831
4832 /* Check for givs whose first use is their definition and whose
4833 last use is the definition of another giv. If so, it is likely
4834 dead and should not be used to derive another giv nor to
4835 eliminate a biv. */
4836
4837 static void
4838 loop_givs_dead_check (struct loop *loop ATTRIBUTE_UNUSED, struct iv_class *bl)
4839 {
4840 struct induction *v;
4841
4842 for (v = bl->giv; v; v = v->next_iv)
4843 {
4844 if (v->ignore
4845 || (v->same && v->same->ignore))
4846 continue;
4847
4848 if (v->giv_type == DEST_REG
4849 && REGNO_FIRST_UID (REGNO (v->dest_reg)) == INSN_UID (v->insn))
4850 {
4851 struct induction *v1;
4852
4853 for (v1 = bl->giv; v1; v1 = v1->next_iv)
4854 if (REGNO_LAST_UID (REGNO (v->dest_reg)) == INSN_UID (v1->insn))
4855 v->maybe_dead = 1;
4856 }
4857 }
4858 }
4859
4860
4861 static void
4862 loop_givs_rescan (struct loop *loop, struct iv_class *bl, rtx *reg_map)
4863 {
4864 struct induction *v;
4865
4866 for (v = bl->giv; v; v = v->next_iv)
4867 {
4868 if (v->same && v->same->ignore)
4869 v->ignore = 1;
4870
4871 if (v->ignore)
4872 continue;
4873
4874 /* Update expression if this was combined, in case other giv was
4875 replaced. */
4876 if (v->same)
4877 v->new_reg = replace_rtx (v->new_reg,
4878 v->same->dest_reg, v->same->new_reg);
4879
4880 /* See if this register is known to be a pointer to something. If
4881 so, see if we can find the alignment. First see if there is a
4882 destination register that is a pointer. If so, this shares the
4883 alignment too. Next see if we can deduce anything from the
4884 computational information. If not, and this is a DEST_ADDR
4885 giv, at least we know that it's a pointer, though we don't know
4886 the alignment. */
4887 if (GET_CODE (v->new_reg) == REG
4888 && v->giv_type == DEST_REG
4889 && REG_POINTER (v->dest_reg))
4890 mark_reg_pointer (v->new_reg,
4891 REGNO_POINTER_ALIGN (REGNO (v->dest_reg)));
4892 else if (GET_CODE (v->new_reg) == REG
4893 && REG_POINTER (v->src_reg))
4894 {
4895 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->src_reg));
4896
4897 if (align == 0
4898 || GET_CODE (v->add_val) != CONST_INT
4899 || INTVAL (v->add_val) % (align / BITS_PER_UNIT) != 0)
4900 align = 0;
4901
4902 mark_reg_pointer (v->new_reg, align);
4903 }
4904 else if (GET_CODE (v->new_reg) == REG
4905 && GET_CODE (v->add_val) == REG
4906 && REG_POINTER (v->add_val))
4907 {
4908 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->add_val));
4909
4910 if (align == 0 || GET_CODE (v->mult_val) != CONST_INT
4911 || INTVAL (v->mult_val) % (align / BITS_PER_UNIT) != 0)
4912 align = 0;
4913
4914 mark_reg_pointer (v->new_reg, align);
4915 }
4916 else if (GET_CODE (v->new_reg) == REG && v->giv_type == DEST_ADDR)
4917 mark_reg_pointer (v->new_reg, 0);
4918
4919 if (v->giv_type == DEST_ADDR)
4920 /* Store reduced reg as the address in the memref where we found
4921 this giv. */
4922 validate_change (v->insn, v->location, v->new_reg, 0);
4923 else if (v->replaceable)
4924 {
4925 reg_map[REGNO (v->dest_reg)] = v->new_reg;
4926 }
4927 else
4928 {
4929 rtx original_insn = v->insn;
4930 rtx note;
4931
4932 /* Not replaceable; emit an insn to set the original giv reg from
4933 the reduced giv, same as above. */
4934 v->insn = loop_insn_emit_after (loop, 0, original_insn,
4935 gen_move_insn (v->dest_reg,
4936 v->new_reg));
4937
4938 /* The original insn may have a REG_EQUAL note. This note is
4939 now incorrect and may result in invalid substitutions later.
4940 The original insn is dead, but may be part of a libcall
4941 sequence, which doesn't seem worth the bother of handling. */
4942 note = find_reg_note (original_insn, REG_EQUAL, NULL_RTX);
4943 if (note)
4944 remove_note (original_insn, note);
4945 }
4946
4947 /* When a loop is reversed, givs which depend on the reversed
4948 biv, and which are live outside the loop, must be set to their
4949 correct final value. This insn is only needed if the giv is
4950 not replaceable. The correct final value is the same as the
4951 value that the giv starts the reversed loop with. */
4952 if (bl->reversed && ! v->replaceable)
4953 loop_iv_add_mult_sink (loop,
4954 extend_value_for_giv (v, bl->initial_value),
4955 v->mult_val, v->add_val, v->dest_reg);
4956 else if (v->final_value)
4957 loop_insn_sink_or_swim (loop,
4958 gen_load_of_final_value (v->dest_reg,
4959 v->final_value));
4960
4961 if (loop_dump_stream)
4962 {
4963 fprintf (loop_dump_stream, "giv at %d reduced to ",
4964 INSN_UID (v->insn));
4965 print_simple_rtl (loop_dump_stream, v->new_reg);
4966 fprintf (loop_dump_stream, "\n");
4967 }
4968 }
4969 }
4970
4971
4972 static int
4973 loop_giv_reduce_benefit (struct loop *loop ATTRIBUTE_UNUSED,
4974 struct iv_class *bl, struct induction *v,
4975 rtx test_reg)
4976 {
4977 int add_cost;
4978 int benefit;
4979
4980 benefit = v->benefit;
4981 PUT_MODE (test_reg, v->mode);
4982 add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val,
4983 test_reg, test_reg);
4984
4985 /* Reduce benefit if not replaceable, since we will insert a
4986 move-insn to replace the insn that calculates this giv. Don't do
4987 this unless the giv is a user variable, since it will often be
4988 marked non-replaceable because of the duplication of the exit
4989 code outside the loop. In such a case, the copies we insert are
4990 dead and will be deleted. So they don't have a cost. Similar
4991 situations exist. */
4992 /* ??? The new final_[bg]iv_value code does a much better job of
4993 finding replaceable giv's, and hence this code may no longer be
4994 necessary. */
4995 if (! v->replaceable && ! bl->eliminable
4996 && REG_USERVAR_P (v->dest_reg))
4997 benefit -= copy_cost;
4998
4999 /* Decrease the benefit to count the add-insns that we will insert
5000 to increment the reduced reg for the giv. ??? This can
5001 overestimate the run-time cost of the additional insns, e.g. if
5002 there are multiple basic blocks that increment the biv, but only
5003 one of these blocks is executed during each iteration. There is
5004 no good way to detect cases like this with the current structure
5005 of the loop optimizer. This code is more accurate for
5006 determining code size than run-time benefits. */
5007 benefit -= add_cost * bl->biv_count;
5008
5009 /* Decide whether to strength-reduce this giv or to leave the code
5010 unchanged (recompute it from the biv each time it is used). This
5011 decision can be made independently for each giv. */
5012
5013 #ifdef AUTO_INC_DEC
5014 /* Attempt to guess whether autoincrement will handle some of the
5015 new add insns; if so, increase BENEFIT (undo the subtraction of
5016 add_cost that was done above). */
5017 if (v->giv_type == DEST_ADDR
5018 /* Increasing the benefit is risky, since this is only a guess.
5019 Avoid increasing register pressure in cases where there would
5020 be no other benefit from reducing this giv. */
5021 && benefit > 0
5022 && GET_CODE (v->mult_val) == CONST_INT)
5023 {
5024 int size = GET_MODE_SIZE (GET_MODE (v->mem));
5025
5026 if (HAVE_POST_INCREMENT
5027 && INTVAL (v->mult_val) == size)
5028 benefit += add_cost * bl->biv_count;
5029 else if (HAVE_PRE_INCREMENT
5030 && INTVAL (v->mult_val) == size)
5031 benefit += add_cost * bl->biv_count;
5032 else if (HAVE_POST_DECREMENT
5033 && -INTVAL (v->mult_val) == size)
5034 benefit += add_cost * bl->biv_count;
5035 else if (HAVE_PRE_DECREMENT
5036 && -INTVAL (v->mult_val) == size)
5037 benefit += add_cost * bl->biv_count;
5038 }
5039 #endif
5040
5041 return benefit;
5042 }
5043
5044
5045 /* Free IV structures for LOOP. */
5046
5047 static void
5048 loop_ivs_free (struct loop *loop)
5049 {
5050 struct loop_ivs *ivs = LOOP_IVS (loop);
5051 struct iv_class *iv = ivs->list;
5052
5053 free (ivs->regs);
5054
5055 while (iv)
5056 {
5057 struct iv_class *next = iv->next;
5058 struct induction *induction;
5059 struct induction *next_induction;
5060
5061 for (induction = iv->biv; induction; induction = next_induction)
5062 {
5063 next_induction = induction->next_iv;
5064 free (induction);
5065 }
5066 for (induction = iv->giv; induction; induction = next_induction)
5067 {
5068 next_induction = induction->next_iv;
5069 free (induction);
5070 }
5071
5072 free (iv);
5073 iv = next;
5074 }
5075 }
5076
5077
5078 /* Perform strength reduction and induction variable elimination.
5079
5080 Pseudo registers created during this function will be beyond the
5081 last valid index in several tables including
5082 REGS->ARRAY[I].N_TIMES_SET and REGNO_LAST_UID. This does not cause a
5083 problem here, because the added registers cannot be givs outside of
5084 their loop, and hence will never be reconsidered. But scan_loop
5085 must check regnos to make sure they are in bounds. */
5086
5087 static void
5088 strength_reduce (struct loop *loop, int flags)
5089 {
5090 struct loop_info *loop_info = LOOP_INFO (loop);
5091 struct loop_regs *regs = LOOP_REGS (loop);
5092 struct loop_ivs *ivs = LOOP_IVS (loop);
5093 rtx p;
5094 /* Temporary list pointer for traversing ivs->list. */
5095 struct iv_class *bl;
5096 /* Ratio of extra register life span we can justify
5097 for saving an instruction. More if loop doesn't call subroutines
5098 since in that case saving an insn makes more difference
5099 and more registers are available. */
5100 /* ??? could set this to last value of threshold in move_movables */
5101 int threshold = (loop_info->has_call ? 1 : 2) * (3 + n_non_fixed_regs);
5102 /* Map of pseudo-register replacements. */
5103 rtx *reg_map = NULL;
5104 int reg_map_size;
5105 int unrolled_insn_copies = 0;
5106 rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
5107 int insn_count = count_insns_in_loop (loop);
5108
5109 addr_placeholder = gen_reg_rtx (Pmode);
5110
5111 ivs->n_regs = max_reg_before_loop;
5112 ivs->regs = xcalloc (ivs->n_regs, sizeof (struct iv));
5113
5114 /* Find all BIVs in loop. */
5115 loop_bivs_find (loop);
5116
5117 /* Exit if there are no bivs. */
5118 if (! ivs->list)
5119 {
5120 /* Can still unroll the loop anyways, but indicate that there is no
5121 strength reduction info available. */
5122 if (flags & LOOP_UNROLL)
5123 unroll_loop (loop, insn_count, 0);
5124
5125 loop_ivs_free (loop);
5126 return;
5127 }
5128
5129 /* Determine how BIVS are initialized by looking through pre-header
5130 extended basic block. */
5131 loop_bivs_init_find (loop);
5132
5133 /* Look at the each biv and see if we can say anything better about its
5134 initial value from any initializing insns set up above. */
5135 loop_bivs_check (loop);
5136
5137 /* Search the loop for general induction variables. */
5138 loop_givs_find (loop);
5139
5140 /* Try to calculate and save the number of loop iterations. This is
5141 set to zero if the actual number can not be calculated. This must
5142 be called after all giv's have been identified, since otherwise it may
5143 fail if the iteration variable is a giv. */
5144 loop_iterations (loop);
5145
5146 #ifdef HAVE_prefetch
5147 if (flags & LOOP_PREFETCH)
5148 emit_prefetch_instructions (loop);
5149 #endif
5150
5151 /* Now for each giv for which we still don't know whether or not it is
5152 replaceable, check to see if it is replaceable because its final value
5153 can be calculated. This must be done after loop_iterations is called,
5154 so that final_giv_value will work correctly. */
5155 loop_givs_check (loop);
5156
5157 /* Try to prove that the loop counter variable (if any) is always
5158 nonnegative; if so, record that fact with a REG_NONNEG note
5159 so that "decrement and branch until zero" insn can be used. */
5160 check_dbra_loop (loop, insn_count);
5161
5162 /* Create reg_map to hold substitutions for replaceable giv regs.
5163 Some givs might have been made from biv increments, so look at
5164 ivs->reg_iv_type for a suitable size. */
5165 reg_map_size = ivs->n_regs;
5166 reg_map = xcalloc (reg_map_size, sizeof (rtx));
5167
5168 /* Examine each iv class for feasibility of strength reduction/induction
5169 variable elimination. */
5170
5171 for (bl = ivs->list; bl; bl = bl->next)
5172 {
5173 struct induction *v;
5174 int benefit;
5175
5176 /* Test whether it will be possible to eliminate this biv
5177 provided all givs are reduced. */
5178 bl->eliminable = loop_biv_eliminable_p (loop, bl, threshold, insn_count);
5179
5180 /* This will be true at the end, if all givs which depend on this
5181 biv have been strength reduced.
5182 We can't (currently) eliminate the biv unless this is so. */
5183 bl->all_reduced = 1;
5184
5185 /* Check each extension dependent giv in this class to see if its
5186 root biv is safe from wrapping in the interior mode. */
5187 check_ext_dependent_givs (loop, bl);
5188
5189 /* Combine all giv's for this iv_class. */
5190 combine_givs (regs, bl);
5191
5192 for (v = bl->giv; v; v = v->next_iv)
5193 {
5194 struct induction *tv;
5195
5196 if (v->ignore || v->same)
5197 continue;
5198
5199 benefit = loop_giv_reduce_benefit (loop, bl, v, test_reg);
5200
5201 /* If an insn is not to be strength reduced, then set its ignore
5202 flag, and clear bl->all_reduced. */
5203
5204 /* A giv that depends on a reversed biv must be reduced if it is
5205 used after the loop exit, otherwise, it would have the wrong
5206 value after the loop exit. To make it simple, just reduce all
5207 of such giv's whether or not we know they are used after the loop
5208 exit. */
5209
5210 if (! flag_reduce_all_givs
5211 && v->lifetime * threshold * benefit < insn_count
5212 && ! bl->reversed)
5213 {
5214 if (loop_dump_stream)
5215 fprintf (loop_dump_stream,
5216 "giv of insn %d not worth while, %d vs %d.\n",
5217 INSN_UID (v->insn),
5218 v->lifetime * threshold * benefit, insn_count);
5219 v->ignore = 1;
5220 bl->all_reduced = 0;
5221 }
5222 else
5223 {
5224 /* Check that we can increment the reduced giv without a
5225 multiply insn. If not, reject it. */
5226
5227 for (tv = bl->biv; tv; tv = tv->next_iv)
5228 if (tv->mult_val == const1_rtx
5229 && ! product_cheap_p (tv->add_val, v->mult_val))
5230 {
5231 if (loop_dump_stream)
5232 fprintf (loop_dump_stream,
5233 "giv of insn %d: would need a multiply.\n",
5234 INSN_UID (v->insn));
5235 v->ignore = 1;
5236 bl->all_reduced = 0;
5237 break;
5238 }
5239 }
5240 }
5241
5242 /* Check for givs whose first use is their definition and whose
5243 last use is the definition of another giv. If so, it is likely
5244 dead and should not be used to derive another giv nor to
5245 eliminate a biv. */
5246 loop_givs_dead_check (loop, bl);
5247
5248 /* Reduce each giv that we decided to reduce. */
5249 loop_givs_reduce (loop, bl);
5250
5251 /* Rescan all givs. If a giv is the same as a giv not reduced, mark it
5252 as not reduced.
5253
5254 For each giv register that can be reduced now: if replaceable,
5255 substitute reduced reg wherever the old giv occurs;
5256 else add new move insn "giv_reg = reduced_reg". */
5257 loop_givs_rescan (loop, bl, reg_map);
5258
5259 /* All the givs based on the biv bl have been reduced if they
5260 merit it. */
5261
5262 /* For each giv not marked as maybe dead that has been combined with a
5263 second giv, clear any "maybe dead" mark on that second giv.
5264 v->new_reg will either be or refer to the register of the giv it
5265 combined with.
5266
5267 Doing this clearing avoids problems in biv elimination where
5268 a giv's new_reg is a complex value that can't be put in the
5269 insn but the giv combined with (with a reg as new_reg) is
5270 marked maybe_dead. Since the register will be used in either
5271 case, we'd prefer it be used from the simpler giv. */
5272
5273 for (v = bl->giv; v; v = v->next_iv)
5274 if (! v->maybe_dead && v->same)
5275 v->same->maybe_dead = 0;
5276
5277 /* Try to eliminate the biv, if it is a candidate.
5278 This won't work if ! bl->all_reduced,
5279 since the givs we planned to use might not have been reduced.
5280
5281 We have to be careful that we didn't initially think we could
5282 eliminate this biv because of a giv that we now think may be
5283 dead and shouldn't be used as a biv replacement.
5284
5285 Also, there is the possibility that we may have a giv that looks
5286 like it can be used to eliminate a biv, but the resulting insn
5287 isn't valid. This can happen, for example, on the 88k, where a
5288 JUMP_INSN can compare a register only with zero. Attempts to
5289 replace it with a compare with a constant will fail.
5290
5291 Note that in cases where this call fails, we may have replaced some
5292 of the occurrences of the biv with a giv, but no harm was done in
5293 doing so in the rare cases where it can occur. */
5294
5295 if (bl->all_reduced == 1 && bl->eliminable
5296 && maybe_eliminate_biv (loop, bl, 1, threshold, insn_count))
5297 {
5298 /* ?? If we created a new test to bypass the loop entirely,
5299 or otherwise drop straight in, based on this test, then
5300 we might want to rewrite it also. This way some later
5301 pass has more hope of removing the initialization of this
5302 biv entirely. */
5303
5304 /* If final_value != 0, then the biv may be used after loop end
5305 and we must emit an insn to set it just in case.
5306
5307 Reversed bivs already have an insn after the loop setting their
5308 value, so we don't need another one. We can't calculate the
5309 proper final value for such a biv here anyways. */
5310 if (bl->final_value && ! bl->reversed)
5311 loop_insn_sink_or_swim (loop,
5312 gen_load_of_final_value (bl->biv->dest_reg,
5313 bl->final_value));
5314
5315 if (loop_dump_stream)
5316 fprintf (loop_dump_stream, "Reg %d: biv eliminated\n",
5317 bl->regno);
5318 }
5319 /* See above note wrt final_value. But since we couldn't eliminate
5320 the biv, we must set the value after the loop instead of before. */
5321 else if (bl->final_value && ! bl->reversed)
5322 loop_insn_sink (loop, gen_load_of_final_value (bl->biv->dest_reg,
5323 bl->final_value));
5324 }
5325
5326 /* Go through all the instructions in the loop, making all the
5327 register substitutions scheduled in REG_MAP. */
5328
5329 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
5330 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5331 || GET_CODE (p) == CALL_INSN)
5332 {
5333 replace_regs (PATTERN (p), reg_map, reg_map_size, 0);
5334 replace_regs (REG_NOTES (p), reg_map, reg_map_size, 0);
5335 INSN_CODE (p) = -1;
5336 }
5337
5338 if (loop_info->n_iterations > 0)
5339 {
5340 /* When we completely unroll a loop we will likely not need the increment
5341 of the loop BIV and we will not need the conditional branch at the
5342 end of the loop. */
5343 unrolled_insn_copies = insn_count - 2;
5344
5345 #ifdef HAVE_cc0
5346 /* When we completely unroll a loop on a HAVE_cc0 machine we will not
5347 need the comparison before the conditional branch at the end of the
5348 loop. */
5349 unrolled_insn_copies -= 1;
5350 #endif
5351
5352 /* We'll need one copy for each loop iteration. */
5353 unrolled_insn_copies *= loop_info->n_iterations;
5354
5355 /* A little slop to account for the ability to remove initialization
5356 code, better CSE, and other secondary benefits of completely
5357 unrolling some loops. */
5358 unrolled_insn_copies -= 1;
5359
5360 /* Clamp the value. */
5361 if (unrolled_insn_copies < 0)
5362 unrolled_insn_copies = 0;
5363 }
5364
5365 /* Unroll loops from within strength reduction so that we can use the
5366 induction variable information that strength_reduce has already
5367 collected. Always unroll loops that would be as small or smaller
5368 unrolled than when rolled. */
5369 if ((flags & LOOP_UNROLL)
5370 || ((flags & LOOP_AUTO_UNROLL)
5371 && loop_info->n_iterations > 0
5372 && unrolled_insn_copies <= insn_count))
5373 unroll_loop (loop, insn_count, 1);
5374
5375 #ifdef HAVE_doloop_end
5376 if (HAVE_doloop_end && (flags & LOOP_BCT) && flag_branch_on_count_reg)
5377 doloop_optimize (loop);
5378 #endif /* HAVE_doloop_end */
5379
5380 /* In case number of iterations is known, drop branch prediction note
5381 in the branch. Do that only in second loop pass, as loop unrolling
5382 may change the number of iterations performed. */
5383 if (flags & LOOP_BCT)
5384 {
5385 unsigned HOST_WIDE_INT n
5386 = loop_info->n_iterations / loop_info->unroll_number;
5387 if (n > 1)
5388 predict_insn (prev_nonnote_insn (loop->end), PRED_LOOP_ITERATIONS,
5389 REG_BR_PROB_BASE - REG_BR_PROB_BASE / n);
5390 }
5391
5392 if (loop_dump_stream)
5393 fprintf (loop_dump_stream, "\n");
5394
5395 loop_ivs_free (loop);
5396 if (reg_map)
5397 free (reg_map);
5398 }
5399 \f
5400 /*Record all basic induction variables calculated in the insn. */
5401 static rtx
5402 check_insn_for_bivs (struct loop *loop, rtx p, int not_every_iteration,
5403 int maybe_multiple)
5404 {
5405 struct loop_ivs *ivs = LOOP_IVS (loop);
5406 rtx set;
5407 rtx dest_reg;
5408 rtx inc_val;
5409 rtx mult_val;
5410 rtx *location;
5411
5412 if (GET_CODE (p) == INSN
5413 && (set = single_set (p))
5414 && GET_CODE (SET_DEST (set)) == REG)
5415 {
5416 dest_reg = SET_DEST (set);
5417 if (REGNO (dest_reg) < max_reg_before_loop
5418 && REGNO (dest_reg) >= FIRST_PSEUDO_REGISTER
5419 && REG_IV_TYPE (ivs, REGNO (dest_reg)) != NOT_BASIC_INDUCT)
5420 {
5421 if (basic_induction_var (loop, SET_SRC (set),
5422 GET_MODE (SET_SRC (set)),
5423 dest_reg, p, &inc_val, &mult_val,
5424 &location))
5425 {
5426 /* It is a possible basic induction variable.
5427 Create and initialize an induction structure for it. */
5428
5429 struct induction *v = xmalloc (sizeof (struct induction));
5430
5431 record_biv (loop, v, p, dest_reg, inc_val, mult_val, location,
5432 not_every_iteration, maybe_multiple);
5433 REG_IV_TYPE (ivs, REGNO (dest_reg)) = BASIC_INDUCT;
5434 }
5435 else if (REGNO (dest_reg) < ivs->n_regs)
5436 REG_IV_TYPE (ivs, REGNO (dest_reg)) = NOT_BASIC_INDUCT;
5437 }
5438 }
5439 return p;
5440 }
5441 \f
5442 /* Record all givs calculated in the insn.
5443 A register is a giv if: it is only set once, it is a function of a
5444 biv and a constant (or invariant), and it is not a biv. */
5445 static rtx
5446 check_insn_for_givs (struct loop *loop, rtx p, int not_every_iteration,
5447 int maybe_multiple)
5448 {
5449 struct loop_regs *regs = LOOP_REGS (loop);
5450
5451 rtx set;
5452 /* Look for a general induction variable in a register. */
5453 if (GET_CODE (p) == INSN
5454 && (set = single_set (p))
5455 && GET_CODE (SET_DEST (set)) == REG
5456 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
5457 {
5458 rtx src_reg;
5459 rtx dest_reg;
5460 rtx add_val;
5461 rtx mult_val;
5462 rtx ext_val;
5463 int benefit;
5464 rtx regnote = 0;
5465 rtx last_consec_insn;
5466
5467 dest_reg = SET_DEST (set);
5468 if (REGNO (dest_reg) < FIRST_PSEUDO_REGISTER)
5469 return p;
5470
5471 if (/* SET_SRC is a giv. */
5472 (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
5473 &mult_val, &ext_val, 0, &benefit, VOIDmode)
5474 /* Equivalent expression is a giv. */
5475 || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
5476 && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
5477 &add_val, &mult_val, &ext_val, 0,
5478 &benefit, VOIDmode)))
5479 /* Don't try to handle any regs made by loop optimization.
5480 We have nothing on them in regno_first_uid, etc. */
5481 && REGNO (dest_reg) < max_reg_before_loop
5482 /* Don't recognize a BASIC_INDUCT_VAR here. */
5483 && dest_reg != src_reg
5484 /* This must be the only place where the register is set. */
5485 && (regs->array[REGNO (dest_reg)].n_times_set == 1
5486 /* or all sets must be consecutive and make a giv. */
5487 || (benefit = consec_sets_giv (loop, benefit, p,
5488 src_reg, dest_reg,
5489 &add_val, &mult_val, &ext_val,
5490 &last_consec_insn))))
5491 {
5492 struct induction *v = xmalloc (sizeof (struct induction));
5493
5494 /* If this is a library call, increase benefit. */
5495 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
5496 benefit += libcall_benefit (p);
5497
5498 /* Skip the consecutive insns, if there are any. */
5499 if (regs->array[REGNO (dest_reg)].n_times_set != 1)
5500 p = last_consec_insn;
5501
5502 record_giv (loop, v, p, src_reg, dest_reg, mult_val, add_val,
5503 ext_val, benefit, DEST_REG, not_every_iteration,
5504 maybe_multiple, (rtx*) 0);
5505
5506 }
5507 }
5508
5509 /* Look for givs which are memory addresses. */
5510 if (GET_CODE (p) == INSN)
5511 find_mem_givs (loop, PATTERN (p), p, not_every_iteration,
5512 maybe_multiple);
5513
5514 /* Update the status of whether giv can derive other givs. This can
5515 change when we pass a label or an insn that updates a biv. */
5516 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5517 || GET_CODE (p) == CODE_LABEL)
5518 update_giv_derive (loop, p);
5519 return p;
5520 }
5521 \f
5522 /* Return 1 if X is a valid source for an initial value (or as value being
5523 compared against in an initial test).
5524
5525 X must be either a register or constant and must not be clobbered between
5526 the current insn and the start of the loop.
5527
5528 INSN is the insn containing X. */
5529
5530 static int
5531 valid_initial_value_p (rtx x, rtx insn, int call_seen, rtx loop_start)
5532 {
5533 if (CONSTANT_P (x))
5534 return 1;
5535
5536 /* Only consider pseudos we know about initialized in insns whose luids
5537 we know. */
5538 if (GET_CODE (x) != REG
5539 || REGNO (x) >= max_reg_before_loop)
5540 return 0;
5541
5542 /* Don't use call-clobbered registers across a call which clobbers it. On
5543 some machines, don't use any hard registers at all. */
5544 if (REGNO (x) < FIRST_PSEUDO_REGISTER
5545 && (SMALL_REGISTER_CLASSES
5546 || (call_used_regs[REGNO (x)] && call_seen)))
5547 return 0;
5548
5549 /* Don't use registers that have been clobbered before the start of the
5550 loop. */
5551 if (reg_set_between_p (x, insn, loop_start))
5552 return 0;
5553
5554 return 1;
5555 }
5556 \f
5557 /* Scan X for memory refs and check each memory address
5558 as a possible giv. INSN is the insn whose pattern X comes from.
5559 NOT_EVERY_ITERATION is 1 if the insn might not be executed during
5560 every loop iteration. MAYBE_MULTIPLE is 1 if the insn might be executed
5561 more than once in each loop iteration. */
5562
5563 static void
5564 find_mem_givs (const struct loop *loop, rtx x, rtx insn,
5565 int not_every_iteration, int maybe_multiple)
5566 {
5567 int i, j;
5568 enum rtx_code code;
5569 const char *fmt;
5570
5571 if (x == 0)
5572 return;
5573
5574 code = GET_CODE (x);
5575 switch (code)
5576 {
5577 case REG:
5578 case CONST_INT:
5579 case CONST:
5580 case CONST_DOUBLE:
5581 case SYMBOL_REF:
5582 case LABEL_REF:
5583 case PC:
5584 case CC0:
5585 case ADDR_VEC:
5586 case ADDR_DIFF_VEC:
5587 case USE:
5588 case CLOBBER:
5589 return;
5590
5591 case MEM:
5592 {
5593 rtx src_reg;
5594 rtx add_val;
5595 rtx mult_val;
5596 rtx ext_val;
5597 int benefit;
5598
5599 /* This code used to disable creating GIVs with mult_val == 1 and
5600 add_val == 0. However, this leads to lost optimizations when
5601 it comes time to combine a set of related DEST_ADDR GIVs, since
5602 this one would not be seen. */
5603
5604 if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
5605 &mult_val, &ext_val, 1, &benefit,
5606 GET_MODE (x)))
5607 {
5608 /* Found one; record it. */
5609 struct induction *v = xmalloc (sizeof (struct induction));
5610
5611 record_giv (loop, v, insn, src_reg, addr_placeholder, mult_val,
5612 add_val, ext_val, benefit, DEST_ADDR,
5613 not_every_iteration, maybe_multiple, &XEXP (x, 0));
5614
5615 v->mem = x;
5616 }
5617 }
5618 return;
5619
5620 default:
5621 break;
5622 }
5623
5624 /* Recursively scan the subexpressions for other mem refs. */
5625
5626 fmt = GET_RTX_FORMAT (code);
5627 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5628 if (fmt[i] == 'e')
5629 find_mem_givs (loop, XEXP (x, i), insn, not_every_iteration,
5630 maybe_multiple);
5631 else if (fmt[i] == 'E')
5632 for (j = 0; j < XVECLEN (x, i); j++)
5633 find_mem_givs (loop, XVECEXP (x, i, j), insn, not_every_iteration,
5634 maybe_multiple);
5635 }
5636 \f
5637 /* Fill in the data about one biv update.
5638 V is the `struct induction' in which we record the biv. (It is
5639 allocated by the caller, with alloca.)
5640 INSN is the insn that sets it.
5641 DEST_REG is the biv's reg.
5642
5643 MULT_VAL is const1_rtx if the biv is being incremented here, in which case
5644 INC_VAL is the increment. Otherwise, MULT_VAL is const0_rtx and the biv is
5645 being set to INC_VAL.
5646
5647 NOT_EVERY_ITERATION is nonzero if this biv update is not know to be
5648 executed every iteration; MAYBE_MULTIPLE is nonzero if this biv update
5649 can be executed more than once per iteration. If MAYBE_MULTIPLE
5650 and NOT_EVERY_ITERATION are both zero, we know that the biv update is
5651 executed exactly once per iteration. */
5652
5653 static void
5654 record_biv (struct loop *loop, struct induction *v, rtx insn, rtx dest_reg,
5655 rtx inc_val, rtx mult_val, rtx *location,
5656 int not_every_iteration, int maybe_multiple)
5657 {
5658 struct loop_ivs *ivs = LOOP_IVS (loop);
5659 struct iv_class *bl;
5660
5661 v->insn = insn;
5662 v->src_reg = dest_reg;
5663 v->dest_reg = dest_reg;
5664 v->mult_val = mult_val;
5665 v->add_val = inc_val;
5666 v->ext_dependent = NULL_RTX;
5667 v->location = location;
5668 v->mode = GET_MODE (dest_reg);
5669 v->always_computable = ! not_every_iteration;
5670 v->always_executed = ! not_every_iteration;
5671 v->maybe_multiple = maybe_multiple;
5672 v->same = 0;
5673
5674 /* Add this to the reg's iv_class, creating a class
5675 if this is the first incrementation of the reg. */
5676
5677 bl = REG_IV_CLASS (ivs, REGNO (dest_reg));
5678 if (bl == 0)
5679 {
5680 /* Create and initialize new iv_class. */
5681
5682 bl = xmalloc (sizeof (struct iv_class));
5683
5684 bl->regno = REGNO (dest_reg);
5685 bl->biv = 0;
5686 bl->giv = 0;
5687 bl->biv_count = 0;
5688 bl->giv_count = 0;
5689
5690 /* Set initial value to the reg itself. */
5691 bl->initial_value = dest_reg;
5692 bl->final_value = 0;
5693 /* We haven't seen the initializing insn yet. */
5694 bl->init_insn = 0;
5695 bl->init_set = 0;
5696 bl->initial_test = 0;
5697 bl->incremented = 0;
5698 bl->eliminable = 0;
5699 bl->nonneg = 0;
5700 bl->reversed = 0;
5701 bl->total_benefit = 0;
5702
5703 /* Add this class to ivs->list. */
5704 bl->next = ivs->list;
5705 ivs->list = bl;
5706
5707 /* Put it in the array of biv register classes. */
5708 REG_IV_CLASS (ivs, REGNO (dest_reg)) = bl;
5709 }
5710 else
5711 {
5712 /* Check if location is the same as a previous one. */
5713 struct induction *induction;
5714 for (induction = bl->biv; induction; induction = induction->next_iv)
5715 if (location == induction->location)
5716 {
5717 v->same = induction;
5718 break;
5719 }
5720 }
5721
5722 /* Update IV_CLASS entry for this biv. */
5723 v->next_iv = bl->biv;
5724 bl->biv = v;
5725 bl->biv_count++;
5726 if (mult_val == const1_rtx)
5727 bl->incremented = 1;
5728
5729 if (loop_dump_stream)
5730 loop_biv_dump (v, loop_dump_stream, 0);
5731 }
5732 \f
5733 /* Fill in the data about one giv.
5734 V is the `struct induction' in which we record the giv. (It is
5735 allocated by the caller, with alloca.)
5736 INSN is the insn that sets it.
5737 BENEFIT estimates the savings from deleting this insn.
5738 TYPE is DEST_REG or DEST_ADDR; it says whether the giv is computed
5739 into a register or is used as a memory address.
5740
5741 SRC_REG is the biv reg which the giv is computed from.
5742 DEST_REG is the giv's reg (if the giv is stored in a reg).
5743 MULT_VAL and ADD_VAL are the coefficients used to compute the giv.
5744 LOCATION points to the place where this giv's value appears in INSN. */
5745
5746 static void
5747 record_giv (const struct loop *loop, struct induction *v, rtx insn,
5748 rtx src_reg, rtx dest_reg, rtx mult_val, rtx add_val,
5749 rtx ext_val, int benefit, enum g_types type,
5750 int not_every_iteration, int maybe_multiple, rtx *location)
5751 {
5752 struct loop_ivs *ivs = LOOP_IVS (loop);
5753 struct induction *b;
5754 struct iv_class *bl;
5755 rtx set = single_set (insn);
5756 rtx temp;
5757
5758 /* Attempt to prove constantness of the values. Don't let simplify_rtx
5759 undo the MULT canonicalization that we performed earlier. */
5760 temp = simplify_rtx (add_val);
5761 if (temp
5762 && ! (GET_CODE (add_val) == MULT
5763 && GET_CODE (temp) == ASHIFT))
5764 add_val = temp;
5765
5766 v->insn = insn;
5767 v->src_reg = src_reg;
5768 v->giv_type = type;
5769 v->dest_reg = dest_reg;
5770 v->mult_val = mult_val;
5771 v->add_val = add_val;
5772 v->ext_dependent = ext_val;
5773 v->benefit = benefit;
5774 v->location = location;
5775 v->cant_derive = 0;
5776 v->combined_with = 0;
5777 v->maybe_multiple = maybe_multiple;
5778 v->maybe_dead = 0;
5779 v->derive_adjustment = 0;
5780 v->same = 0;
5781 v->ignore = 0;
5782 v->new_reg = 0;
5783 v->final_value = 0;
5784 v->same_insn = 0;
5785 v->auto_inc_opt = 0;
5786 v->unrolled = 0;
5787 v->shared = 0;
5788
5789 /* The v->always_computable field is used in update_giv_derive, to
5790 determine whether a giv can be used to derive another giv. For a
5791 DEST_REG giv, INSN computes a new value for the giv, so its value
5792 isn't computable if INSN insn't executed every iteration.
5793 However, for a DEST_ADDR giv, INSN merely uses the value of the giv;
5794 it does not compute a new value. Hence the value is always computable
5795 regardless of whether INSN is executed each iteration. */
5796
5797 if (type == DEST_ADDR)
5798 v->always_computable = 1;
5799 else
5800 v->always_computable = ! not_every_iteration;
5801
5802 v->always_executed = ! not_every_iteration;
5803
5804 if (type == DEST_ADDR)
5805 {
5806 v->mode = GET_MODE (*location);
5807 v->lifetime = 1;
5808 }
5809 else /* type == DEST_REG */
5810 {
5811 v->mode = GET_MODE (SET_DEST (set));
5812
5813 v->lifetime = LOOP_REG_LIFETIME (loop, REGNO (dest_reg));
5814
5815 /* If the lifetime is zero, it means that this register is
5816 really a dead store. So mark this as a giv that can be
5817 ignored. This will not prevent the biv from being eliminated. */
5818 if (v->lifetime == 0)
5819 v->ignore = 1;
5820
5821 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
5822 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
5823 }
5824
5825 /* Add the giv to the class of givs computed from one biv. */
5826
5827 bl = REG_IV_CLASS (ivs, REGNO (src_reg));
5828 if (bl)
5829 {
5830 v->next_iv = bl->giv;
5831 bl->giv = v;
5832 /* Don't count DEST_ADDR. This is supposed to count the number of
5833 insns that calculate givs. */
5834 if (type == DEST_REG)
5835 bl->giv_count++;
5836 bl->total_benefit += benefit;
5837 }
5838 else
5839 /* Fatal error, biv missing for this giv? */
5840 abort ();
5841
5842 if (type == DEST_ADDR)
5843 {
5844 v->replaceable = 1;
5845 v->not_replaceable = 0;
5846 }
5847 else
5848 {
5849 /* The giv can be replaced outright by the reduced register only if all
5850 of the following conditions are true:
5851 - the insn that sets the giv is always executed on any iteration
5852 on which the giv is used at all
5853 (there are two ways to deduce this:
5854 either the insn is executed on every iteration,
5855 or all uses follow that insn in the same basic block),
5856 - the giv is not used outside the loop
5857 - no assignments to the biv occur during the giv's lifetime. */
5858
5859 if (REGNO_FIRST_UID (REGNO (dest_reg)) == INSN_UID (insn)
5860 /* Previous line always fails if INSN was moved by loop opt. */
5861 && REGNO_LAST_LUID (REGNO (dest_reg))
5862 < INSN_LUID (loop->end)
5863 && (! not_every_iteration
5864 || last_use_this_basic_block (dest_reg, insn)))
5865 {
5866 /* Now check that there are no assignments to the biv within the
5867 giv's lifetime. This requires two separate checks. */
5868
5869 /* Check each biv update, and fail if any are between the first
5870 and last use of the giv.
5871
5872 If this loop contains an inner loop that was unrolled, then
5873 the insn modifying the biv may have been emitted by the loop
5874 unrolling code, and hence does not have a valid luid. Just
5875 mark the biv as not replaceable in this case. It is not very
5876 useful as a biv, because it is used in two different loops.
5877 It is very unlikely that we would be able to optimize the giv
5878 using this biv anyways. */
5879
5880 v->replaceable = 1;
5881 v->not_replaceable = 0;
5882 for (b = bl->biv; b; b = b->next_iv)
5883 {
5884 if (INSN_UID (b->insn) >= max_uid_for_loop
5885 || ((INSN_LUID (b->insn)
5886 >= REGNO_FIRST_LUID (REGNO (dest_reg)))
5887 && (INSN_LUID (b->insn)
5888 <= REGNO_LAST_LUID (REGNO (dest_reg)))))
5889 {
5890 v->replaceable = 0;
5891 v->not_replaceable = 1;
5892 break;
5893 }
5894 }
5895
5896 /* If there are any backwards branches that go from after the
5897 biv update to before it, then this giv is not replaceable. */
5898 if (v->replaceable)
5899 for (b = bl->biv; b; b = b->next_iv)
5900 if (back_branch_in_range_p (loop, b->insn))
5901 {
5902 v->replaceable = 0;
5903 v->not_replaceable = 1;
5904 break;
5905 }
5906 }
5907 else
5908 {
5909 /* May still be replaceable, we don't have enough info here to
5910 decide. */
5911 v->replaceable = 0;
5912 v->not_replaceable = 0;
5913 }
5914 }
5915
5916 /* Record whether the add_val contains a const_int, for later use by
5917 combine_givs. */
5918 {
5919 rtx tem = add_val;
5920
5921 v->no_const_addval = 1;
5922 if (tem == const0_rtx)
5923 ;
5924 else if (CONSTANT_P (add_val))
5925 v->no_const_addval = 0;
5926 if (GET_CODE (tem) == PLUS)
5927 {
5928 while (1)
5929 {
5930 if (GET_CODE (XEXP (tem, 0)) == PLUS)
5931 tem = XEXP (tem, 0);
5932 else if (GET_CODE (XEXP (tem, 1)) == PLUS)
5933 tem = XEXP (tem, 1);
5934 else
5935 break;
5936 }
5937 if (CONSTANT_P (XEXP (tem, 1)))
5938 v->no_const_addval = 0;
5939 }
5940 }
5941
5942 if (loop_dump_stream)
5943 loop_giv_dump (v, loop_dump_stream, 0);
5944 }
5945
5946 /* All this does is determine whether a giv can be made replaceable because
5947 its final value can be calculated. This code can not be part of record_giv
5948 above, because final_giv_value requires that the number of loop iterations
5949 be known, and that can not be accurately calculated until after all givs
5950 have been identified. */
5951
5952 static void
5953 check_final_value (const struct loop *loop, struct induction *v)
5954 {
5955 rtx final_value = 0;
5956
5957 /* DEST_ADDR givs will never reach here, because they are always marked
5958 replaceable above in record_giv. */
5959
5960 /* The giv can be replaced outright by the reduced register only if all
5961 of the following conditions are true:
5962 - the insn that sets the giv is always executed on any iteration
5963 on which the giv is used at all
5964 (there are two ways to deduce this:
5965 either the insn is executed on every iteration,
5966 or all uses follow that insn in the same basic block),
5967 - its final value can be calculated (this condition is different
5968 than the one above in record_giv)
5969 - it's not used before the it's set
5970 - no assignments to the biv occur during the giv's lifetime. */
5971
5972 #if 0
5973 /* This is only called now when replaceable is known to be false. */
5974 /* Clear replaceable, so that it won't confuse final_giv_value. */
5975 v->replaceable = 0;
5976 #endif
5977
5978 if ((final_value = final_giv_value (loop, v))
5979 && (v->always_executed
5980 || last_use_this_basic_block (v->dest_reg, v->insn)))
5981 {
5982 int biv_increment_seen = 0, before_giv_insn = 0;
5983 rtx p = v->insn;
5984 rtx last_giv_use;
5985
5986 v->replaceable = 1;
5987 v->not_replaceable = 0;
5988
5989 /* When trying to determine whether or not a biv increment occurs
5990 during the lifetime of the giv, we can ignore uses of the variable
5991 outside the loop because final_value is true. Hence we can not
5992 use regno_last_uid and regno_first_uid as above in record_giv. */
5993
5994 /* Search the loop to determine whether any assignments to the
5995 biv occur during the giv's lifetime. Start with the insn
5996 that sets the giv, and search around the loop until we come
5997 back to that insn again.
5998
5999 Also fail if there is a jump within the giv's lifetime that jumps
6000 to somewhere outside the lifetime but still within the loop. This
6001 catches spaghetti code where the execution order is not linear, and
6002 hence the above test fails. Here we assume that the giv lifetime
6003 does not extend from one iteration of the loop to the next, so as
6004 to make the test easier. Since the lifetime isn't known yet,
6005 this requires two loops. See also record_giv above. */
6006
6007 last_giv_use = v->insn;
6008
6009 while (1)
6010 {
6011 p = NEXT_INSN (p);
6012 if (p == loop->end)
6013 {
6014 before_giv_insn = 1;
6015 p = NEXT_INSN (loop->start);
6016 }
6017 if (p == v->insn)
6018 break;
6019
6020 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
6021 || GET_CODE (p) == CALL_INSN)
6022 {
6023 /* It is possible for the BIV increment to use the GIV if we
6024 have a cycle. Thus we must be sure to check each insn for
6025 both BIV and GIV uses, and we must check for BIV uses
6026 first. */
6027
6028 if (! biv_increment_seen
6029 && reg_set_p (v->src_reg, PATTERN (p)))
6030 biv_increment_seen = 1;
6031
6032 if (reg_mentioned_p (v->dest_reg, PATTERN (p)))
6033 {
6034 if (biv_increment_seen || before_giv_insn)
6035 {
6036 v->replaceable = 0;
6037 v->not_replaceable = 1;
6038 break;
6039 }
6040 last_giv_use = p;
6041 }
6042 }
6043 }
6044
6045 /* Now that the lifetime of the giv is known, check for branches
6046 from within the lifetime to outside the lifetime if it is still
6047 replaceable. */
6048
6049 if (v->replaceable)
6050 {
6051 p = v->insn;
6052 while (1)
6053 {
6054 p = NEXT_INSN (p);
6055 if (p == loop->end)
6056 p = NEXT_INSN (loop->start);
6057 if (p == last_giv_use)
6058 break;
6059
6060 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
6061 && LABEL_NAME (JUMP_LABEL (p))
6062 && ((loop_insn_first_p (JUMP_LABEL (p), v->insn)
6063 && loop_insn_first_p (loop->start, JUMP_LABEL (p)))
6064 || (loop_insn_first_p (last_giv_use, JUMP_LABEL (p))
6065 && loop_insn_first_p (JUMP_LABEL (p), loop->end))))
6066 {
6067 v->replaceable = 0;
6068 v->not_replaceable = 1;
6069
6070 if (loop_dump_stream)
6071 fprintf (loop_dump_stream,
6072 "Found branch outside giv lifetime.\n");
6073
6074 break;
6075 }
6076 }
6077 }
6078
6079 /* If it is replaceable, then save the final value. */
6080 if (v->replaceable)
6081 v->final_value = final_value;
6082 }
6083
6084 if (loop_dump_stream && v->replaceable)
6085 fprintf (loop_dump_stream, "Insn %d: giv reg %d final_value replaceable\n",
6086 INSN_UID (v->insn), REGNO (v->dest_reg));
6087 }
6088 \f
6089 /* Update the status of whether a giv can derive other givs.
6090
6091 We need to do something special if there is or may be an update to the biv
6092 between the time the giv is defined and the time it is used to derive
6093 another giv.
6094
6095 In addition, a giv that is only conditionally set is not allowed to
6096 derive another giv once a label has been passed.
6097
6098 The cases we look at are when a label or an update to a biv is passed. */
6099
6100 static void
6101 update_giv_derive (const struct loop *loop, rtx p)
6102 {
6103 struct loop_ivs *ivs = LOOP_IVS (loop);
6104 struct iv_class *bl;
6105 struct induction *biv, *giv;
6106 rtx tem;
6107 int dummy;
6108
6109 /* Search all IV classes, then all bivs, and finally all givs.
6110
6111 There are three cases we are concerned with. First we have the situation
6112 of a giv that is only updated conditionally. In that case, it may not
6113 derive any givs after a label is passed.
6114
6115 The second case is when a biv update occurs, or may occur, after the
6116 definition of a giv. For certain biv updates (see below) that are
6117 known to occur between the giv definition and use, we can adjust the
6118 giv definition. For others, or when the biv update is conditional,
6119 we must prevent the giv from deriving any other givs. There are two
6120 sub-cases within this case.
6121
6122 If this is a label, we are concerned with any biv update that is done
6123 conditionally, since it may be done after the giv is defined followed by
6124 a branch here (actually, we need to pass both a jump and a label, but
6125 this extra tracking doesn't seem worth it).
6126
6127 If this is a jump, we are concerned about any biv update that may be
6128 executed multiple times. We are actually only concerned about
6129 backward jumps, but it is probably not worth performing the test
6130 on the jump again here.
6131
6132 If this is a biv update, we must adjust the giv status to show that a
6133 subsequent biv update was performed. If this adjustment cannot be done,
6134 the giv cannot derive further givs. */
6135
6136 for (bl = ivs->list; bl; bl = bl->next)
6137 for (biv = bl->biv; biv; biv = biv->next_iv)
6138 if (GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN
6139 || biv->insn == p)
6140 {
6141 /* Skip if location is the same as a previous one. */
6142 if (biv->same)
6143 continue;
6144
6145 for (giv = bl->giv; giv; giv = giv->next_iv)
6146 {
6147 /* If cant_derive is already true, there is no point in
6148 checking all of these conditions again. */
6149 if (giv->cant_derive)
6150 continue;
6151
6152 /* If this giv is conditionally set and we have passed a label,
6153 it cannot derive anything. */
6154 if (GET_CODE (p) == CODE_LABEL && ! giv->always_computable)
6155 giv->cant_derive = 1;
6156
6157 /* Skip givs that have mult_val == 0, since
6158 they are really invariants. Also skip those that are
6159 replaceable, since we know their lifetime doesn't contain
6160 any biv update. */
6161 else if (giv->mult_val == const0_rtx || giv->replaceable)
6162 continue;
6163
6164 /* The only way we can allow this giv to derive another
6165 is if this is a biv increment and we can form the product
6166 of biv->add_val and giv->mult_val. In this case, we will
6167 be able to compute a compensation. */
6168 else if (biv->insn == p)
6169 {
6170 rtx ext_val_dummy;
6171
6172 tem = 0;
6173 if (biv->mult_val == const1_rtx)
6174 tem = simplify_giv_expr (loop,
6175 gen_rtx_MULT (giv->mode,
6176 biv->add_val,
6177 giv->mult_val),
6178 &ext_val_dummy, &dummy);
6179
6180 if (tem && giv->derive_adjustment)
6181 tem = simplify_giv_expr
6182 (loop,
6183 gen_rtx_PLUS (giv->mode, tem, giv->derive_adjustment),
6184 &ext_val_dummy, &dummy);
6185
6186 if (tem)
6187 giv->derive_adjustment = tem;
6188 else
6189 giv->cant_derive = 1;
6190 }
6191 else if ((GET_CODE (p) == CODE_LABEL && ! biv->always_computable)
6192 || (GET_CODE (p) == JUMP_INSN && biv->maybe_multiple))
6193 giv->cant_derive = 1;
6194 }
6195 }
6196 }
6197 \f
6198 /* Check whether an insn is an increment legitimate for a basic induction var.
6199 X is the source of insn P, or a part of it.
6200 MODE is the mode in which X should be interpreted.
6201
6202 DEST_REG is the putative biv, also the destination of the insn.
6203 We accept patterns of these forms:
6204 REG = REG + INVARIANT (includes REG = REG - CONSTANT)
6205 REG = INVARIANT + REG
6206
6207 If X is suitable, we return 1, set *MULT_VAL to CONST1_RTX,
6208 store the additive term into *INC_VAL, and store the place where
6209 we found the additive term into *LOCATION.
6210
6211 If X is an assignment of an invariant into DEST_REG, we set
6212 *MULT_VAL to CONST0_RTX, and store the invariant into *INC_VAL.
6213
6214 We also want to detect a BIV when it corresponds to a variable
6215 whose mode was promoted. In that case, an increment
6216 of the variable may be a PLUS that adds a SUBREG of that variable to
6217 an invariant and then sign- or zero-extends the result of the PLUS
6218 into the variable.
6219
6220 Most GIVs in such cases will be in the promoted mode, since that is the
6221 probably the natural computation mode (and almost certainly the mode
6222 used for addresses) on the machine. So we view the pseudo-reg containing
6223 the variable as the BIV, as if it were simply incremented.
6224
6225 Note that treating the entire pseudo as a BIV will result in making
6226 simple increments to any GIVs based on it. However, if the variable
6227 overflows in its declared mode but not its promoted mode, the result will
6228 be incorrect. This is acceptable if the variable is signed, since
6229 overflows in such cases are undefined, but not if it is unsigned, since
6230 those overflows are defined. So we only check for SIGN_EXTEND and
6231 not ZERO_EXTEND.
6232
6233 If we cannot find a biv, we return 0. */
6234
6235 static int
6236 basic_induction_var (const struct loop *loop, rtx x, enum machine_mode mode,
6237 rtx dest_reg, rtx p, rtx *inc_val, rtx *mult_val,
6238 rtx **location)
6239 {
6240 enum rtx_code code;
6241 rtx *argp, arg;
6242 rtx insn, set = 0, last, inc;
6243
6244 code = GET_CODE (x);
6245 *location = NULL;
6246 switch (code)
6247 {
6248 case PLUS:
6249 if (rtx_equal_p (XEXP (x, 0), dest_reg)
6250 || (GET_CODE (XEXP (x, 0)) == SUBREG
6251 && SUBREG_PROMOTED_VAR_P (XEXP (x, 0))
6252 && SUBREG_REG (XEXP (x, 0)) == dest_reg))
6253 {
6254 argp = &XEXP (x, 1);
6255 }
6256 else if (rtx_equal_p (XEXP (x, 1), dest_reg)
6257 || (GET_CODE (XEXP (x, 1)) == SUBREG
6258 && SUBREG_PROMOTED_VAR_P (XEXP (x, 1))
6259 && SUBREG_REG (XEXP (x, 1)) == dest_reg))
6260 {
6261 argp = &XEXP (x, 0);
6262 }
6263 else
6264 return 0;
6265
6266 arg = *argp;
6267 if (loop_invariant_p (loop, arg) != 1)
6268 return 0;
6269
6270 /* convert_modes can emit new instructions, e.g. when arg is a loop
6271 invariant MEM and dest_reg has a different mode.
6272 These instructions would be emitted after the end of the function
6273 and then *inc_val would be an uninitialized pseudo.
6274 Detect this and bail in this case.
6275 Other alternatives to solve this can be introducing a convert_modes
6276 variant which is allowed to fail but not allowed to emit new
6277 instructions, emit these instructions before loop start and let
6278 it be garbage collected if *inc_val is never used or saving the
6279 *inc_val initialization sequence generated here and when *inc_val
6280 is going to be actually used, emit it at some suitable place. */
6281 last = get_last_insn ();
6282 inc = convert_modes (GET_MODE (dest_reg), GET_MODE (x), arg, 0);
6283 if (get_last_insn () != last)
6284 {
6285 delete_insns_since (last);
6286 return 0;
6287 }
6288
6289 *inc_val = inc;
6290 *mult_val = const1_rtx;
6291 *location = argp;
6292 return 1;
6293
6294 case SUBREG:
6295 /* If what's inside the SUBREG is a BIV, then the SUBREG. This will
6296 handle addition of promoted variables.
6297 ??? The comment at the start of this function is wrong: promoted
6298 variable increments don't look like it says they do. */
6299 return basic_induction_var (loop, SUBREG_REG (x),
6300 GET_MODE (SUBREG_REG (x)),
6301 dest_reg, p, inc_val, mult_val, location);
6302
6303 case REG:
6304 /* If this register is assigned in a previous insn, look at its
6305 source, but don't go outside the loop or past a label. */
6306
6307 /* If this sets a register to itself, we would repeat any previous
6308 biv increment if we applied this strategy blindly. */
6309 if (rtx_equal_p (dest_reg, x))
6310 return 0;
6311
6312 insn = p;
6313 while (1)
6314 {
6315 rtx dest;
6316 do
6317 {
6318 insn = PREV_INSN (insn);
6319 }
6320 while (insn && GET_CODE (insn) == NOTE
6321 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6322
6323 if (!insn)
6324 break;
6325 set = single_set (insn);
6326 if (set == 0)
6327 break;
6328 dest = SET_DEST (set);
6329 if (dest == x
6330 || (GET_CODE (dest) == SUBREG
6331 && (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
6332 && (GET_MODE_CLASS (GET_MODE (dest)) == MODE_INT)
6333 && SUBREG_REG (dest) == x))
6334 return basic_induction_var (loop, SET_SRC (set),
6335 (GET_MODE (SET_SRC (set)) == VOIDmode
6336 ? GET_MODE (x)
6337 : GET_MODE (SET_SRC (set))),
6338 dest_reg, insn,
6339 inc_val, mult_val, location);
6340
6341 while (GET_CODE (dest) == SIGN_EXTRACT
6342 || GET_CODE (dest) == ZERO_EXTRACT
6343 || GET_CODE (dest) == SUBREG
6344 || GET_CODE (dest) == STRICT_LOW_PART)
6345 dest = XEXP (dest, 0);
6346 if (dest == x)
6347 break;
6348 }
6349 /* Fall through. */
6350
6351 /* Can accept constant setting of biv only when inside inner most loop.
6352 Otherwise, a biv of an inner loop may be incorrectly recognized
6353 as a biv of the outer loop,
6354 causing code to be moved INTO the inner loop. */
6355 case MEM:
6356 if (loop_invariant_p (loop, x) != 1)
6357 return 0;
6358 case CONST_INT:
6359 case SYMBOL_REF:
6360 case CONST:
6361 /* convert_modes aborts if we try to convert to or from CCmode, so just
6362 exclude that case. It is very unlikely that a condition code value
6363 would be a useful iterator anyways. convert_modes aborts if we try to
6364 convert a float mode to non-float or vice versa too. */
6365 if (loop->level == 1
6366 && GET_MODE_CLASS (mode) == GET_MODE_CLASS (GET_MODE (dest_reg))
6367 && GET_MODE_CLASS (mode) != MODE_CC)
6368 {
6369 /* Possible bug here? Perhaps we don't know the mode of X. */
6370 last = get_last_insn ();
6371 inc = convert_modes (GET_MODE (dest_reg), mode, x, 0);
6372 if (get_last_insn () != last)
6373 {
6374 delete_insns_since (last);
6375 return 0;
6376 }
6377
6378 *inc_val = inc;
6379 *mult_val = const0_rtx;
6380 return 1;
6381 }
6382 else
6383 return 0;
6384
6385 case SIGN_EXTEND:
6386 /* Ignore this BIV if signed arithmetic overflow is defined. */
6387 if (flag_wrapv)
6388 return 0;
6389 return basic_induction_var (loop, XEXP (x, 0), GET_MODE (XEXP (x, 0)),
6390 dest_reg, p, inc_val, mult_val, location);
6391
6392 case ASHIFTRT:
6393 /* Similar, since this can be a sign extension. */
6394 for (insn = PREV_INSN (p);
6395 (insn && GET_CODE (insn) == NOTE
6396 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6397 insn = PREV_INSN (insn))
6398 ;
6399
6400 if (insn)
6401 set = single_set (insn);
6402
6403 if (! rtx_equal_p (dest_reg, XEXP (x, 0))
6404 && set && SET_DEST (set) == XEXP (x, 0)
6405 && GET_CODE (XEXP (x, 1)) == CONST_INT
6406 && INTVAL (XEXP (x, 1)) >= 0
6407 && GET_CODE (SET_SRC (set)) == ASHIFT
6408 && XEXP (x, 1) == XEXP (SET_SRC (set), 1))
6409 return basic_induction_var (loop, XEXP (SET_SRC (set), 0),
6410 GET_MODE (XEXP (x, 0)),
6411 dest_reg, insn, inc_val, mult_val,
6412 location);
6413 return 0;
6414
6415 default:
6416 return 0;
6417 }
6418 }
6419 \f
6420 /* A general induction variable (giv) is any quantity that is a linear
6421 function of a basic induction variable,
6422 i.e. giv = biv * mult_val + add_val.
6423 The coefficients can be any loop invariant quantity.
6424 A giv need not be computed directly from the biv;
6425 it can be computed by way of other givs. */
6426
6427 /* Determine whether X computes a giv.
6428 If it does, return a nonzero value
6429 which is the benefit from eliminating the computation of X;
6430 set *SRC_REG to the register of the biv that it is computed from;
6431 set *ADD_VAL and *MULT_VAL to the coefficients,
6432 such that the value of X is biv * mult + add; */
6433
6434 static int
6435 general_induction_var (const struct loop *loop, rtx x, rtx *src_reg,
6436 rtx *add_val, rtx *mult_val, rtx *ext_val,
6437 int is_addr, int *pbenefit,
6438 enum machine_mode addr_mode)
6439 {
6440 struct loop_ivs *ivs = LOOP_IVS (loop);
6441 rtx orig_x = x;
6442
6443 /* If this is an invariant, forget it, it isn't a giv. */
6444 if (loop_invariant_p (loop, x) == 1)
6445 return 0;
6446
6447 *pbenefit = 0;
6448 *ext_val = NULL_RTX;
6449 x = simplify_giv_expr (loop, x, ext_val, pbenefit);
6450 if (x == 0)
6451 return 0;
6452
6453 switch (GET_CODE (x))
6454 {
6455 case USE:
6456 case CONST_INT:
6457 /* Since this is now an invariant and wasn't before, it must be a giv
6458 with MULT_VAL == 0. It doesn't matter which BIV we associate this
6459 with. */
6460 *src_reg = ivs->list->biv->dest_reg;
6461 *mult_val = const0_rtx;
6462 *add_val = x;
6463 break;
6464
6465 case REG:
6466 /* This is equivalent to a BIV. */
6467 *src_reg = x;
6468 *mult_val = const1_rtx;
6469 *add_val = const0_rtx;
6470 break;
6471
6472 case PLUS:
6473 /* Either (plus (biv) (invar)) or
6474 (plus (mult (biv) (invar_1)) (invar_2)). */
6475 if (GET_CODE (XEXP (x, 0)) == MULT)
6476 {
6477 *src_reg = XEXP (XEXP (x, 0), 0);
6478 *mult_val = XEXP (XEXP (x, 0), 1);
6479 }
6480 else
6481 {
6482 *src_reg = XEXP (x, 0);
6483 *mult_val = const1_rtx;
6484 }
6485 *add_val = XEXP (x, 1);
6486 break;
6487
6488 case MULT:
6489 /* ADD_VAL is zero. */
6490 *src_reg = XEXP (x, 0);
6491 *mult_val = XEXP (x, 1);
6492 *add_val = const0_rtx;
6493 break;
6494
6495 default:
6496 abort ();
6497 }
6498
6499 /* Remove any enclosing USE from ADD_VAL and MULT_VAL (there will be
6500 unless they are CONST_INT). */
6501 if (GET_CODE (*add_val) == USE)
6502 *add_val = XEXP (*add_val, 0);
6503 if (GET_CODE (*mult_val) == USE)
6504 *mult_val = XEXP (*mult_val, 0);
6505
6506 if (is_addr)
6507 *pbenefit += address_cost (orig_x, addr_mode) - reg_address_cost;
6508 else
6509 *pbenefit += rtx_cost (orig_x, SET);
6510
6511 /* Always return true if this is a giv so it will be detected as such,
6512 even if the benefit is zero or negative. This allows elimination
6513 of bivs that might otherwise not be eliminated. */
6514 return 1;
6515 }
6516 \f
6517 /* Given an expression, X, try to form it as a linear function of a biv.
6518 We will canonicalize it to be of the form
6519 (plus (mult (BIV) (invar_1))
6520 (invar_2))
6521 with possible degeneracies.
6522
6523 The invariant expressions must each be of a form that can be used as a
6524 machine operand. We surround then with a USE rtx (a hack, but localized
6525 and certainly unambiguous!) if not a CONST_INT for simplicity in this
6526 routine; it is the caller's responsibility to strip them.
6527
6528 If no such canonicalization is possible (i.e., two biv's are used or an
6529 expression that is neither invariant nor a biv or giv), this routine
6530 returns 0.
6531
6532 For a nonzero return, the result will have a code of CONST_INT, USE,
6533 REG (for a BIV), PLUS, or MULT. No other codes will occur.
6534
6535 *BENEFIT will be incremented by the benefit of any sub-giv encountered. */
6536
6537 static rtx sge_plus (enum machine_mode, rtx, rtx);
6538 static rtx sge_plus_constant (rtx, rtx);
6539
6540 static rtx
6541 simplify_giv_expr (const struct loop *loop, rtx x, rtx *ext_val, int *benefit)
6542 {
6543 struct loop_ivs *ivs = LOOP_IVS (loop);
6544 struct loop_regs *regs = LOOP_REGS (loop);
6545 enum machine_mode mode = GET_MODE (x);
6546 rtx arg0, arg1;
6547 rtx tem;
6548
6549 /* If this is not an integer mode, or if we cannot do arithmetic in this
6550 mode, this can't be a giv. */
6551 if (mode != VOIDmode
6552 && (GET_MODE_CLASS (mode) != MODE_INT
6553 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT))
6554 return NULL_RTX;
6555
6556 switch (GET_CODE (x))
6557 {
6558 case PLUS:
6559 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6560 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6561 if (arg0 == 0 || arg1 == 0)
6562 return NULL_RTX;
6563
6564 /* Put constant last, CONST_INT last if both constant. */
6565 if ((GET_CODE (arg0) == USE
6566 || GET_CODE (arg0) == CONST_INT)
6567 && ! ((GET_CODE (arg0) == USE
6568 && GET_CODE (arg1) == USE)
6569 || GET_CODE (arg1) == CONST_INT))
6570 tem = arg0, arg0 = arg1, arg1 = tem;
6571
6572 /* Handle addition of zero, then addition of an invariant. */
6573 if (arg1 == const0_rtx)
6574 return arg0;
6575 else if (GET_CODE (arg1) == CONST_INT || GET_CODE (arg1) == USE)
6576 switch (GET_CODE (arg0))
6577 {
6578 case CONST_INT:
6579 case USE:
6580 /* Adding two invariants must result in an invariant, so enclose
6581 addition operation inside a USE and return it. */
6582 if (GET_CODE (arg0) == USE)
6583 arg0 = XEXP (arg0, 0);
6584 if (GET_CODE (arg1) == USE)
6585 arg1 = XEXP (arg1, 0);
6586
6587 if (GET_CODE (arg0) == CONST_INT)
6588 tem = arg0, arg0 = arg1, arg1 = tem;
6589 if (GET_CODE (arg1) == CONST_INT)
6590 tem = sge_plus_constant (arg0, arg1);
6591 else
6592 tem = sge_plus (mode, arg0, arg1);
6593
6594 if (GET_CODE (tem) != CONST_INT)
6595 tem = gen_rtx_USE (mode, tem);
6596 return tem;
6597
6598 case REG:
6599 case MULT:
6600 /* biv + invar or mult + invar. Return sum. */
6601 return gen_rtx_PLUS (mode, arg0, arg1);
6602
6603 case PLUS:
6604 /* (a + invar_1) + invar_2. Associate. */
6605 return
6606 simplify_giv_expr (loop,
6607 gen_rtx_PLUS (mode,
6608 XEXP (arg0, 0),
6609 gen_rtx_PLUS (mode,
6610 XEXP (arg0, 1),
6611 arg1)),
6612 ext_val, benefit);
6613
6614 default:
6615 abort ();
6616 }
6617
6618 /* Each argument must be either REG, PLUS, or MULT. Convert REG to
6619 MULT to reduce cases. */
6620 if (GET_CODE (arg0) == REG)
6621 arg0 = gen_rtx_MULT (mode, arg0, const1_rtx);
6622 if (GET_CODE (arg1) == REG)
6623 arg1 = gen_rtx_MULT (mode, arg1, const1_rtx);
6624
6625 /* Now have PLUS + PLUS, PLUS + MULT, MULT + PLUS, or MULT + MULT.
6626 Put a MULT first, leaving PLUS + PLUS, MULT + PLUS, or MULT + MULT.
6627 Recurse to associate the second PLUS. */
6628 if (GET_CODE (arg1) == MULT)
6629 tem = arg0, arg0 = arg1, arg1 = tem;
6630
6631 if (GET_CODE (arg1) == PLUS)
6632 return
6633 simplify_giv_expr (loop,
6634 gen_rtx_PLUS (mode,
6635 gen_rtx_PLUS (mode, arg0,
6636 XEXP (arg1, 0)),
6637 XEXP (arg1, 1)),
6638 ext_val, benefit);
6639
6640 /* Now must have MULT + MULT. Distribute if same biv, else not giv. */
6641 if (GET_CODE (arg0) != MULT || GET_CODE (arg1) != MULT)
6642 return NULL_RTX;
6643
6644 if (!rtx_equal_p (arg0, arg1))
6645 return NULL_RTX;
6646
6647 return simplify_giv_expr (loop,
6648 gen_rtx_MULT (mode,
6649 XEXP (arg0, 0),
6650 gen_rtx_PLUS (mode,
6651 XEXP (arg0, 1),
6652 XEXP (arg1, 1))),
6653 ext_val, benefit);
6654
6655 case MINUS:
6656 /* Handle "a - b" as "a + b * (-1)". */
6657 return simplify_giv_expr (loop,
6658 gen_rtx_PLUS (mode,
6659 XEXP (x, 0),
6660 gen_rtx_MULT (mode,
6661 XEXP (x, 1),
6662 constm1_rtx)),
6663 ext_val, benefit);
6664
6665 case MULT:
6666 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6667 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6668 if (arg0 == 0 || arg1 == 0)
6669 return NULL_RTX;
6670
6671 /* Put constant last, CONST_INT last if both constant. */
6672 if ((GET_CODE (arg0) == USE || GET_CODE (arg0) == CONST_INT)
6673 && GET_CODE (arg1) != CONST_INT)
6674 tem = arg0, arg0 = arg1, arg1 = tem;
6675
6676 /* If second argument is not now constant, not giv. */
6677 if (GET_CODE (arg1) != USE && GET_CODE (arg1) != CONST_INT)
6678 return NULL_RTX;
6679
6680 /* Handle multiply by 0 or 1. */
6681 if (arg1 == const0_rtx)
6682 return const0_rtx;
6683
6684 else if (arg1 == const1_rtx)
6685 return arg0;
6686
6687 switch (GET_CODE (arg0))
6688 {
6689 case REG:
6690 /* biv * invar. Done. */
6691 return gen_rtx_MULT (mode, arg0, arg1);
6692
6693 case CONST_INT:
6694 /* Product of two constants. */
6695 return GEN_INT (INTVAL (arg0) * INTVAL (arg1));
6696
6697 case USE:
6698 /* invar * invar is a giv, but attempt to simplify it somehow. */
6699 if (GET_CODE (arg1) != CONST_INT)
6700 return NULL_RTX;
6701
6702 arg0 = XEXP (arg0, 0);
6703 if (GET_CODE (arg0) == MULT)
6704 {
6705 /* (invar_0 * invar_1) * invar_2. Associate. */
6706 return simplify_giv_expr (loop,
6707 gen_rtx_MULT (mode,
6708 XEXP (arg0, 0),
6709 gen_rtx_MULT (mode,
6710 XEXP (arg0,
6711 1),
6712 arg1)),
6713 ext_val, benefit);
6714 }
6715 /* Propagate the MULT expressions to the innermost nodes. */
6716 else if (GET_CODE (arg0) == PLUS)
6717 {
6718 /* (invar_0 + invar_1) * invar_2. Distribute. */
6719 return simplify_giv_expr (loop,
6720 gen_rtx_PLUS (mode,
6721 gen_rtx_MULT (mode,
6722 XEXP (arg0,
6723 0),
6724 arg1),
6725 gen_rtx_MULT (mode,
6726 XEXP (arg0,
6727 1),
6728 arg1)),
6729 ext_val, benefit);
6730 }
6731 return gen_rtx_USE (mode, gen_rtx_MULT (mode, arg0, arg1));
6732
6733 case MULT:
6734 /* (a * invar_1) * invar_2. Associate. */
6735 return simplify_giv_expr (loop,
6736 gen_rtx_MULT (mode,
6737 XEXP (arg0, 0),
6738 gen_rtx_MULT (mode,
6739 XEXP (arg0, 1),
6740 arg1)),
6741 ext_val, benefit);
6742
6743 case PLUS:
6744 /* (a + invar_1) * invar_2. Distribute. */
6745 return simplify_giv_expr (loop,
6746 gen_rtx_PLUS (mode,
6747 gen_rtx_MULT (mode,
6748 XEXP (arg0, 0),
6749 arg1),
6750 gen_rtx_MULT (mode,
6751 XEXP (arg0, 1),
6752 arg1)),
6753 ext_val, benefit);
6754
6755 default:
6756 abort ();
6757 }
6758
6759 case ASHIFT:
6760 /* Shift by constant is multiply by power of two. */
6761 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6762 return 0;
6763
6764 return
6765 simplify_giv_expr (loop,
6766 gen_rtx_MULT (mode,
6767 XEXP (x, 0),
6768 GEN_INT ((HOST_WIDE_INT) 1
6769 << INTVAL (XEXP (x, 1)))),
6770 ext_val, benefit);
6771
6772 case NEG:
6773 /* "-a" is "a * (-1)" */
6774 return simplify_giv_expr (loop,
6775 gen_rtx_MULT (mode, XEXP (x, 0), constm1_rtx),
6776 ext_val, benefit);
6777
6778 case NOT:
6779 /* "~a" is "-a - 1". Silly, but easy. */
6780 return simplify_giv_expr (loop,
6781 gen_rtx_MINUS (mode,
6782 gen_rtx_NEG (mode, XEXP (x, 0)),
6783 const1_rtx),
6784 ext_val, benefit);
6785
6786 case USE:
6787 /* Already in proper form for invariant. */
6788 return x;
6789
6790 case SIGN_EXTEND:
6791 case ZERO_EXTEND:
6792 case TRUNCATE:
6793 /* Conditionally recognize extensions of simple IVs. After we've
6794 computed loop traversal counts and verified the range of the
6795 source IV, we'll reevaluate this as a GIV. */
6796 if (*ext_val == NULL_RTX)
6797 {
6798 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6799 if (arg0 && *ext_val == NULL_RTX && GET_CODE (arg0) == REG)
6800 {
6801 *ext_val = gen_rtx_fmt_e (GET_CODE (x), mode, arg0);
6802 return arg0;
6803 }
6804 }
6805 goto do_default;
6806
6807 case REG:
6808 /* If this is a new register, we can't deal with it. */
6809 if (REGNO (x) >= max_reg_before_loop)
6810 return 0;
6811
6812 /* Check for biv or giv. */
6813 switch (REG_IV_TYPE (ivs, REGNO (x)))
6814 {
6815 case BASIC_INDUCT:
6816 return x;
6817 case GENERAL_INDUCT:
6818 {
6819 struct induction *v = REG_IV_INFO (ivs, REGNO (x));
6820
6821 /* Form expression from giv and add benefit. Ensure this giv
6822 can derive another and subtract any needed adjustment if so. */
6823
6824 /* Increasing the benefit here is risky. The only case in which it
6825 is arguably correct is if this is the only use of V. In other
6826 cases, this will artificially inflate the benefit of the current
6827 giv, and lead to suboptimal code. Thus, it is disabled, since
6828 potentially not reducing an only marginally beneficial giv is
6829 less harmful than reducing many givs that are not really
6830 beneficial. */
6831 {
6832 rtx single_use = regs->array[REGNO (x)].single_usage;
6833 if (single_use && single_use != const0_rtx)
6834 *benefit += v->benefit;
6835 }
6836
6837 if (v->cant_derive)
6838 return 0;
6839
6840 tem = gen_rtx_PLUS (mode, gen_rtx_MULT (mode,
6841 v->src_reg, v->mult_val),
6842 v->add_val);
6843
6844 if (v->derive_adjustment)
6845 tem = gen_rtx_MINUS (mode, tem, v->derive_adjustment);
6846 arg0 = simplify_giv_expr (loop, tem, ext_val, benefit);
6847 if (*ext_val)
6848 {
6849 if (!v->ext_dependent)
6850 return arg0;
6851 }
6852 else
6853 {
6854 *ext_val = v->ext_dependent;
6855 return arg0;
6856 }
6857 return 0;
6858 }
6859
6860 default:
6861 do_default:
6862 /* If it isn't an induction variable, and it is invariant, we
6863 may be able to simplify things further by looking through
6864 the bits we just moved outside the loop. */
6865 if (loop_invariant_p (loop, x) == 1)
6866 {
6867 struct movable *m;
6868 struct loop_movables *movables = LOOP_MOVABLES (loop);
6869
6870 for (m = movables->head; m; m = m->next)
6871 if (rtx_equal_p (x, m->set_dest))
6872 {
6873 /* Ok, we found a match. Substitute and simplify. */
6874
6875 /* If we match another movable, we must use that, as
6876 this one is going away. */
6877 if (m->match)
6878 return simplify_giv_expr (loop, m->match->set_dest,
6879 ext_val, benefit);
6880
6881 /* If consec is nonzero, this is a member of a group of
6882 instructions that were moved together. We handle this
6883 case only to the point of seeking to the last insn and
6884 looking for a REG_EQUAL. Fail if we don't find one. */
6885 if (m->consec != 0)
6886 {
6887 int i = m->consec;
6888 tem = m->insn;
6889 do
6890 {
6891 tem = NEXT_INSN (tem);
6892 }
6893 while (--i > 0);
6894
6895 tem = find_reg_note (tem, REG_EQUAL, NULL_RTX);
6896 if (tem)
6897 tem = XEXP (tem, 0);
6898 }
6899 else
6900 {
6901 tem = single_set (m->insn);
6902 if (tem)
6903 tem = SET_SRC (tem);
6904 }
6905
6906 if (tem)
6907 {
6908 /* What we are most interested in is pointer
6909 arithmetic on invariants -- only take
6910 patterns we may be able to do something with. */
6911 if (GET_CODE (tem) == PLUS
6912 || GET_CODE (tem) == MULT
6913 || GET_CODE (tem) == ASHIFT
6914 || GET_CODE (tem) == CONST_INT
6915 || GET_CODE (tem) == SYMBOL_REF)
6916 {
6917 tem = simplify_giv_expr (loop, tem, ext_val,
6918 benefit);
6919 if (tem)
6920 return tem;
6921 }
6922 else if (GET_CODE (tem) == CONST
6923 && GET_CODE (XEXP (tem, 0)) == PLUS
6924 && GET_CODE (XEXP (XEXP (tem, 0), 0)) == SYMBOL_REF
6925 && GET_CODE (XEXP (XEXP (tem, 0), 1)) == CONST_INT)
6926 {
6927 tem = simplify_giv_expr (loop, XEXP (tem, 0),
6928 ext_val, benefit);
6929 if (tem)
6930 return tem;
6931 }
6932 }
6933 break;
6934 }
6935 }
6936 break;
6937 }
6938
6939 /* Fall through to general case. */
6940 default:
6941 /* If invariant, return as USE (unless CONST_INT).
6942 Otherwise, not giv. */
6943 if (GET_CODE (x) == USE)
6944 x = XEXP (x, 0);
6945
6946 if (loop_invariant_p (loop, x) == 1)
6947 {
6948 if (GET_CODE (x) == CONST_INT)
6949 return x;
6950 if (GET_CODE (x) == CONST
6951 && GET_CODE (XEXP (x, 0)) == PLUS
6952 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6953 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
6954 x = XEXP (x, 0);
6955 return gen_rtx_USE (mode, x);
6956 }
6957 else
6958 return 0;
6959 }
6960 }
6961
6962 /* This routine folds invariants such that there is only ever one
6963 CONST_INT in the summation. It is only used by simplify_giv_expr. */
6964
6965 static rtx
6966 sge_plus_constant (rtx x, rtx c)
6967 {
6968 if (GET_CODE (x) == CONST_INT)
6969 return GEN_INT (INTVAL (x) + INTVAL (c));
6970 else if (GET_CODE (x) != PLUS)
6971 return gen_rtx_PLUS (GET_MODE (x), x, c);
6972 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6973 {
6974 return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
6975 GEN_INT (INTVAL (XEXP (x, 1)) + INTVAL (c)));
6976 }
6977 else if (GET_CODE (XEXP (x, 0)) == PLUS
6978 || GET_CODE (XEXP (x, 1)) != PLUS)
6979 {
6980 return gen_rtx_PLUS (GET_MODE (x),
6981 sge_plus_constant (XEXP (x, 0), c), XEXP (x, 1));
6982 }
6983 else
6984 {
6985 return gen_rtx_PLUS (GET_MODE (x),
6986 sge_plus_constant (XEXP (x, 1), c), XEXP (x, 0));
6987 }
6988 }
6989
6990 static rtx
6991 sge_plus (enum machine_mode mode, rtx x, rtx y)
6992 {
6993 while (GET_CODE (y) == PLUS)
6994 {
6995 rtx a = XEXP (y, 0);
6996 if (GET_CODE (a) == CONST_INT)
6997 x = sge_plus_constant (x, a);
6998 else
6999 x = gen_rtx_PLUS (mode, x, a);
7000 y = XEXP (y, 1);
7001 }
7002 if (GET_CODE (y) == CONST_INT)
7003 x = sge_plus_constant (x, y);
7004 else
7005 x = gen_rtx_PLUS (mode, x, y);
7006 return x;
7007 }
7008 \f
7009 /* Help detect a giv that is calculated by several consecutive insns;
7010 for example,
7011 giv = biv * M
7012 giv = giv + A
7013 The caller has already identified the first insn P as having a giv as dest;
7014 we check that all other insns that set the same register follow
7015 immediately after P, that they alter nothing else,
7016 and that the result of the last is still a giv.
7017
7018 The value is 0 if the reg set in P is not really a giv.
7019 Otherwise, the value is the amount gained by eliminating
7020 all the consecutive insns that compute the value.
7021
7022 FIRST_BENEFIT is the amount gained by eliminating the first insn, P.
7023 SRC_REG is the reg of the biv; DEST_REG is the reg of the giv.
7024
7025 The coefficients of the ultimate giv value are stored in
7026 *MULT_VAL and *ADD_VAL. */
7027
7028 static int
7029 consec_sets_giv (const struct loop *loop, int first_benefit, rtx p,
7030 rtx src_reg, rtx dest_reg, rtx *add_val, rtx *mult_val,
7031 rtx *ext_val, rtx *last_consec_insn)
7032 {
7033 struct loop_ivs *ivs = LOOP_IVS (loop);
7034 struct loop_regs *regs = LOOP_REGS (loop);
7035 int count;
7036 enum rtx_code code;
7037 int benefit;
7038 rtx temp;
7039 rtx set;
7040
7041 /* Indicate that this is a giv so that we can update the value produced in
7042 each insn of the multi-insn sequence.
7043
7044 This induction structure will be used only by the call to
7045 general_induction_var below, so we can allocate it on our stack.
7046 If this is a giv, our caller will replace the induct var entry with
7047 a new induction structure. */
7048 struct induction *v;
7049
7050 if (REG_IV_TYPE (ivs, REGNO (dest_reg)) != UNKNOWN_INDUCT)
7051 return 0;
7052
7053 v = alloca (sizeof (struct induction));
7054 v->src_reg = src_reg;
7055 v->mult_val = *mult_val;
7056 v->add_val = *add_val;
7057 v->benefit = first_benefit;
7058 v->cant_derive = 0;
7059 v->derive_adjustment = 0;
7060 v->ext_dependent = NULL_RTX;
7061
7062 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
7063 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
7064
7065 count = regs->array[REGNO (dest_reg)].n_times_set - 1;
7066
7067 while (count > 0)
7068 {
7069 p = NEXT_INSN (p);
7070 code = GET_CODE (p);
7071
7072 /* If libcall, skip to end of call sequence. */
7073 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
7074 p = XEXP (temp, 0);
7075
7076 if (code == INSN
7077 && (set = single_set (p))
7078 && GET_CODE (SET_DEST (set)) == REG
7079 && SET_DEST (set) == dest_reg
7080 && (general_induction_var (loop, SET_SRC (set), &src_reg,
7081 add_val, mult_val, ext_val, 0,
7082 &benefit, VOIDmode)
7083 /* Giv created by equivalent expression. */
7084 || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
7085 && general_induction_var (loop, XEXP (temp, 0), &src_reg,
7086 add_val, mult_val, ext_val, 0,
7087 &benefit, VOIDmode)))
7088 && src_reg == v->src_reg)
7089 {
7090 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
7091 benefit += libcall_benefit (p);
7092
7093 count--;
7094 v->mult_val = *mult_val;
7095 v->add_val = *add_val;
7096 v->benefit += benefit;
7097 }
7098 else if (code != NOTE)
7099 {
7100 /* Allow insns that set something other than this giv to a
7101 constant. Such insns are needed on machines which cannot
7102 include long constants and should not disqualify a giv. */
7103 if (code == INSN
7104 && (set = single_set (p))
7105 && SET_DEST (set) != dest_reg
7106 && CONSTANT_P (SET_SRC (set)))
7107 continue;
7108
7109 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7110 return 0;
7111 }
7112 }
7113
7114 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7115 *last_consec_insn = p;
7116 return v->benefit;
7117 }
7118 \f
7119 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7120 represented by G1. If no such expression can be found, or it is clear that
7121 it cannot possibly be a valid address, 0 is returned.
7122
7123 To perform the computation, we note that
7124 G1 = x * v + a and
7125 G2 = y * v + b
7126 where `v' is the biv.
7127
7128 So G2 = (y/b) * G1 + (b - a*y/x).
7129
7130 Note that MULT = y/x.
7131
7132 Update: A and B are now allowed to be additive expressions such that
7133 B contains all variables in A. That is, computing B-A will not require
7134 subtracting variables. */
7135
7136 static rtx
7137 express_from_1 (rtx a, rtx b, rtx mult)
7138 {
7139 /* If MULT is zero, then A*MULT is zero, and our expression is B. */
7140
7141 if (mult == const0_rtx)
7142 return b;
7143
7144 /* If MULT is not 1, we cannot handle A with non-constants, since we
7145 would then be required to subtract multiples of the registers in A.
7146 This is theoretically possible, and may even apply to some Fortran
7147 constructs, but it is a lot of work and we do not attempt it here. */
7148
7149 if (mult != const1_rtx && GET_CODE (a) != CONST_INT)
7150 return NULL_RTX;
7151
7152 /* In general these structures are sorted top to bottom (down the PLUS
7153 chain), but not left to right across the PLUS. If B is a higher
7154 order giv than A, we can strip one level and recurse. If A is higher
7155 order, we'll eventually bail out, but won't know that until the end.
7156 If they are the same, we'll strip one level around this loop. */
7157
7158 while (GET_CODE (a) == PLUS && GET_CODE (b) == PLUS)
7159 {
7160 rtx ra, rb, oa, ob, tmp;
7161
7162 ra = XEXP (a, 0), oa = XEXP (a, 1);
7163 if (GET_CODE (ra) == PLUS)
7164 tmp = ra, ra = oa, oa = tmp;
7165
7166 rb = XEXP (b, 0), ob = XEXP (b, 1);
7167 if (GET_CODE (rb) == PLUS)
7168 tmp = rb, rb = ob, ob = tmp;
7169
7170 if (rtx_equal_p (ra, rb))
7171 /* We matched: remove one reg completely. */
7172 a = oa, b = ob;
7173 else if (GET_CODE (ob) != PLUS && rtx_equal_p (ra, ob))
7174 /* An alternate match. */
7175 a = oa, b = rb;
7176 else if (GET_CODE (oa) != PLUS && rtx_equal_p (oa, rb))
7177 /* An alternate match. */
7178 a = ra, b = ob;
7179 else
7180 {
7181 /* Indicates an extra register in B. Strip one level from B and
7182 recurse, hoping B was the higher order expression. */
7183 ob = express_from_1 (a, ob, mult);
7184 if (ob == NULL_RTX)
7185 return NULL_RTX;
7186 return gen_rtx_PLUS (GET_MODE (b), rb, ob);
7187 }
7188 }
7189
7190 /* Here we are at the last level of A, go through the cases hoping to
7191 get rid of everything but a constant. */
7192
7193 if (GET_CODE (a) == PLUS)
7194 {
7195 rtx ra, oa;
7196
7197 ra = XEXP (a, 0), oa = XEXP (a, 1);
7198 if (rtx_equal_p (oa, b))
7199 oa = ra;
7200 else if (!rtx_equal_p (ra, b))
7201 return NULL_RTX;
7202
7203 if (GET_CODE (oa) != CONST_INT)
7204 return NULL_RTX;
7205
7206 return GEN_INT (-INTVAL (oa) * INTVAL (mult));
7207 }
7208 else if (GET_CODE (a) == CONST_INT)
7209 {
7210 return plus_constant (b, -INTVAL (a) * INTVAL (mult));
7211 }
7212 else if (CONSTANT_P (a))
7213 {
7214 enum machine_mode mode_a = GET_MODE (a);
7215 enum machine_mode mode_b = GET_MODE (b);
7216 enum machine_mode mode = mode_b == VOIDmode ? mode_a : mode_b;
7217 return simplify_gen_binary (MINUS, mode, b, a);
7218 }
7219 else if (GET_CODE (b) == PLUS)
7220 {
7221 if (rtx_equal_p (a, XEXP (b, 0)))
7222 return XEXP (b, 1);
7223 else if (rtx_equal_p (a, XEXP (b, 1)))
7224 return XEXP (b, 0);
7225 else
7226 return NULL_RTX;
7227 }
7228 else if (rtx_equal_p (a, b))
7229 return const0_rtx;
7230
7231 return NULL_RTX;
7232 }
7233
7234 rtx
7235 express_from (struct induction *g1, struct induction *g2)
7236 {
7237 rtx mult, add;
7238
7239 /* The value that G1 will be multiplied by must be a constant integer. Also,
7240 the only chance we have of getting a valid address is if b*c/a (see above
7241 for notation) is also an integer. */
7242 if (GET_CODE (g1->mult_val) == CONST_INT
7243 && GET_CODE (g2->mult_val) == CONST_INT)
7244 {
7245 if (g1->mult_val == const0_rtx
7246 || (g1->mult_val == constm1_rtx
7247 && INTVAL (g2->mult_val)
7248 == (HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1))
7249 || INTVAL (g2->mult_val) % INTVAL (g1->mult_val) != 0)
7250 return NULL_RTX;
7251 mult = GEN_INT (INTVAL (g2->mult_val) / INTVAL (g1->mult_val));
7252 }
7253 else if (rtx_equal_p (g1->mult_val, g2->mult_val))
7254 mult = const1_rtx;
7255 else
7256 {
7257 /* ??? Find out if the one is a multiple of the other? */
7258 return NULL_RTX;
7259 }
7260
7261 add = express_from_1 (g1->add_val, g2->add_val, mult);
7262 if (add == NULL_RTX)
7263 {
7264 /* Failed. If we've got a multiplication factor between G1 and G2,
7265 scale G1's addend and try again. */
7266 if (INTVAL (mult) > 1)
7267 {
7268 rtx g1_add_val = g1->add_val;
7269 if (GET_CODE (g1_add_val) == MULT
7270 && GET_CODE (XEXP (g1_add_val, 1)) == CONST_INT)
7271 {
7272 HOST_WIDE_INT m;
7273 m = INTVAL (mult) * INTVAL (XEXP (g1_add_val, 1));
7274 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val),
7275 XEXP (g1_add_val, 0), GEN_INT (m));
7276 }
7277 else
7278 {
7279 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val), g1_add_val,
7280 mult);
7281 }
7282
7283 add = express_from_1 (g1_add_val, g2->add_val, const1_rtx);
7284 }
7285 }
7286 if (add == NULL_RTX)
7287 return NULL_RTX;
7288
7289 /* Form simplified final result. */
7290 if (mult == const0_rtx)
7291 return add;
7292 else if (mult == const1_rtx)
7293 mult = g1->dest_reg;
7294 else
7295 mult = gen_rtx_MULT (g2->mode, g1->dest_reg, mult);
7296
7297 if (add == const0_rtx)
7298 return mult;
7299 else
7300 {
7301 if (GET_CODE (add) == PLUS
7302 && CONSTANT_P (XEXP (add, 1)))
7303 {
7304 rtx tem = XEXP (add, 1);
7305 mult = gen_rtx_PLUS (g2->mode, mult, XEXP (add, 0));
7306 add = tem;
7307 }
7308
7309 return gen_rtx_PLUS (g2->mode, mult, add);
7310 }
7311 }
7312 \f
7313 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7314 represented by G1. This indicates that G2 should be combined with G1 and
7315 that G2 can use (either directly or via an address expression) a register
7316 used to represent G1. */
7317
7318 static rtx
7319 combine_givs_p (struct induction *g1, struct induction *g2)
7320 {
7321 rtx comb, ret;
7322
7323 /* With the introduction of ext dependent givs, we must care for modes.
7324 G2 must not use a wider mode than G1. */
7325 if (GET_MODE_SIZE (g1->mode) < GET_MODE_SIZE (g2->mode))
7326 return NULL_RTX;
7327
7328 ret = comb = express_from (g1, g2);
7329 if (comb == NULL_RTX)
7330 return NULL_RTX;
7331 if (g1->mode != g2->mode)
7332 ret = gen_lowpart (g2->mode, comb);
7333
7334 /* If these givs are identical, they can be combined. We use the results
7335 of express_from because the addends are not in a canonical form, so
7336 rtx_equal_p is a weaker test. */
7337 /* But don't combine a DEST_REG giv with a DEST_ADDR giv; we want the
7338 combination to be the other way round. */
7339 if (comb == g1->dest_reg
7340 && (g1->giv_type == DEST_REG || g2->giv_type == DEST_ADDR))
7341 {
7342 return ret;
7343 }
7344
7345 /* If G2 can be expressed as a function of G1 and that function is valid
7346 as an address and no more expensive than using a register for G2,
7347 the expression of G2 in terms of G1 can be used. */
7348 if (ret != NULL_RTX
7349 && g2->giv_type == DEST_ADDR
7350 && memory_address_p (GET_MODE (g2->mem), ret))
7351 return ret;
7352
7353 return NULL_RTX;
7354 }
7355 \f
7356 /* Check each extension dependent giv in this class to see if its
7357 root biv is safe from wrapping in the interior mode, which would
7358 make the giv illegal. */
7359
7360 static void
7361 check_ext_dependent_givs (const struct loop *loop, struct iv_class *bl)
7362 {
7363 struct loop_info *loop_info = LOOP_INFO (loop);
7364 int ze_ok = 0, se_ok = 0, info_ok = 0;
7365 enum machine_mode biv_mode = GET_MODE (bl->biv->src_reg);
7366 HOST_WIDE_INT start_val;
7367 unsigned HOST_WIDE_INT u_end_val = 0;
7368 unsigned HOST_WIDE_INT u_start_val = 0;
7369 rtx incr = pc_rtx;
7370 struct induction *v;
7371
7372 /* Make sure the iteration data is available. We must have
7373 constants in order to be certain of no overflow. */
7374 if (loop_info->n_iterations > 0
7375 && bl->initial_value
7376 && GET_CODE (bl->initial_value) == CONST_INT
7377 && (incr = biv_total_increment (bl))
7378 && GET_CODE (incr) == CONST_INT
7379 /* Make sure the host can represent the arithmetic. */
7380 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (biv_mode))
7381 {
7382 unsigned HOST_WIDE_INT abs_incr, total_incr;
7383 HOST_WIDE_INT s_end_val;
7384 int neg_incr;
7385
7386 info_ok = 1;
7387 start_val = INTVAL (bl->initial_value);
7388 u_start_val = start_val;
7389
7390 neg_incr = 0, abs_incr = INTVAL (incr);
7391 if (INTVAL (incr) < 0)
7392 neg_incr = 1, abs_incr = -abs_incr;
7393 total_incr = abs_incr * loop_info->n_iterations;
7394
7395 /* Check for host arithmetic overflow. */
7396 if (total_incr / loop_info->n_iterations == abs_incr)
7397 {
7398 unsigned HOST_WIDE_INT u_max;
7399 HOST_WIDE_INT s_max;
7400
7401 u_end_val = start_val + (neg_incr ? -total_incr : total_incr);
7402 s_end_val = u_end_val;
7403 u_max = GET_MODE_MASK (biv_mode);
7404 s_max = u_max >> 1;
7405
7406 /* Check zero extension of biv ok. */
7407 if (start_val >= 0
7408 /* Check for host arithmetic overflow. */
7409 && (neg_incr
7410 ? u_end_val < u_start_val
7411 : u_end_val > u_start_val)
7412 /* Check for target arithmetic overflow. */
7413 && (neg_incr
7414 ? 1 /* taken care of with host overflow */
7415 : u_end_val <= u_max))
7416 {
7417 ze_ok = 1;
7418 }
7419
7420 /* Check sign extension of biv ok. */
7421 /* ??? While it is true that overflow with signed and pointer
7422 arithmetic is undefined, I fear too many programmers don't
7423 keep this fact in mind -- myself included on occasion.
7424 So leave alone with the signed overflow optimizations. */
7425 if (start_val >= -s_max - 1
7426 /* Check for host arithmetic overflow. */
7427 && (neg_incr
7428 ? s_end_val < start_val
7429 : s_end_val > start_val)
7430 /* Check for target arithmetic overflow. */
7431 && (neg_incr
7432 ? s_end_val >= -s_max - 1
7433 : s_end_val <= s_max))
7434 {
7435 se_ok = 1;
7436 }
7437 }
7438 }
7439
7440 /* If we know the BIV is compared at run-time against an
7441 invariant value, and the increment is +/- 1, we may also
7442 be able to prove that the BIV cannot overflow. */
7443 else if (bl->biv->src_reg == loop_info->iteration_var
7444 && loop_info->comparison_value
7445 && loop_invariant_p (loop, loop_info->comparison_value)
7446 && (incr = biv_total_increment (bl))
7447 && GET_CODE (incr) == CONST_INT)
7448 {
7449 /* If the increment is +1, and the exit test is a <,
7450 the BIV cannot overflow. (For <=, we have the
7451 problematic case that the comparison value might
7452 be the maximum value of the range.) */
7453 if (INTVAL (incr) == 1)
7454 {
7455 if (loop_info->comparison_code == LT)
7456 se_ok = ze_ok = 1;
7457 else if (loop_info->comparison_code == LTU)
7458 ze_ok = 1;
7459 }
7460
7461 /* Likewise for increment -1 and exit test >. */
7462 if (INTVAL (incr) == -1)
7463 {
7464 if (loop_info->comparison_code == GT)
7465 se_ok = ze_ok = 1;
7466 else if (loop_info->comparison_code == GTU)
7467 ze_ok = 1;
7468 }
7469 }
7470
7471 /* Invalidate givs that fail the tests. */
7472 for (v = bl->giv; v; v = v->next_iv)
7473 if (v->ext_dependent)
7474 {
7475 enum rtx_code code = GET_CODE (v->ext_dependent);
7476 int ok = 0;
7477
7478 switch (code)
7479 {
7480 case SIGN_EXTEND:
7481 ok = se_ok;
7482 break;
7483 case ZERO_EXTEND:
7484 ok = ze_ok;
7485 break;
7486
7487 case TRUNCATE:
7488 /* We don't know whether this value is being used as either
7489 signed or unsigned, so to safely truncate we must satisfy
7490 both. The initial check here verifies the BIV itself;
7491 once that is successful we may check its range wrt the
7492 derived GIV. This works only if we were able to determine
7493 constant start and end values above. */
7494 if (se_ok && ze_ok && info_ok)
7495 {
7496 enum machine_mode outer_mode = GET_MODE (v->ext_dependent);
7497 unsigned HOST_WIDE_INT max = GET_MODE_MASK (outer_mode) >> 1;
7498
7499 /* We know from the above that both endpoints are nonnegative,
7500 and that there is no wrapping. Verify that both endpoints
7501 are within the (signed) range of the outer mode. */
7502 if (u_start_val <= max && u_end_val <= max)
7503 ok = 1;
7504 }
7505 break;
7506
7507 default:
7508 abort ();
7509 }
7510
7511 if (ok)
7512 {
7513 if (loop_dump_stream)
7514 {
7515 fprintf (loop_dump_stream,
7516 "Verified ext dependent giv at %d of reg %d\n",
7517 INSN_UID (v->insn), bl->regno);
7518 }
7519 }
7520 else
7521 {
7522 if (loop_dump_stream)
7523 {
7524 const char *why;
7525
7526 if (info_ok)
7527 why = "biv iteration values overflowed";
7528 else
7529 {
7530 if (incr == pc_rtx)
7531 incr = biv_total_increment (bl);
7532 if (incr == const1_rtx)
7533 why = "biv iteration info incomplete; incr by 1";
7534 else
7535 why = "biv iteration info incomplete";
7536 }
7537
7538 fprintf (loop_dump_stream,
7539 "Failed ext dependent giv at %d, %s\n",
7540 INSN_UID (v->insn), why);
7541 }
7542 v->ignore = 1;
7543 bl->all_reduced = 0;
7544 }
7545 }
7546 }
7547
7548 /* Generate a version of VALUE in a mode appropriate for initializing V. */
7549
7550 rtx
7551 extend_value_for_giv (struct induction *v, rtx value)
7552 {
7553 rtx ext_dep = v->ext_dependent;
7554
7555 if (! ext_dep)
7556 return value;
7557
7558 /* Recall that check_ext_dependent_givs verified that the known bounds
7559 of a biv did not overflow or wrap with respect to the extension for
7560 the giv. Therefore, constants need no additional adjustment. */
7561 if (CONSTANT_P (value) && GET_MODE (value) == VOIDmode)
7562 return value;
7563
7564 /* Otherwise, we must adjust the value to compensate for the
7565 differing modes of the biv and the giv. */
7566 return gen_rtx_fmt_e (GET_CODE (ext_dep), GET_MODE (ext_dep), value);
7567 }
7568 \f
7569 struct combine_givs_stats
7570 {
7571 int giv_number;
7572 int total_benefit;
7573 };
7574
7575 static int
7576 cmp_combine_givs_stats (const void *xp, const void *yp)
7577 {
7578 const struct combine_givs_stats * const x =
7579 (const struct combine_givs_stats *) xp;
7580 const struct combine_givs_stats * const y =
7581 (const struct combine_givs_stats *) yp;
7582 int d;
7583 d = y->total_benefit - x->total_benefit;
7584 /* Stabilize the sort. */
7585 if (!d)
7586 d = x->giv_number - y->giv_number;
7587 return d;
7588 }
7589
7590 /* Check all pairs of givs for iv_class BL and see if any can be combined with
7591 any other. If so, point SAME to the giv combined with and set NEW_REG to
7592 be an expression (in terms of the other giv's DEST_REG) equivalent to the
7593 giv. Also, update BENEFIT and related fields for cost/benefit analysis. */
7594
7595 static void
7596 combine_givs (struct loop_regs *regs, struct iv_class *bl)
7597 {
7598 /* Additional benefit to add for being combined multiple times. */
7599 const int extra_benefit = 3;
7600
7601 struct induction *g1, *g2, **giv_array;
7602 int i, j, k, giv_count;
7603 struct combine_givs_stats *stats;
7604 rtx *can_combine;
7605
7606 /* Count givs, because bl->giv_count is incorrect here. */
7607 giv_count = 0;
7608 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7609 if (!g1->ignore)
7610 giv_count++;
7611
7612 giv_array = alloca (giv_count * sizeof (struct induction *));
7613 i = 0;
7614 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7615 if (!g1->ignore)
7616 giv_array[i++] = g1;
7617
7618 stats = xcalloc (giv_count, sizeof (*stats));
7619 can_combine = xcalloc (giv_count, giv_count * sizeof (rtx));
7620
7621 for (i = 0; i < giv_count; i++)
7622 {
7623 int this_benefit;
7624 rtx single_use;
7625
7626 g1 = giv_array[i];
7627 stats[i].giv_number = i;
7628
7629 /* If a DEST_REG GIV is used only once, do not allow it to combine
7630 with anything, for in doing so we will gain nothing that cannot
7631 be had by simply letting the GIV with which we would have combined
7632 to be reduced on its own. The losage shows up in particular with
7633 DEST_ADDR targets on hosts with reg+reg addressing, though it can
7634 be seen elsewhere as well. */
7635 if (g1->giv_type == DEST_REG
7636 && (single_use = regs->array[REGNO (g1->dest_reg)].single_usage)
7637 && single_use != const0_rtx)
7638 continue;
7639
7640 this_benefit = g1->benefit;
7641 /* Add an additional weight for zero addends. */
7642 if (g1->no_const_addval)
7643 this_benefit += 1;
7644
7645 for (j = 0; j < giv_count; j++)
7646 {
7647 rtx this_combine;
7648
7649 g2 = giv_array[j];
7650 if (g1 != g2
7651 && (this_combine = combine_givs_p (g1, g2)) != NULL_RTX)
7652 {
7653 can_combine[i * giv_count + j] = this_combine;
7654 this_benefit += g2->benefit + extra_benefit;
7655 }
7656 }
7657 stats[i].total_benefit = this_benefit;
7658 }
7659
7660 /* Iterate, combining until we can't. */
7661 restart:
7662 qsort (stats, giv_count, sizeof (*stats), cmp_combine_givs_stats);
7663
7664 if (loop_dump_stream)
7665 {
7666 fprintf (loop_dump_stream, "Sorted combine statistics:\n");
7667 for (k = 0; k < giv_count; k++)
7668 {
7669 g1 = giv_array[stats[k].giv_number];
7670 if (!g1->combined_with && !g1->same)
7671 fprintf (loop_dump_stream, " {%d, %d}",
7672 INSN_UID (giv_array[stats[k].giv_number]->insn),
7673 stats[k].total_benefit);
7674 }
7675 putc ('\n', loop_dump_stream);
7676 }
7677
7678 for (k = 0; k < giv_count; k++)
7679 {
7680 int g1_add_benefit = 0;
7681
7682 i = stats[k].giv_number;
7683 g1 = giv_array[i];
7684
7685 /* If it has already been combined, skip. */
7686 if (g1->combined_with || g1->same)
7687 continue;
7688
7689 for (j = 0; j < giv_count; j++)
7690 {
7691 g2 = giv_array[j];
7692 if (g1 != g2 && can_combine[i * giv_count + j]
7693 /* If it has already been combined, skip. */
7694 && ! g2->same && ! g2->combined_with)
7695 {
7696 int l;
7697
7698 g2->new_reg = can_combine[i * giv_count + j];
7699 g2->same = g1;
7700 /* For destination, we now may replace by mem expression instead
7701 of register. This changes the costs considerably, so add the
7702 compensation. */
7703 if (g2->giv_type == DEST_ADDR)
7704 g2->benefit = (g2->benefit + reg_address_cost
7705 - address_cost (g2->new_reg,
7706 GET_MODE (g2->mem)));
7707 g1->combined_with++;
7708 g1->lifetime += g2->lifetime;
7709
7710 g1_add_benefit += g2->benefit;
7711
7712 /* ??? The new final_[bg]iv_value code does a much better job
7713 of finding replaceable giv's, and hence this code may no
7714 longer be necessary. */
7715 if (! g2->replaceable && REG_USERVAR_P (g2->dest_reg))
7716 g1_add_benefit -= copy_cost;
7717
7718 /* To help optimize the next set of combinations, remove
7719 this giv from the benefits of other potential mates. */
7720 for (l = 0; l < giv_count; ++l)
7721 {
7722 int m = stats[l].giv_number;
7723 if (can_combine[m * giv_count + j])
7724 stats[l].total_benefit -= g2->benefit + extra_benefit;
7725 }
7726
7727 if (loop_dump_stream)
7728 fprintf (loop_dump_stream,
7729 "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n",
7730 INSN_UID (g2->insn), INSN_UID (g1->insn),
7731 g1->benefit, g1_add_benefit, g1->lifetime);
7732 }
7733 }
7734
7735 /* To help optimize the next set of combinations, remove
7736 this giv from the benefits of other potential mates. */
7737 if (g1->combined_with)
7738 {
7739 for (j = 0; j < giv_count; ++j)
7740 {
7741 int m = stats[j].giv_number;
7742 if (can_combine[m * giv_count + i])
7743 stats[j].total_benefit -= g1->benefit + extra_benefit;
7744 }
7745
7746 g1->benefit += g1_add_benefit;
7747
7748 /* We've finished with this giv, and everything it touched.
7749 Restart the combination so that proper weights for the
7750 rest of the givs are properly taken into account. */
7751 /* ??? Ideally we would compact the arrays at this point, so
7752 as to not cover old ground. But sanely compacting
7753 can_combine is tricky. */
7754 goto restart;
7755 }
7756 }
7757
7758 /* Clean up. */
7759 free (stats);
7760 free (can_combine);
7761 }
7762 \f
7763 /* Generate sequence for REG = B * M + A. B is the initial value of
7764 the basic induction variable, M a multiplicative constant, A an
7765 additive constant and REG the destination register. */
7766
7767 static rtx
7768 gen_add_mult (rtx b, rtx m, rtx a, rtx reg)
7769 {
7770 rtx seq;
7771 rtx result;
7772
7773 start_sequence ();
7774 /* Use unsigned arithmetic. */
7775 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7776 if (reg != result)
7777 emit_move_insn (reg, result);
7778 seq = get_insns ();
7779 end_sequence ();
7780
7781 return seq;
7782 }
7783
7784
7785 /* Update registers created in insn sequence SEQ. */
7786
7787 static void
7788 loop_regs_update (const struct loop *loop ATTRIBUTE_UNUSED, rtx seq)
7789 {
7790 rtx insn;
7791
7792 /* Update register info for alias analysis. */
7793
7794 insn = seq;
7795 while (insn != NULL_RTX)
7796 {
7797 rtx set = single_set (insn);
7798
7799 if (set && GET_CODE (SET_DEST (set)) == REG)
7800 record_base_value (REGNO (SET_DEST (set)), SET_SRC (set), 0);
7801
7802 insn = NEXT_INSN (insn);
7803 }
7804 }
7805
7806
7807 /* EMIT code before BEFORE_BB/BEFORE_INSN to set REG = B * M + A. B
7808 is the initial value of the basic induction variable, M a
7809 multiplicative constant, A an additive constant and REG the
7810 destination register. */
7811
7812 void
7813 loop_iv_add_mult_emit_before (const struct loop *loop, rtx b, rtx m, rtx a,
7814 rtx reg, basic_block before_bb, rtx before_insn)
7815 {
7816 rtx seq;
7817
7818 if (! before_insn)
7819 {
7820 loop_iv_add_mult_hoist (loop, b, m, a, reg);
7821 return;
7822 }
7823
7824 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7825 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7826
7827 /* Increase the lifetime of any invariants moved further in code. */
7828 update_reg_last_use (a, before_insn);
7829 update_reg_last_use (b, before_insn);
7830 update_reg_last_use (m, before_insn);
7831
7832 /* It is possible that the expansion created lots of new registers.
7833 Iterate over the sequence we just created and record them all. We
7834 must do this before inserting the sequence. */
7835 loop_regs_update (loop, seq);
7836
7837 loop_insn_emit_before (loop, before_bb, before_insn, seq);
7838 }
7839
7840
7841 /* Emit insns in loop pre-header to set REG = B * M + A. B is the
7842 initial value of the basic induction variable, M a multiplicative
7843 constant, A an additive constant and REG the destination
7844 register. */
7845
7846 void
7847 loop_iv_add_mult_sink (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
7848 {
7849 rtx seq;
7850
7851 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7852 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7853
7854 /* Increase the lifetime of any invariants moved further in code.
7855 ???? Is this really necessary? */
7856 update_reg_last_use (a, loop->sink);
7857 update_reg_last_use (b, loop->sink);
7858 update_reg_last_use (m, loop->sink);
7859
7860 /* It is possible that the expansion created lots of new registers.
7861 Iterate over the sequence we just created and record them all. We
7862 must do this before inserting the sequence. */
7863 loop_regs_update (loop, seq);
7864
7865 loop_insn_sink (loop, seq);
7866 }
7867
7868
7869 /* Emit insns after loop to set REG = B * M + A. B is the initial
7870 value of the basic induction variable, M a multiplicative constant,
7871 A an additive constant and REG the destination register. */
7872
7873 void
7874 loop_iv_add_mult_hoist (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
7875 {
7876 rtx seq;
7877
7878 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7879 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7880
7881 /* It is possible that the expansion created lots of new registers.
7882 Iterate over the sequence we just created and record them all. We
7883 must do this before inserting the sequence. */
7884 loop_regs_update (loop, seq);
7885
7886 loop_insn_hoist (loop, seq);
7887 }
7888
7889
7890
7891 /* Similar to gen_add_mult, but compute cost rather than generating
7892 sequence. */
7893
7894 static int
7895 iv_add_mult_cost (rtx b, rtx m, rtx a, rtx reg)
7896 {
7897 int cost = 0;
7898 rtx last, result;
7899
7900 start_sequence ();
7901 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7902 if (reg != result)
7903 emit_move_insn (reg, result);
7904 last = get_last_insn ();
7905 while (last)
7906 {
7907 rtx t = single_set (last);
7908 if (t)
7909 cost += rtx_cost (SET_SRC (t), SET);
7910 last = PREV_INSN (last);
7911 }
7912 end_sequence ();
7913 return cost;
7914 }
7915 \f
7916 /* Test whether A * B can be computed without
7917 an actual multiply insn. Value is 1 if so.
7918
7919 ??? This function stinks because it generates a ton of wasted RTL
7920 ??? and as a result fragments GC memory to no end. There are other
7921 ??? places in the compiler which are invoked a lot and do the same
7922 ??? thing, generate wasted RTL just to see if something is possible. */
7923
7924 static int
7925 product_cheap_p (rtx a, rtx b)
7926 {
7927 rtx tmp;
7928 int win, n_insns;
7929
7930 /* If only one is constant, make it B. */
7931 if (GET_CODE (a) == CONST_INT)
7932 tmp = a, a = b, b = tmp;
7933
7934 /* If first constant, both constant, so don't need multiply. */
7935 if (GET_CODE (a) == CONST_INT)
7936 return 1;
7937
7938 /* If second not constant, neither is constant, so would need multiply. */
7939 if (GET_CODE (b) != CONST_INT)
7940 return 0;
7941
7942 /* One operand is constant, so might not need multiply insn. Generate the
7943 code for the multiply and see if a call or multiply, or long sequence
7944 of insns is generated. */
7945
7946 start_sequence ();
7947 expand_mult (GET_MODE (a), a, b, NULL_RTX, 1);
7948 tmp = get_insns ();
7949 end_sequence ();
7950
7951 win = 1;
7952 if (INSN_P (tmp))
7953 {
7954 n_insns = 0;
7955 while (tmp != NULL_RTX)
7956 {
7957 rtx next = NEXT_INSN (tmp);
7958
7959 if (++n_insns > 3
7960 || GET_CODE (tmp) != INSN
7961 || (GET_CODE (PATTERN (tmp)) == SET
7962 && GET_CODE (SET_SRC (PATTERN (tmp))) == MULT)
7963 || (GET_CODE (PATTERN (tmp)) == PARALLEL
7964 && GET_CODE (XVECEXP (PATTERN (tmp), 0, 0)) == SET
7965 && GET_CODE (SET_SRC (XVECEXP (PATTERN (tmp), 0, 0))) == MULT))
7966 {
7967 win = 0;
7968 break;
7969 }
7970
7971 tmp = next;
7972 }
7973 }
7974 else if (GET_CODE (tmp) == SET
7975 && GET_CODE (SET_SRC (tmp)) == MULT)
7976 win = 0;
7977 else if (GET_CODE (tmp) == PARALLEL
7978 && GET_CODE (XVECEXP (tmp, 0, 0)) == SET
7979 && GET_CODE (SET_SRC (XVECEXP (tmp, 0, 0))) == MULT)
7980 win = 0;
7981
7982 return win;
7983 }
7984 \f
7985 /* Check to see if loop can be terminated by a "decrement and branch until
7986 zero" instruction. If so, add a REG_NONNEG note to the branch insn if so.
7987 Also try reversing an increment loop to a decrement loop
7988 to see if the optimization can be performed.
7989 Value is nonzero if optimization was performed. */
7990
7991 /* This is useful even if the architecture doesn't have such an insn,
7992 because it might change a loops which increments from 0 to n to a loop
7993 which decrements from n to 0. A loop that decrements to zero is usually
7994 faster than one that increments from zero. */
7995
7996 /* ??? This could be rewritten to use some of the loop unrolling procedures,
7997 such as approx_final_value, biv_total_increment, loop_iterations, and
7998 final_[bg]iv_value. */
7999
8000 static int
8001 check_dbra_loop (struct loop *loop, int insn_count)
8002 {
8003 struct loop_info *loop_info = LOOP_INFO (loop);
8004 struct loop_regs *regs = LOOP_REGS (loop);
8005 struct loop_ivs *ivs = LOOP_IVS (loop);
8006 struct iv_class *bl;
8007 rtx reg;
8008 rtx jump_label;
8009 rtx final_value;
8010 rtx start_value;
8011 rtx new_add_val;
8012 rtx comparison;
8013 rtx before_comparison;
8014 rtx p;
8015 rtx jump;
8016 rtx first_compare;
8017 int compare_and_branch;
8018 rtx loop_start = loop->start;
8019 rtx loop_end = loop->end;
8020
8021 /* If last insn is a conditional branch, and the insn before tests a
8022 register value, try to optimize it. Otherwise, we can't do anything. */
8023
8024 jump = PREV_INSN (loop_end);
8025 comparison = get_condition_for_loop (loop, jump);
8026 if (comparison == 0)
8027 return 0;
8028 if (!onlyjump_p (jump))
8029 return 0;
8030
8031 /* Try to compute whether the compare/branch at the loop end is one or
8032 two instructions. */
8033 get_condition (jump, &first_compare, false);
8034 if (first_compare == jump)
8035 compare_and_branch = 1;
8036 else if (first_compare == prev_nonnote_insn (jump))
8037 compare_and_branch = 2;
8038 else
8039 return 0;
8040
8041 {
8042 /* If more than one condition is present to control the loop, then
8043 do not proceed, as this function does not know how to rewrite
8044 loop tests with more than one condition.
8045
8046 Look backwards from the first insn in the last comparison
8047 sequence and see if we've got another comparison sequence. */
8048
8049 rtx jump1;
8050 if ((jump1 = prev_nonnote_insn (first_compare)) != loop->cont)
8051 if (GET_CODE (jump1) == JUMP_INSN)
8052 return 0;
8053 }
8054
8055 /* Check all of the bivs to see if the compare uses one of them.
8056 Skip biv's set more than once because we can't guarantee that
8057 it will be zero on the last iteration. Also skip if the biv is
8058 used between its update and the test insn. */
8059
8060 for (bl = ivs->list; bl; bl = bl->next)
8061 {
8062 if (bl->biv_count == 1
8063 && ! bl->biv->maybe_multiple
8064 && bl->biv->dest_reg == XEXP (comparison, 0)
8065 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
8066 first_compare))
8067 break;
8068 }
8069
8070 /* Try swapping the comparison to identify a suitable biv. */
8071 if (!bl)
8072 for (bl = ivs->list; bl; bl = bl->next)
8073 if (bl->biv_count == 1
8074 && ! bl->biv->maybe_multiple
8075 && bl->biv->dest_reg == XEXP (comparison, 1)
8076 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
8077 first_compare))
8078 {
8079 comparison = gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)),
8080 VOIDmode,
8081 XEXP (comparison, 1),
8082 XEXP (comparison, 0));
8083 break;
8084 }
8085
8086 if (! bl)
8087 return 0;
8088
8089 /* Look for the case where the basic induction variable is always
8090 nonnegative, and equals zero on the last iteration.
8091 In this case, add a reg_note REG_NONNEG, which allows the
8092 m68k DBRA instruction to be used. */
8093
8094 if (((GET_CODE (comparison) == GT && XEXP (comparison, 1) == constm1_rtx)
8095 || (GET_CODE (comparison) == NE && XEXP (comparison, 1) == const0_rtx))
8096 && GET_CODE (bl->biv->add_val) == CONST_INT
8097 && INTVAL (bl->biv->add_val) < 0)
8098 {
8099 /* Initial value must be greater than 0,
8100 init_val % -dec_value == 0 to ensure that it equals zero on
8101 the last iteration */
8102
8103 if (GET_CODE (bl->initial_value) == CONST_INT
8104 && INTVAL (bl->initial_value) > 0
8105 && (INTVAL (bl->initial_value)
8106 % (-INTVAL (bl->biv->add_val))) == 0)
8107 {
8108 /* Register always nonnegative, add REG_NOTE to branch. */
8109 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8110 REG_NOTES (jump)
8111 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8112 REG_NOTES (jump));
8113 bl->nonneg = 1;
8114
8115 return 1;
8116 }
8117
8118 /* If the decrement is 1 and the value was tested as >= 0 before
8119 the loop, then we can safely optimize. */
8120 for (p = loop_start; p; p = PREV_INSN (p))
8121 {
8122 if (GET_CODE (p) == CODE_LABEL)
8123 break;
8124 if (GET_CODE (p) != JUMP_INSN)
8125 continue;
8126
8127 before_comparison = get_condition_for_loop (loop, p);
8128 if (before_comparison
8129 && XEXP (before_comparison, 0) == bl->biv->dest_reg
8130 && (GET_CODE (before_comparison) == LT
8131 || GET_CODE (before_comparison) == LTU)
8132 && XEXP (before_comparison, 1) == const0_rtx
8133 && ! reg_set_between_p (bl->biv->dest_reg, p, loop_start)
8134 && INTVAL (bl->biv->add_val) == -1)
8135 {
8136 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8137 REG_NOTES (jump)
8138 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8139 REG_NOTES (jump));
8140 bl->nonneg = 1;
8141
8142 return 1;
8143 }
8144 }
8145 }
8146 else if (GET_CODE (bl->biv->add_val) == CONST_INT
8147 && INTVAL (bl->biv->add_val) > 0)
8148 {
8149 /* Try to change inc to dec, so can apply above optimization. */
8150 /* Can do this if:
8151 all registers modified are induction variables or invariant,
8152 all memory references have non-overlapping addresses
8153 (obviously true if only one write)
8154 allow 2 insns for the compare/jump at the end of the loop. */
8155 /* Also, we must avoid any instructions which use both the reversed
8156 biv and another biv. Such instructions will fail if the loop is
8157 reversed. We meet this condition by requiring that either
8158 no_use_except_counting is true, or else that there is only
8159 one biv. */
8160 int num_nonfixed_reads = 0;
8161 /* 1 if the iteration var is used only to count iterations. */
8162 int no_use_except_counting = 0;
8163 /* 1 if the loop has no memory store, or it has a single memory store
8164 which is reversible. */
8165 int reversible_mem_store = 1;
8166
8167 if (bl->giv_count == 0
8168 && !loop->exit_count
8169 && !loop_info->has_multiple_exit_targets)
8170 {
8171 rtx bivreg = regno_reg_rtx[bl->regno];
8172 struct iv_class *blt;
8173
8174 /* If there are no givs for this biv, and the only exit is the
8175 fall through at the end of the loop, then
8176 see if perhaps there are no uses except to count. */
8177 no_use_except_counting = 1;
8178 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8179 if (INSN_P (p))
8180 {
8181 rtx set = single_set (p);
8182
8183 if (set && GET_CODE (SET_DEST (set)) == REG
8184 && REGNO (SET_DEST (set)) == bl->regno)
8185 /* An insn that sets the biv is okay. */
8186 ;
8187 else if (!reg_mentioned_p (bivreg, PATTERN (p)))
8188 /* An insn that doesn't mention the biv is okay. */
8189 ;
8190 else if (p == prev_nonnote_insn (prev_nonnote_insn (loop_end))
8191 || p == prev_nonnote_insn (loop_end))
8192 {
8193 /* If either of these insns uses the biv and sets a pseudo
8194 that has more than one usage, then the biv has uses
8195 other than counting since it's used to derive a value
8196 that is used more than one time. */
8197 note_stores (PATTERN (p), note_set_pseudo_multiple_uses,
8198 regs);
8199 if (regs->multiple_uses)
8200 {
8201 no_use_except_counting = 0;
8202 break;
8203 }
8204 }
8205 else
8206 {
8207 no_use_except_counting = 0;
8208 break;
8209 }
8210 }
8211
8212 /* A biv has uses besides counting if it is used to set
8213 another biv. */
8214 for (blt = ivs->list; blt; blt = blt->next)
8215 if (blt->init_set
8216 && reg_mentioned_p (bivreg, SET_SRC (blt->init_set)))
8217 {
8218 no_use_except_counting = 0;
8219 break;
8220 }
8221 }
8222
8223 if (no_use_except_counting)
8224 /* No need to worry about MEMs. */
8225 ;
8226 else if (loop_info->num_mem_sets <= 1)
8227 {
8228 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8229 if (INSN_P (p))
8230 num_nonfixed_reads += count_nonfixed_reads (loop, PATTERN (p));
8231
8232 /* If the loop has a single store, and the destination address is
8233 invariant, then we can't reverse the loop, because this address
8234 might then have the wrong value at loop exit.
8235 This would work if the source was invariant also, however, in that
8236 case, the insn should have been moved out of the loop. */
8237
8238 if (loop_info->num_mem_sets == 1)
8239 {
8240 struct induction *v;
8241
8242 /* If we could prove that each of the memory locations
8243 written to was different, then we could reverse the
8244 store -- but we don't presently have any way of
8245 knowing that. */
8246 reversible_mem_store = 0;
8247
8248 /* If the store depends on a register that is set after the
8249 store, it depends on the initial value, and is thus not
8250 reversible. */
8251 for (v = bl->giv; reversible_mem_store && v; v = v->next_iv)
8252 {
8253 if (v->giv_type == DEST_REG
8254 && reg_mentioned_p (v->dest_reg,
8255 PATTERN (loop_info->first_loop_store_insn))
8256 && loop_insn_first_p (loop_info->first_loop_store_insn,
8257 v->insn))
8258 reversible_mem_store = 0;
8259 }
8260 }
8261 }
8262 else
8263 return 0;
8264
8265 /* This code only acts for innermost loops. Also it simplifies
8266 the memory address check by only reversing loops with
8267 zero or one memory access.
8268 Two memory accesses could involve parts of the same array,
8269 and that can't be reversed.
8270 If the biv is used only for counting, than we don't need to worry
8271 about all these things. */
8272
8273 if ((num_nonfixed_reads <= 1
8274 && ! loop_info->has_nonconst_call
8275 && ! loop_info->has_prefetch
8276 && ! loop_info->has_volatile
8277 && reversible_mem_store
8278 && (bl->giv_count + bl->biv_count + loop_info->num_mem_sets
8279 + num_unmoved_movables (loop) + compare_and_branch == insn_count)
8280 && (bl == ivs->list && bl->next == 0))
8281 || (no_use_except_counting && ! loop_info->has_prefetch))
8282 {
8283 rtx tem;
8284
8285 /* Loop can be reversed. */
8286 if (loop_dump_stream)
8287 fprintf (loop_dump_stream, "Can reverse loop\n");
8288
8289 /* Now check other conditions:
8290
8291 The increment must be a constant, as must the initial value,
8292 and the comparison code must be LT.
8293
8294 This test can probably be improved since +/- 1 in the constant
8295 can be obtained by changing LT to LE and vice versa; this is
8296 confusing. */
8297
8298 if (comparison
8299 /* for constants, LE gets turned into LT */
8300 && (GET_CODE (comparison) == LT
8301 || (GET_CODE (comparison) == LE
8302 && no_use_except_counting)
8303 || GET_CODE (comparison) == LTU))
8304 {
8305 HOST_WIDE_INT add_val, add_adjust, comparison_val = 0;
8306 rtx initial_value, comparison_value;
8307 int nonneg = 0;
8308 enum rtx_code cmp_code;
8309 int comparison_const_width;
8310 unsigned HOST_WIDE_INT comparison_sign_mask;
8311
8312 add_val = INTVAL (bl->biv->add_val);
8313 comparison_value = XEXP (comparison, 1);
8314 if (GET_MODE (comparison_value) == VOIDmode)
8315 comparison_const_width
8316 = GET_MODE_BITSIZE (GET_MODE (XEXP (comparison, 0)));
8317 else
8318 comparison_const_width
8319 = GET_MODE_BITSIZE (GET_MODE (comparison_value));
8320 if (comparison_const_width > HOST_BITS_PER_WIDE_INT)
8321 comparison_const_width = HOST_BITS_PER_WIDE_INT;
8322 comparison_sign_mask
8323 = (unsigned HOST_WIDE_INT) 1 << (comparison_const_width - 1);
8324
8325 /* If the comparison value is not a loop invariant, then we
8326 can not reverse this loop.
8327
8328 ??? If the insns which initialize the comparison value as
8329 a whole compute an invariant result, then we could move
8330 them out of the loop and proceed with loop reversal. */
8331 if (! loop_invariant_p (loop, comparison_value))
8332 return 0;
8333
8334 if (GET_CODE (comparison_value) == CONST_INT)
8335 comparison_val = INTVAL (comparison_value);
8336 initial_value = bl->initial_value;
8337
8338 /* Normalize the initial value if it is an integer and
8339 has no other use except as a counter. This will allow
8340 a few more loops to be reversed. */
8341 if (no_use_except_counting
8342 && GET_CODE (comparison_value) == CONST_INT
8343 && GET_CODE (initial_value) == CONST_INT)
8344 {
8345 comparison_val = comparison_val - INTVAL (bl->initial_value);
8346 /* The code below requires comparison_val to be a multiple
8347 of add_val in order to do the loop reversal, so
8348 round up comparison_val to a multiple of add_val.
8349 Since comparison_value is constant, we know that the
8350 current comparison code is LT. */
8351 comparison_val = comparison_val + add_val - 1;
8352 comparison_val
8353 -= (unsigned HOST_WIDE_INT) comparison_val % add_val;
8354 /* We postpone overflow checks for COMPARISON_VAL here;
8355 even if there is an overflow, we might still be able to
8356 reverse the loop, if converting the loop exit test to
8357 NE is possible. */
8358 initial_value = const0_rtx;
8359 }
8360
8361 /* First check if we can do a vanilla loop reversal. */
8362 if (initial_value == const0_rtx
8363 /* If we have a decrement_and_branch_on_count,
8364 prefer the NE test, since this will allow that
8365 instruction to be generated. Note that we must
8366 use a vanilla loop reversal if the biv is used to
8367 calculate a giv or has a non-counting use. */
8368 #if ! defined (HAVE_decrement_and_branch_until_zero) \
8369 && defined (HAVE_decrement_and_branch_on_count)
8370 && (! (add_val == 1 && loop->vtop
8371 && (bl->biv_count == 0
8372 || no_use_except_counting)))
8373 #endif
8374 && GET_CODE (comparison_value) == CONST_INT
8375 /* Now do postponed overflow checks on COMPARISON_VAL. */
8376 && ! (((comparison_val - add_val) ^ INTVAL (comparison_value))
8377 & comparison_sign_mask))
8378 {
8379 /* Register will always be nonnegative, with value
8380 0 on last iteration */
8381 add_adjust = add_val;
8382 nonneg = 1;
8383 cmp_code = GE;
8384 }
8385 else if (add_val == 1 && loop->vtop
8386 && (bl->biv_count == 0
8387 || no_use_except_counting))
8388 {
8389 add_adjust = 0;
8390 cmp_code = NE;
8391 }
8392 else
8393 return 0;
8394
8395 if (GET_CODE (comparison) == LE)
8396 add_adjust -= add_val;
8397
8398 /* If the initial value is not zero, or if the comparison
8399 value is not an exact multiple of the increment, then we
8400 can not reverse this loop. */
8401 if (initial_value == const0_rtx
8402 && GET_CODE (comparison_value) == CONST_INT)
8403 {
8404 if (((unsigned HOST_WIDE_INT) comparison_val % add_val) != 0)
8405 return 0;
8406 }
8407 else
8408 {
8409 if (! no_use_except_counting || add_val != 1)
8410 return 0;
8411 }
8412
8413 final_value = comparison_value;
8414
8415 /* Reset these in case we normalized the initial value
8416 and comparison value above. */
8417 if (GET_CODE (comparison_value) == CONST_INT
8418 && GET_CODE (initial_value) == CONST_INT)
8419 {
8420 comparison_value = GEN_INT (comparison_val);
8421 final_value
8422 = GEN_INT (comparison_val + INTVAL (bl->initial_value));
8423 }
8424 bl->initial_value = initial_value;
8425
8426 /* Save some info needed to produce the new insns. */
8427 reg = bl->biv->dest_reg;
8428 jump_label = condjump_label (PREV_INSN (loop_end));
8429 new_add_val = GEN_INT (-INTVAL (bl->biv->add_val));
8430
8431 /* Set start_value; if this is not a CONST_INT, we need
8432 to generate a SUB.
8433 Initialize biv to start_value before loop start.
8434 The old initializing insn will be deleted as a
8435 dead store by flow.c. */
8436 if (initial_value == const0_rtx
8437 && GET_CODE (comparison_value) == CONST_INT)
8438 {
8439 start_value = GEN_INT (comparison_val - add_adjust);
8440 loop_insn_hoist (loop, gen_move_insn (reg, start_value));
8441 }
8442 else if (GET_CODE (initial_value) == CONST_INT)
8443 {
8444 enum machine_mode mode = GET_MODE (reg);
8445 rtx offset = GEN_INT (-INTVAL (initial_value) - add_adjust);
8446 rtx add_insn = gen_add3_insn (reg, comparison_value, offset);
8447
8448 if (add_insn == 0)
8449 return 0;
8450
8451 start_value
8452 = gen_rtx_PLUS (mode, comparison_value, offset);
8453 loop_insn_hoist (loop, add_insn);
8454 if (GET_CODE (comparison) == LE)
8455 final_value = gen_rtx_PLUS (mode, comparison_value,
8456 GEN_INT (add_val));
8457 }
8458 else if (! add_adjust)
8459 {
8460 enum machine_mode mode = GET_MODE (reg);
8461 rtx sub_insn = gen_sub3_insn (reg, comparison_value,
8462 initial_value);
8463
8464 if (sub_insn == 0)
8465 return 0;
8466 start_value
8467 = gen_rtx_MINUS (mode, comparison_value, initial_value);
8468 loop_insn_hoist (loop, sub_insn);
8469 }
8470 else
8471 /* We could handle the other cases too, but it'll be
8472 better to have a testcase first. */
8473 return 0;
8474
8475 /* We may not have a single insn which can increment a reg, so
8476 create a sequence to hold all the insns from expand_inc. */
8477 start_sequence ();
8478 expand_inc (reg, new_add_val);
8479 tem = get_insns ();
8480 end_sequence ();
8481
8482 p = loop_insn_emit_before (loop, 0, bl->biv->insn, tem);
8483 delete_insn (bl->biv->insn);
8484
8485 /* Update biv info to reflect its new status. */
8486 bl->biv->insn = p;
8487 bl->initial_value = start_value;
8488 bl->biv->add_val = new_add_val;
8489
8490 /* Update loop info. */
8491 loop_info->initial_value = reg;
8492 loop_info->initial_equiv_value = reg;
8493 loop_info->final_value = const0_rtx;
8494 loop_info->final_equiv_value = const0_rtx;
8495 loop_info->comparison_value = const0_rtx;
8496 loop_info->comparison_code = cmp_code;
8497 loop_info->increment = new_add_val;
8498
8499 /* Inc LABEL_NUSES so that delete_insn will
8500 not delete the label. */
8501 LABEL_NUSES (XEXP (jump_label, 0))++;
8502
8503 /* Emit an insn after the end of the loop to set the biv's
8504 proper exit value if it is used anywhere outside the loop. */
8505 if ((REGNO_LAST_UID (bl->regno) != INSN_UID (first_compare))
8506 || ! bl->init_insn
8507 || REGNO_FIRST_UID (bl->regno) != INSN_UID (bl->init_insn))
8508 loop_insn_sink (loop, gen_load_of_final_value (reg, final_value));
8509
8510 /* Delete compare/branch at end of loop. */
8511 delete_related_insns (PREV_INSN (loop_end));
8512 if (compare_and_branch == 2)
8513 delete_related_insns (first_compare);
8514
8515 /* Add new compare/branch insn at end of loop. */
8516 start_sequence ();
8517 emit_cmp_and_jump_insns (reg, const0_rtx, cmp_code, NULL_RTX,
8518 GET_MODE (reg), 0,
8519 XEXP (jump_label, 0));
8520 tem = get_insns ();
8521 end_sequence ();
8522 emit_jump_insn_before (tem, loop_end);
8523
8524 for (tem = PREV_INSN (loop_end);
8525 tem && GET_CODE (tem) != JUMP_INSN;
8526 tem = PREV_INSN (tem))
8527 ;
8528
8529 if (tem)
8530 JUMP_LABEL (tem) = XEXP (jump_label, 0);
8531
8532 if (nonneg)
8533 {
8534 if (tem)
8535 {
8536 /* Increment of LABEL_NUSES done above. */
8537 /* Register is now always nonnegative,
8538 so add REG_NONNEG note to the branch. */
8539 REG_NOTES (tem) = gen_rtx_EXPR_LIST (REG_NONNEG, reg,
8540 REG_NOTES (tem));
8541 }
8542 bl->nonneg = 1;
8543 }
8544
8545 /* No insn may reference both the reversed and another biv or it
8546 will fail (see comment near the top of the loop reversal
8547 code).
8548 Earlier on, we have verified that the biv has no use except
8549 counting, or it is the only biv in this function.
8550 However, the code that computes no_use_except_counting does
8551 not verify reg notes. It's possible to have an insn that
8552 references another biv, and has a REG_EQUAL note with an
8553 expression based on the reversed biv. To avoid this case,
8554 remove all REG_EQUAL notes based on the reversed biv
8555 here. */
8556 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8557 if (INSN_P (p))
8558 {
8559 rtx *pnote;
8560 rtx set = single_set (p);
8561 /* If this is a set of a GIV based on the reversed biv, any
8562 REG_EQUAL notes should still be correct. */
8563 if (! set
8564 || GET_CODE (SET_DEST (set)) != REG
8565 || (size_t) REGNO (SET_DEST (set)) >= ivs->n_regs
8566 || REG_IV_TYPE (ivs, REGNO (SET_DEST (set))) != GENERAL_INDUCT
8567 || REG_IV_INFO (ivs, REGNO (SET_DEST (set)))->src_reg != bl->biv->src_reg)
8568 for (pnote = &REG_NOTES (p); *pnote;)
8569 {
8570 if (REG_NOTE_KIND (*pnote) == REG_EQUAL
8571 && reg_mentioned_p (regno_reg_rtx[bl->regno],
8572 XEXP (*pnote, 0)))
8573 *pnote = XEXP (*pnote, 1);
8574 else
8575 pnote = &XEXP (*pnote, 1);
8576 }
8577 }
8578
8579 /* Mark that this biv has been reversed. Each giv which depends
8580 on this biv, and which is also live past the end of the loop
8581 will have to be fixed up. */
8582
8583 bl->reversed = 1;
8584
8585 if (loop_dump_stream)
8586 {
8587 fprintf (loop_dump_stream, "Reversed loop");
8588 if (bl->nonneg)
8589 fprintf (loop_dump_stream, " and added reg_nonneg\n");
8590 else
8591 fprintf (loop_dump_stream, "\n");
8592 }
8593
8594 return 1;
8595 }
8596 }
8597 }
8598
8599 return 0;
8600 }
8601 \f
8602 /* Verify whether the biv BL appears to be eliminable,
8603 based on the insns in the loop that refer to it.
8604
8605 If ELIMINATE_P is nonzero, actually do the elimination.
8606
8607 THRESHOLD and INSN_COUNT are from loop_optimize and are used to
8608 determine whether invariant insns should be placed inside or at the
8609 start of the loop. */
8610
8611 static int
8612 maybe_eliminate_biv (const struct loop *loop, struct iv_class *bl,
8613 int eliminate_p, int threshold, int insn_count)
8614 {
8615 struct loop_ivs *ivs = LOOP_IVS (loop);
8616 rtx reg = bl->biv->dest_reg;
8617 rtx p;
8618
8619 /* Scan all insns in the loop, stopping if we find one that uses the
8620 biv in a way that we cannot eliminate. */
8621
8622 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
8623 {
8624 enum rtx_code code = GET_CODE (p);
8625 basic_block where_bb = 0;
8626 rtx where_insn = threshold >= insn_count ? 0 : p;
8627 rtx note;
8628
8629 /* If this is a libcall that sets a giv, skip ahead to its end. */
8630 if (GET_RTX_CLASS (code) == 'i')
8631 {
8632 note = find_reg_note (p, REG_LIBCALL, NULL_RTX);
8633
8634 if (note)
8635 {
8636 rtx last = XEXP (note, 0);
8637 rtx set = single_set (last);
8638
8639 if (set && GET_CODE (SET_DEST (set)) == REG)
8640 {
8641 unsigned int regno = REGNO (SET_DEST (set));
8642
8643 if (regno < ivs->n_regs
8644 && REG_IV_TYPE (ivs, regno) == GENERAL_INDUCT
8645 && REG_IV_INFO (ivs, regno)->src_reg == bl->biv->src_reg)
8646 p = last;
8647 }
8648 }
8649 }
8650
8651 /* Closely examine the insn if the biv is mentioned. */
8652 if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
8653 && reg_mentioned_p (reg, PATTERN (p))
8654 && ! maybe_eliminate_biv_1 (loop, PATTERN (p), p, bl,
8655 eliminate_p, where_bb, where_insn))
8656 {
8657 if (loop_dump_stream)
8658 fprintf (loop_dump_stream,
8659 "Cannot eliminate biv %d: biv used in insn %d.\n",
8660 bl->regno, INSN_UID (p));
8661 break;
8662 }
8663
8664 /* If we are eliminating, kill REG_EQUAL notes mentioning the biv. */
8665 if (eliminate_p
8666 && (note = find_reg_note (p, REG_EQUAL, NULL_RTX)) != NULL_RTX
8667 && reg_mentioned_p (reg, XEXP (note, 0)))
8668 remove_note (p, note);
8669 }
8670
8671 if (p == loop->end)
8672 {
8673 if (loop_dump_stream)
8674 fprintf (loop_dump_stream, "biv %d %s eliminated.\n",
8675 bl->regno, eliminate_p ? "was" : "can be");
8676 return 1;
8677 }
8678
8679 return 0;
8680 }
8681 \f
8682 /* INSN and REFERENCE are instructions in the same insn chain.
8683 Return nonzero if INSN is first. */
8684
8685 int
8686 loop_insn_first_p (rtx insn, rtx reference)
8687 {
8688 rtx p, q;
8689
8690 for (p = insn, q = reference;;)
8691 {
8692 /* Start with test for not first so that INSN == REFERENCE yields not
8693 first. */
8694 if (q == insn || ! p)
8695 return 0;
8696 if (p == reference || ! q)
8697 return 1;
8698
8699 /* Either of P or Q might be a NOTE. Notes have the same LUID as the
8700 previous insn, hence the <= comparison below does not work if
8701 P is a note. */
8702 if (INSN_UID (p) < max_uid_for_loop
8703 && INSN_UID (q) < max_uid_for_loop
8704 && GET_CODE (p) != NOTE)
8705 return INSN_LUID (p) <= INSN_LUID (q);
8706
8707 if (INSN_UID (p) >= max_uid_for_loop
8708 || GET_CODE (p) == NOTE)
8709 p = NEXT_INSN (p);
8710 if (INSN_UID (q) >= max_uid_for_loop)
8711 q = NEXT_INSN (q);
8712 }
8713 }
8714
8715 /* We are trying to eliminate BIV in INSN using GIV. Return nonzero if
8716 the offset that we have to take into account due to auto-increment /
8717 div derivation is zero. */
8718 static int
8719 biv_elimination_giv_has_0_offset (struct induction *biv,
8720 struct induction *giv, rtx insn)
8721 {
8722 /* If the giv V had the auto-inc address optimization applied
8723 to it, and INSN occurs between the giv insn and the biv
8724 insn, then we'd have to adjust the value used here.
8725 This is rare, so we don't bother to make this possible. */
8726 if (giv->auto_inc_opt
8727 && ((loop_insn_first_p (giv->insn, insn)
8728 && loop_insn_first_p (insn, biv->insn))
8729 || (loop_insn_first_p (biv->insn, insn)
8730 && loop_insn_first_p (insn, giv->insn))))
8731 return 0;
8732
8733 return 1;
8734 }
8735
8736 /* If BL appears in X (part of the pattern of INSN), see if we can
8737 eliminate its use. If so, return 1. If not, return 0.
8738
8739 If BIV does not appear in X, return 1.
8740
8741 If ELIMINATE_P is nonzero, actually do the elimination.
8742 WHERE_INSN/WHERE_BB indicate where extra insns should be added.
8743 Depending on how many items have been moved out of the loop, it
8744 will either be before INSN (when WHERE_INSN is nonzero) or at the
8745 start of the loop (when WHERE_INSN is zero). */
8746
8747 static int
8748 maybe_eliminate_biv_1 (const struct loop *loop, rtx x, rtx insn,
8749 struct iv_class *bl, int eliminate_p,
8750 basic_block where_bb, rtx where_insn)
8751 {
8752 enum rtx_code code = GET_CODE (x);
8753 rtx reg = bl->biv->dest_reg;
8754 enum machine_mode mode = GET_MODE (reg);
8755 struct induction *v;
8756 rtx arg, tem;
8757 #ifdef HAVE_cc0
8758 rtx new;
8759 #endif
8760 int arg_operand;
8761 const char *fmt;
8762 int i, j;
8763
8764 switch (code)
8765 {
8766 case REG:
8767 /* If we haven't already been able to do something with this BIV,
8768 we can't eliminate it. */
8769 if (x == reg)
8770 return 0;
8771 return 1;
8772
8773 case SET:
8774 /* If this sets the BIV, it is not a problem. */
8775 if (SET_DEST (x) == reg)
8776 return 1;
8777
8778 /* If this is an insn that defines a giv, it is also ok because
8779 it will go away when the giv is reduced. */
8780 for (v = bl->giv; v; v = v->next_iv)
8781 if (v->giv_type == DEST_REG && SET_DEST (x) == v->dest_reg)
8782 return 1;
8783
8784 #ifdef HAVE_cc0
8785 if (SET_DEST (x) == cc0_rtx && SET_SRC (x) == reg)
8786 {
8787 /* Can replace with any giv that was reduced and
8788 that has (MULT_VAL != 0) and (ADD_VAL == 0).
8789 Require a constant for MULT_VAL, so we know it's nonzero.
8790 ??? We disable this optimization to avoid potential
8791 overflows. */
8792
8793 for (v = bl->giv; v; v = v->next_iv)
8794 if (GET_CODE (v->mult_val) == CONST_INT && v->mult_val != const0_rtx
8795 && v->add_val == const0_rtx
8796 && ! v->ignore && ! v->maybe_dead && v->always_computable
8797 && v->mode == mode
8798 && 0)
8799 {
8800 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8801 continue;
8802
8803 if (! eliminate_p)
8804 return 1;
8805
8806 /* If the giv has the opposite direction of change,
8807 then reverse the comparison. */
8808 if (INTVAL (v->mult_val) < 0)
8809 new = gen_rtx_COMPARE (GET_MODE (v->new_reg),
8810 const0_rtx, v->new_reg);
8811 else
8812 new = v->new_reg;
8813
8814 /* We can probably test that giv's reduced reg. */
8815 if (validate_change (insn, &SET_SRC (x), new, 0))
8816 return 1;
8817 }
8818
8819 /* Look for a giv with (MULT_VAL != 0) and (ADD_VAL != 0);
8820 replace test insn with a compare insn (cmp REDUCED_GIV ADD_VAL).
8821 Require a constant for MULT_VAL, so we know it's nonzero.
8822 ??? Do this only if ADD_VAL is a pointer to avoid a potential
8823 overflow problem. */
8824
8825 for (v = bl->giv; v; v = v->next_iv)
8826 if (GET_CODE (v->mult_val) == CONST_INT
8827 && v->mult_val != const0_rtx
8828 && ! v->ignore && ! v->maybe_dead && v->always_computable
8829 && v->mode == mode
8830 && (GET_CODE (v->add_val) == SYMBOL_REF
8831 || GET_CODE (v->add_val) == LABEL_REF
8832 || GET_CODE (v->add_val) == CONST
8833 || (GET_CODE (v->add_val) == REG
8834 && REG_POINTER (v->add_val))))
8835 {
8836 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8837 continue;
8838
8839 if (! eliminate_p)
8840 return 1;
8841
8842 /* If the giv has the opposite direction of change,
8843 then reverse the comparison. */
8844 if (INTVAL (v->mult_val) < 0)
8845 new = gen_rtx_COMPARE (VOIDmode, copy_rtx (v->add_val),
8846 v->new_reg);
8847 else
8848 new = gen_rtx_COMPARE (VOIDmode, v->new_reg,
8849 copy_rtx (v->add_val));
8850
8851 /* Replace biv with the giv's reduced register. */
8852 update_reg_last_use (v->add_val, insn);
8853 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8854 return 1;
8855
8856 /* Insn doesn't support that constant or invariant. Copy it
8857 into a register (it will be a loop invariant.) */
8858 tem = gen_reg_rtx (GET_MODE (v->new_reg));
8859
8860 loop_insn_emit_before (loop, 0, where_insn,
8861 gen_move_insn (tem,
8862 copy_rtx (v->add_val)));
8863
8864 /* Substitute the new register for its invariant value in
8865 the compare expression. */
8866 XEXP (new, (INTVAL (v->mult_val) < 0) ? 0 : 1) = tem;
8867 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8868 return 1;
8869 }
8870 }
8871 #endif
8872 break;
8873
8874 case COMPARE:
8875 case EQ: case NE:
8876 case GT: case GE: case GTU: case GEU:
8877 case LT: case LE: case LTU: case LEU:
8878 /* See if either argument is the biv. */
8879 if (XEXP (x, 0) == reg)
8880 arg = XEXP (x, 1), arg_operand = 1;
8881 else if (XEXP (x, 1) == reg)
8882 arg = XEXP (x, 0), arg_operand = 0;
8883 else
8884 break;
8885
8886 if (CONSTANT_P (arg))
8887 {
8888 /* First try to replace with any giv that has constant positive
8889 mult_val and constant add_val. We might be able to support
8890 negative mult_val, but it seems complex to do it in general. */
8891
8892 for (v = bl->giv; v; v = v->next_iv)
8893 if (GET_CODE (v->mult_val) == CONST_INT
8894 && INTVAL (v->mult_val) > 0
8895 && (GET_CODE (v->add_val) == SYMBOL_REF
8896 || GET_CODE (v->add_val) == LABEL_REF
8897 || GET_CODE (v->add_val) == CONST
8898 || (GET_CODE (v->add_val) == REG
8899 && REG_POINTER (v->add_val)))
8900 && ! v->ignore && ! v->maybe_dead && v->always_computable
8901 && v->mode == mode)
8902 {
8903 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8904 continue;
8905
8906 /* Don't eliminate if the linear combination that makes up
8907 the giv overflows when it is applied to ARG. */
8908 if (GET_CODE (arg) == CONST_INT)
8909 {
8910 rtx add_val;
8911
8912 if (GET_CODE (v->add_val) == CONST_INT)
8913 add_val = v->add_val;
8914 else
8915 add_val = const0_rtx;
8916
8917 if (const_mult_add_overflow_p (arg, v->mult_val,
8918 add_val, mode, 1))
8919 continue;
8920 }
8921
8922 if (! eliminate_p)
8923 return 1;
8924
8925 /* Replace biv with the giv's reduced reg. */
8926 validate_change (insn, &XEXP (x, 1 - arg_operand), v->new_reg, 1);
8927
8928 /* If all constants are actually constant integers and
8929 the derived constant can be directly placed in the COMPARE,
8930 do so. */
8931 if (GET_CODE (arg) == CONST_INT
8932 && GET_CODE (v->add_val) == CONST_INT)
8933 {
8934 tem = expand_mult_add (arg, NULL_RTX, v->mult_val,
8935 v->add_val, mode, 1);
8936 }
8937 else
8938 {
8939 /* Otherwise, load it into a register. */
8940 tem = gen_reg_rtx (mode);
8941 loop_iv_add_mult_emit_before (loop, arg,
8942 v->mult_val, v->add_val,
8943 tem, where_bb, where_insn);
8944 }
8945
8946 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8947
8948 if (apply_change_group ())
8949 return 1;
8950 }
8951
8952 /* Look for giv with positive constant mult_val and nonconst add_val.
8953 Insert insns to calculate new compare value.
8954 ??? Turn this off due to possible overflow. */
8955
8956 for (v = bl->giv; v; v = v->next_iv)
8957 if (GET_CODE (v->mult_val) == CONST_INT
8958 && INTVAL (v->mult_val) > 0
8959 && ! v->ignore && ! v->maybe_dead && v->always_computable
8960 && v->mode == mode
8961 && 0)
8962 {
8963 rtx tem;
8964
8965 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8966 continue;
8967
8968 if (! eliminate_p)
8969 return 1;
8970
8971 tem = gen_reg_rtx (mode);
8972
8973 /* Replace biv with giv's reduced register. */
8974 validate_change (insn, &XEXP (x, 1 - arg_operand),
8975 v->new_reg, 1);
8976
8977 /* Compute value to compare against. */
8978 loop_iv_add_mult_emit_before (loop, arg,
8979 v->mult_val, v->add_val,
8980 tem, where_bb, where_insn);
8981 /* Use it in this insn. */
8982 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8983 if (apply_change_group ())
8984 return 1;
8985 }
8986 }
8987 else if (GET_CODE (arg) == REG || GET_CODE (arg) == MEM)
8988 {
8989 if (loop_invariant_p (loop, arg) == 1)
8990 {
8991 /* Look for giv with constant positive mult_val and nonconst
8992 add_val. Insert insns to compute new compare value.
8993 ??? Turn this off due to possible overflow. */
8994
8995 for (v = bl->giv; v; v = v->next_iv)
8996 if (GET_CODE (v->mult_val) == CONST_INT && INTVAL (v->mult_val) > 0
8997 && ! v->ignore && ! v->maybe_dead && v->always_computable
8998 && v->mode == mode
8999 && 0)
9000 {
9001 rtx tem;
9002
9003 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
9004 continue;
9005
9006 if (! eliminate_p)
9007 return 1;
9008
9009 tem = gen_reg_rtx (mode);
9010
9011 /* Replace biv with giv's reduced register. */
9012 validate_change (insn, &XEXP (x, 1 - arg_operand),
9013 v->new_reg, 1);
9014
9015 /* Compute value to compare against. */
9016 loop_iv_add_mult_emit_before (loop, arg,
9017 v->mult_val, v->add_val,
9018 tem, where_bb, where_insn);
9019 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
9020 if (apply_change_group ())
9021 return 1;
9022 }
9023 }
9024
9025 /* This code has problems. Basically, you can't know when
9026 seeing if we will eliminate BL, whether a particular giv
9027 of ARG will be reduced. If it isn't going to be reduced,
9028 we can't eliminate BL. We can try forcing it to be reduced,
9029 but that can generate poor code.
9030
9031 The problem is that the benefit of reducing TV, below should
9032 be increased if BL can actually be eliminated, but this means
9033 we might have to do a topological sort of the order in which
9034 we try to process biv. It doesn't seem worthwhile to do
9035 this sort of thing now. */
9036
9037 #if 0
9038 /* Otherwise the reg compared with had better be a biv. */
9039 if (GET_CODE (arg) != REG
9040 || REG_IV_TYPE (ivs, REGNO (arg)) != BASIC_INDUCT)
9041 return 0;
9042
9043 /* Look for a pair of givs, one for each biv,
9044 with identical coefficients. */
9045 for (v = bl->giv; v; v = v->next_iv)
9046 {
9047 struct induction *tv;
9048
9049 if (v->ignore || v->maybe_dead || v->mode != mode)
9050 continue;
9051
9052 for (tv = REG_IV_CLASS (ivs, REGNO (arg))->giv; tv;
9053 tv = tv->next_iv)
9054 if (! tv->ignore && ! tv->maybe_dead
9055 && rtx_equal_p (tv->mult_val, v->mult_val)
9056 && rtx_equal_p (tv->add_val, v->add_val)
9057 && tv->mode == mode)
9058 {
9059 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
9060 continue;
9061
9062 if (! eliminate_p)
9063 return 1;
9064
9065 /* Replace biv with its giv's reduced reg. */
9066 XEXP (x, 1 - arg_operand) = v->new_reg;
9067 /* Replace other operand with the other giv's
9068 reduced reg. */
9069 XEXP (x, arg_operand) = tv->new_reg;
9070 return 1;
9071 }
9072 }
9073 #endif
9074 }
9075
9076 /* If we get here, the biv can't be eliminated. */
9077 return 0;
9078
9079 case MEM:
9080 /* If this address is a DEST_ADDR giv, it doesn't matter if the
9081 biv is used in it, since it will be replaced. */
9082 for (v = bl->giv; v; v = v->next_iv)
9083 if (v->giv_type == DEST_ADDR && v->location == &XEXP (x, 0))
9084 return 1;
9085 break;
9086
9087 default:
9088 break;
9089 }
9090
9091 /* See if any subexpression fails elimination. */
9092 fmt = GET_RTX_FORMAT (code);
9093 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
9094 {
9095 switch (fmt[i])
9096 {
9097 case 'e':
9098 if (! maybe_eliminate_biv_1 (loop, XEXP (x, i), insn, bl,
9099 eliminate_p, where_bb, where_insn))
9100 return 0;
9101 break;
9102
9103 case 'E':
9104 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9105 if (! maybe_eliminate_biv_1 (loop, XVECEXP (x, i, j), insn, bl,
9106 eliminate_p, where_bb, where_insn))
9107 return 0;
9108 break;
9109 }
9110 }
9111
9112 return 1;
9113 }
9114 \f
9115 /* Return nonzero if the last use of REG
9116 is in an insn following INSN in the same basic block. */
9117
9118 static int
9119 last_use_this_basic_block (rtx reg, rtx insn)
9120 {
9121 rtx n;
9122 for (n = insn;
9123 n && GET_CODE (n) != CODE_LABEL && GET_CODE (n) != JUMP_INSN;
9124 n = NEXT_INSN (n))
9125 {
9126 if (REGNO_LAST_UID (REGNO (reg)) == INSN_UID (n))
9127 return 1;
9128 }
9129 return 0;
9130 }
9131 \f
9132 /* Called via `note_stores' to record the initial value of a biv. Here we
9133 just record the location of the set and process it later. */
9134
9135 static void
9136 record_initial (rtx dest, rtx set, void *data ATTRIBUTE_UNUSED)
9137 {
9138 struct loop_ivs *ivs = (struct loop_ivs *) data;
9139 struct iv_class *bl;
9140
9141 if (GET_CODE (dest) != REG
9142 || REGNO (dest) >= ivs->n_regs
9143 || REG_IV_TYPE (ivs, REGNO (dest)) != BASIC_INDUCT)
9144 return;
9145
9146 bl = REG_IV_CLASS (ivs, REGNO (dest));
9147
9148 /* If this is the first set found, record it. */
9149 if (bl->init_insn == 0)
9150 {
9151 bl->init_insn = note_insn;
9152 bl->init_set = set;
9153 }
9154 }
9155 \f
9156 /* If any of the registers in X are "old" and currently have a last use earlier
9157 than INSN, update them to have a last use of INSN. Their actual last use
9158 will be the previous insn but it will not have a valid uid_luid so we can't
9159 use it. X must be a source expression only. */
9160
9161 static void
9162 update_reg_last_use (rtx x, rtx insn)
9163 {
9164 /* Check for the case where INSN does not have a valid luid. In this case,
9165 there is no need to modify the regno_last_uid, as this can only happen
9166 when code is inserted after the loop_end to set a pseudo's final value,
9167 and hence this insn will never be the last use of x.
9168 ???? This comment is not correct. See for example loop_givs_reduce.
9169 This may insert an insn before another new insn. */
9170 if (GET_CODE (x) == REG && REGNO (x) < max_reg_before_loop
9171 && INSN_UID (insn) < max_uid_for_loop
9172 && REGNO_LAST_LUID (REGNO (x)) < INSN_LUID (insn))
9173 {
9174 REGNO_LAST_UID (REGNO (x)) = INSN_UID (insn);
9175 }
9176 else
9177 {
9178 int i, j;
9179 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
9180 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9181 {
9182 if (fmt[i] == 'e')
9183 update_reg_last_use (XEXP (x, i), insn);
9184 else if (fmt[i] == 'E')
9185 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9186 update_reg_last_use (XVECEXP (x, i, j), insn);
9187 }
9188 }
9189 }
9190 \f
9191 /* Given an insn INSN and condition COND, return the condition in a
9192 canonical form to simplify testing by callers. Specifically:
9193
9194 (1) The code will always be a comparison operation (EQ, NE, GT, etc.).
9195 (2) Both operands will be machine operands; (cc0) will have been replaced.
9196 (3) If an operand is a constant, it will be the second operand.
9197 (4) (LE x const) will be replaced with (LT x <const+1>) and similarly
9198 for GE, GEU, and LEU.
9199
9200 If the condition cannot be understood, or is an inequality floating-point
9201 comparison which needs to be reversed, 0 will be returned.
9202
9203 If REVERSE is nonzero, then reverse the condition prior to canonizing it.
9204
9205 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9206 insn used in locating the condition was found. If a replacement test
9207 of the condition is desired, it should be placed in front of that
9208 insn and we will be sure that the inputs are still valid.
9209
9210 If WANT_REG is nonzero, we wish the condition to be relative to that
9211 register, if possible. Therefore, do not canonicalize the condition
9212 further. If ALLOW_CC_MODE is nonzero, allow the condition returned
9213 to be a compare to a CC mode register. */
9214
9215 rtx
9216 canonicalize_condition (rtx insn, rtx cond, int reverse, rtx *earliest,
9217 rtx want_reg, int allow_cc_mode)
9218 {
9219 enum rtx_code code;
9220 rtx prev = insn;
9221 rtx set;
9222 rtx tem;
9223 rtx op0, op1;
9224 int reverse_code = 0;
9225 enum machine_mode mode;
9226
9227 code = GET_CODE (cond);
9228 mode = GET_MODE (cond);
9229 op0 = XEXP (cond, 0);
9230 op1 = XEXP (cond, 1);
9231
9232 if (reverse)
9233 code = reversed_comparison_code (cond, insn);
9234 if (code == UNKNOWN)
9235 return 0;
9236
9237 if (earliest)
9238 *earliest = insn;
9239
9240 /* If we are comparing a register with zero, see if the register is set
9241 in the previous insn to a COMPARE or a comparison operation. Perform
9242 the same tests as a function of STORE_FLAG_VALUE as find_comparison_args
9243 in cse.c */
9244
9245 while (GET_RTX_CLASS (code) == '<'
9246 && op1 == CONST0_RTX (GET_MODE (op0))
9247 && op0 != want_reg)
9248 {
9249 /* Set nonzero when we find something of interest. */
9250 rtx x = 0;
9251
9252 #ifdef HAVE_cc0
9253 /* If comparison with cc0, import actual comparison from compare
9254 insn. */
9255 if (op0 == cc0_rtx)
9256 {
9257 if ((prev = prev_nonnote_insn (prev)) == 0
9258 || GET_CODE (prev) != INSN
9259 || (set = single_set (prev)) == 0
9260 || SET_DEST (set) != cc0_rtx)
9261 return 0;
9262
9263 op0 = SET_SRC (set);
9264 op1 = CONST0_RTX (GET_MODE (op0));
9265 if (earliest)
9266 *earliest = prev;
9267 }
9268 #endif
9269
9270 /* If this is a COMPARE, pick up the two things being compared. */
9271 if (GET_CODE (op0) == COMPARE)
9272 {
9273 op1 = XEXP (op0, 1);
9274 op0 = XEXP (op0, 0);
9275 continue;
9276 }
9277 else if (GET_CODE (op0) != REG)
9278 break;
9279
9280 /* Go back to the previous insn. Stop if it is not an INSN. We also
9281 stop if it isn't a single set or if it has a REG_INC note because
9282 we don't want to bother dealing with it. */
9283
9284 if ((prev = prev_nonnote_insn (prev)) == 0
9285 || GET_CODE (prev) != INSN
9286 || FIND_REG_INC_NOTE (prev, NULL_RTX))
9287 break;
9288
9289 set = set_of (op0, prev);
9290
9291 if (set
9292 && (GET_CODE (set) != SET
9293 || !rtx_equal_p (SET_DEST (set), op0)))
9294 break;
9295
9296 /* If this is setting OP0, get what it sets it to if it looks
9297 relevant. */
9298 if (set)
9299 {
9300 enum machine_mode inner_mode = GET_MODE (SET_DEST (set));
9301 #ifdef FLOAT_STORE_FLAG_VALUE
9302 REAL_VALUE_TYPE fsfv;
9303 #endif
9304
9305 /* ??? We may not combine comparisons done in a CCmode with
9306 comparisons not done in a CCmode. This is to aid targets
9307 like Alpha that have an IEEE compliant EQ instruction, and
9308 a non-IEEE compliant BEQ instruction. The use of CCmode is
9309 actually artificial, simply to prevent the combination, but
9310 should not affect other platforms.
9311
9312 However, we must allow VOIDmode comparisons to match either
9313 CCmode or non-CCmode comparison, because some ports have
9314 modeless comparisons inside branch patterns.
9315
9316 ??? This mode check should perhaps look more like the mode check
9317 in simplify_comparison in combine. */
9318
9319 if ((GET_CODE (SET_SRC (set)) == COMPARE
9320 || (((code == NE
9321 || (code == LT
9322 && GET_MODE_CLASS (inner_mode) == MODE_INT
9323 && (GET_MODE_BITSIZE (inner_mode)
9324 <= HOST_BITS_PER_WIDE_INT)
9325 && (STORE_FLAG_VALUE
9326 & ((HOST_WIDE_INT) 1
9327 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9328 #ifdef FLOAT_STORE_FLAG_VALUE
9329 || (code == LT
9330 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9331 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9332 REAL_VALUE_NEGATIVE (fsfv)))
9333 #endif
9334 ))
9335 && GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'))
9336 && (((GET_MODE_CLASS (mode) == MODE_CC)
9337 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9338 || mode == VOIDmode || inner_mode == VOIDmode))
9339 x = SET_SRC (set);
9340 else if (((code == EQ
9341 || (code == GE
9342 && (GET_MODE_BITSIZE (inner_mode)
9343 <= HOST_BITS_PER_WIDE_INT)
9344 && GET_MODE_CLASS (inner_mode) == MODE_INT
9345 && (STORE_FLAG_VALUE
9346 & ((HOST_WIDE_INT) 1
9347 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9348 #ifdef FLOAT_STORE_FLAG_VALUE
9349 || (code == GE
9350 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9351 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9352 REAL_VALUE_NEGATIVE (fsfv)))
9353 #endif
9354 ))
9355 && GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'
9356 && (((GET_MODE_CLASS (mode) == MODE_CC)
9357 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9358 || mode == VOIDmode || inner_mode == VOIDmode))
9359
9360 {
9361 reverse_code = 1;
9362 x = SET_SRC (set);
9363 }
9364 else
9365 break;
9366 }
9367
9368 else if (reg_set_p (op0, prev))
9369 /* If this sets OP0, but not directly, we have to give up. */
9370 break;
9371
9372 if (x)
9373 {
9374 if (GET_RTX_CLASS (GET_CODE (x)) == '<')
9375 code = GET_CODE (x);
9376 if (reverse_code)
9377 {
9378 code = reversed_comparison_code (x, prev);
9379 if (code == UNKNOWN)
9380 return 0;
9381 reverse_code = 0;
9382 }
9383
9384 op0 = XEXP (x, 0), op1 = XEXP (x, 1);
9385 if (earliest)
9386 *earliest = prev;
9387 }
9388 }
9389
9390 /* If constant is first, put it last. */
9391 if (CONSTANT_P (op0))
9392 code = swap_condition (code), tem = op0, op0 = op1, op1 = tem;
9393
9394 /* If OP0 is the result of a comparison, we weren't able to find what
9395 was really being compared, so fail. */
9396 if (!allow_cc_mode
9397 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
9398 return 0;
9399
9400 /* Canonicalize any ordered comparison with integers involving equality
9401 if we can do computations in the relevant mode and we do not
9402 overflow. */
9403
9404 if (GET_MODE_CLASS (GET_MODE (op0)) != MODE_CC
9405 && GET_CODE (op1) == CONST_INT
9406 && GET_MODE (op0) != VOIDmode
9407 && GET_MODE_BITSIZE (GET_MODE (op0)) <= HOST_BITS_PER_WIDE_INT)
9408 {
9409 HOST_WIDE_INT const_val = INTVAL (op1);
9410 unsigned HOST_WIDE_INT uconst_val = const_val;
9411 unsigned HOST_WIDE_INT max_val
9412 = (unsigned HOST_WIDE_INT) GET_MODE_MASK (GET_MODE (op0));
9413
9414 switch (code)
9415 {
9416 case LE:
9417 if ((unsigned HOST_WIDE_INT) const_val != max_val >> 1)
9418 code = LT, op1 = gen_int_mode (const_val + 1, GET_MODE (op0));
9419 break;
9420
9421 /* When cross-compiling, const_val might be sign-extended from
9422 BITS_PER_WORD to HOST_BITS_PER_WIDE_INT */
9423 case GE:
9424 if ((HOST_WIDE_INT) (const_val & max_val)
9425 != (((HOST_WIDE_INT) 1
9426 << (GET_MODE_BITSIZE (GET_MODE (op0)) - 1))))
9427 code = GT, op1 = gen_int_mode (const_val - 1, GET_MODE (op0));
9428 break;
9429
9430 case LEU:
9431 if (uconst_val < max_val)
9432 code = LTU, op1 = gen_int_mode (uconst_val + 1, GET_MODE (op0));
9433 break;
9434
9435 case GEU:
9436 if (uconst_val != 0)
9437 code = GTU, op1 = gen_int_mode (uconst_val - 1, GET_MODE (op0));
9438 break;
9439
9440 default:
9441 break;
9442 }
9443 }
9444
9445 /* Never return CC0; return zero instead. */
9446 if (CC0_P (op0))
9447 return 0;
9448
9449 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
9450 }
9451
9452 /* Given a jump insn JUMP, return the condition that will cause it to branch
9453 to its JUMP_LABEL. If the condition cannot be understood, or is an
9454 inequality floating-point comparison which needs to be reversed, 0 will
9455 be returned.
9456
9457 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9458 insn used in locating the condition was found. If a replacement test
9459 of the condition is desired, it should be placed in front of that
9460 insn and we will be sure that the inputs are still valid.
9461
9462 If ALLOW_CC_MODE is nonzero, allow the condition returned to be a
9463 compare CC mode register. */
9464
9465 rtx
9466 get_condition (rtx jump, rtx *earliest, int allow_cc_mode)
9467 {
9468 rtx cond;
9469 int reverse;
9470 rtx set;
9471
9472 /* If this is not a standard conditional jump, we can't parse it. */
9473 if (GET_CODE (jump) != JUMP_INSN
9474 || ! any_condjump_p (jump))
9475 return 0;
9476 set = pc_set (jump);
9477
9478 cond = XEXP (SET_SRC (set), 0);
9479
9480 /* If this branches to JUMP_LABEL when the condition is false, reverse
9481 the condition. */
9482 reverse
9483 = GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF
9484 && XEXP (XEXP (SET_SRC (set), 2), 0) == JUMP_LABEL (jump);
9485
9486 return canonicalize_condition (jump, cond, reverse, earliest, NULL_RTX,
9487 allow_cc_mode);
9488 }
9489
9490 /* Similar to above routine, except that we also put an invariant last
9491 unless both operands are invariants. */
9492
9493 rtx
9494 get_condition_for_loop (const struct loop *loop, rtx x)
9495 {
9496 rtx comparison = get_condition (x, (rtx*) 0, false);
9497
9498 if (comparison == 0
9499 || ! loop_invariant_p (loop, XEXP (comparison, 0))
9500 || loop_invariant_p (loop, XEXP (comparison, 1)))
9501 return comparison;
9502
9503 return gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)), VOIDmode,
9504 XEXP (comparison, 1), XEXP (comparison, 0));
9505 }
9506
9507 /* Scan the function and determine whether it has indirect (computed) jumps.
9508
9509 This is taken mostly from flow.c; similar code exists elsewhere
9510 in the compiler. It may be useful to put this into rtlanal.c. */
9511 static int
9512 indirect_jump_in_function_p (rtx start)
9513 {
9514 rtx insn;
9515
9516 for (insn = start; insn; insn = NEXT_INSN (insn))
9517 if (computed_jump_p (insn))
9518 return 1;
9519
9520 return 0;
9521 }
9522
9523 /* Add MEM to the LOOP_MEMS array, if appropriate. See the
9524 documentation for LOOP_MEMS for the definition of `appropriate'.
9525 This function is called from prescan_loop via for_each_rtx. */
9526
9527 static int
9528 insert_loop_mem (rtx *mem, void *data ATTRIBUTE_UNUSED)
9529 {
9530 struct loop_info *loop_info = data;
9531 int i;
9532 rtx m = *mem;
9533
9534 if (m == NULL_RTX)
9535 return 0;
9536
9537 switch (GET_CODE (m))
9538 {
9539 case MEM:
9540 break;
9541
9542 case CLOBBER:
9543 /* We're not interested in MEMs that are only clobbered. */
9544 return -1;
9545
9546 case CONST_DOUBLE:
9547 /* We're not interested in the MEM associated with a
9548 CONST_DOUBLE, so there's no need to traverse into this. */
9549 return -1;
9550
9551 case EXPR_LIST:
9552 /* We're not interested in any MEMs that only appear in notes. */
9553 return -1;
9554
9555 default:
9556 /* This is not a MEM. */
9557 return 0;
9558 }
9559
9560 /* See if we've already seen this MEM. */
9561 for (i = 0; i < loop_info->mems_idx; ++i)
9562 if (rtx_equal_p (m, loop_info->mems[i].mem))
9563 {
9564 if (MEM_VOLATILE_P (m) && !MEM_VOLATILE_P (loop_info->mems[i].mem))
9565 loop_info->mems[i].mem = m;
9566 if (GET_MODE (m) != GET_MODE (loop_info->mems[i].mem))
9567 /* The modes of the two memory accesses are different. If
9568 this happens, something tricky is going on, and we just
9569 don't optimize accesses to this MEM. */
9570 loop_info->mems[i].optimize = 0;
9571
9572 return 0;
9573 }
9574
9575 /* Resize the array, if necessary. */
9576 if (loop_info->mems_idx == loop_info->mems_allocated)
9577 {
9578 if (loop_info->mems_allocated != 0)
9579 loop_info->mems_allocated *= 2;
9580 else
9581 loop_info->mems_allocated = 32;
9582
9583 loop_info->mems = xrealloc (loop_info->mems,
9584 loop_info->mems_allocated * sizeof (loop_mem_info));
9585 }
9586
9587 /* Actually insert the MEM. */
9588 loop_info->mems[loop_info->mems_idx].mem = m;
9589 /* We can't hoist this MEM out of the loop if it's a BLKmode MEM
9590 because we can't put it in a register. We still store it in the
9591 table, though, so that if we see the same address later, but in a
9592 non-BLK mode, we'll not think we can optimize it at that point. */
9593 loop_info->mems[loop_info->mems_idx].optimize = (GET_MODE (m) != BLKmode);
9594 loop_info->mems[loop_info->mems_idx].reg = NULL_RTX;
9595 ++loop_info->mems_idx;
9596
9597 return 0;
9598 }
9599
9600
9601 /* Allocate REGS->ARRAY or reallocate it if it is too small.
9602
9603 Increment REGS->ARRAY[I].SET_IN_LOOP at the index I of each
9604 register that is modified by an insn between FROM and TO. If the
9605 value of an element of REGS->array[I].SET_IN_LOOP becomes 127 or
9606 more, stop incrementing it, to avoid overflow.
9607
9608 Store in REGS->ARRAY[I].SINGLE_USAGE the single insn in which
9609 register I is used, if it is only used once. Otherwise, it is set
9610 to 0 (for no uses) or const0_rtx for more than one use. This
9611 parameter may be zero, in which case this processing is not done.
9612
9613 Set REGS->ARRAY[I].MAY_NOT_OPTIMIZE nonzero if we should not
9614 optimize register I. */
9615
9616 static void
9617 loop_regs_scan (const struct loop *loop, int extra_size)
9618 {
9619 struct loop_regs *regs = LOOP_REGS (loop);
9620 int old_nregs;
9621 /* last_set[n] is nonzero iff reg n has been set in the current
9622 basic block. In that case, it is the insn that last set reg n. */
9623 rtx *last_set;
9624 rtx insn;
9625 int i;
9626
9627 old_nregs = regs->num;
9628 regs->num = max_reg_num ();
9629
9630 /* Grow the regs array if not allocated or too small. */
9631 if (regs->num >= regs->size)
9632 {
9633 regs->size = regs->num + extra_size;
9634
9635 regs->array = xrealloc (regs->array, regs->size * sizeof (*regs->array));
9636
9637 /* Zero the new elements. */
9638 memset (regs->array + old_nregs, 0,
9639 (regs->size - old_nregs) * sizeof (*regs->array));
9640 }
9641
9642 /* Clear previously scanned fields but do not clear n_times_set. */
9643 for (i = 0; i < old_nregs; i++)
9644 {
9645 regs->array[i].set_in_loop = 0;
9646 regs->array[i].may_not_optimize = 0;
9647 regs->array[i].single_usage = NULL_RTX;
9648 }
9649
9650 last_set = xcalloc (regs->num, sizeof (rtx));
9651
9652 /* Scan the loop, recording register usage. */
9653 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9654 insn = NEXT_INSN (insn))
9655 {
9656 if (INSN_P (insn))
9657 {
9658 /* Record registers that have exactly one use. */
9659 find_single_use_in_loop (regs, insn, PATTERN (insn));
9660
9661 /* Include uses in REG_EQUAL notes. */
9662 if (REG_NOTES (insn))
9663 find_single_use_in_loop (regs, insn, REG_NOTES (insn));
9664
9665 if (GET_CODE (PATTERN (insn)) == SET
9666 || GET_CODE (PATTERN (insn)) == CLOBBER)
9667 count_one_set (regs, insn, PATTERN (insn), last_set);
9668 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
9669 {
9670 int i;
9671 for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
9672 count_one_set (regs, insn, XVECEXP (PATTERN (insn), 0, i),
9673 last_set);
9674 }
9675 }
9676
9677 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9678 memset (last_set, 0, regs->num * sizeof (rtx));
9679
9680 /* Invalidate all registers used for function argument passing.
9681 We check rtx_varies_p for the same reason as below, to allow
9682 optimizing PIC calculations. */
9683 if (GET_CODE (insn) == CALL_INSN)
9684 {
9685 rtx link;
9686 for (link = CALL_INSN_FUNCTION_USAGE (insn);
9687 link;
9688 link = XEXP (link, 1))
9689 {
9690 rtx op, reg;
9691
9692 if (GET_CODE (op = XEXP (link, 0)) == USE
9693 && GET_CODE (reg = XEXP (op, 0)) == REG
9694 && rtx_varies_p (reg, 1))
9695 regs->array[REGNO (reg)].may_not_optimize = 1;
9696 }
9697 }
9698 }
9699
9700 /* Invalidate all hard registers clobbered by calls. With one exception:
9701 a call-clobbered PIC register is still function-invariant for our
9702 purposes, since we can hoist any PIC calculations out of the loop.
9703 Thus the call to rtx_varies_p. */
9704 if (LOOP_INFO (loop)->has_call)
9705 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
9706 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
9707 && rtx_varies_p (regno_reg_rtx[i], 1))
9708 {
9709 regs->array[i].may_not_optimize = 1;
9710 regs->array[i].set_in_loop = 1;
9711 }
9712
9713 #ifdef AVOID_CCMODE_COPIES
9714 /* Don't try to move insns which set CC registers if we should not
9715 create CCmode register copies. */
9716 for (i = regs->num - 1; i >= FIRST_PSEUDO_REGISTER; i--)
9717 if (GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
9718 regs->array[i].may_not_optimize = 1;
9719 #endif
9720
9721 /* Set regs->array[I].n_times_set for the new registers. */
9722 for (i = old_nregs; i < regs->num; i++)
9723 regs->array[i].n_times_set = regs->array[i].set_in_loop;
9724
9725 free (last_set);
9726 }
9727
9728 /* Returns the number of real INSNs in the LOOP. */
9729
9730 static int
9731 count_insns_in_loop (const struct loop *loop)
9732 {
9733 int count = 0;
9734 rtx insn;
9735
9736 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9737 insn = NEXT_INSN (insn))
9738 if (INSN_P (insn))
9739 ++count;
9740
9741 return count;
9742 }
9743
9744 /* Move MEMs into registers for the duration of the loop. */
9745
9746 static void
9747 load_mems (const struct loop *loop)
9748 {
9749 struct loop_info *loop_info = LOOP_INFO (loop);
9750 struct loop_regs *regs = LOOP_REGS (loop);
9751 int maybe_never = 0;
9752 int i;
9753 rtx p, prev_ebb_head;
9754 rtx label = NULL_RTX;
9755 rtx end_label;
9756 /* Nonzero if the next instruction may never be executed. */
9757 int next_maybe_never = 0;
9758 unsigned int last_max_reg = max_reg_num ();
9759
9760 if (loop_info->mems_idx == 0)
9761 return;
9762
9763 /* We cannot use next_label here because it skips over normal insns. */
9764 end_label = next_nonnote_insn (loop->end);
9765 if (end_label && GET_CODE (end_label) != CODE_LABEL)
9766 end_label = NULL_RTX;
9767
9768 /* Check to see if it's possible that some instructions in the loop are
9769 never executed. Also check if there is a goto out of the loop other
9770 than right after the end of the loop. */
9771 for (p = next_insn_in_loop (loop, loop->scan_start);
9772 p != NULL_RTX;
9773 p = next_insn_in_loop (loop, p))
9774 {
9775 if (GET_CODE (p) == CODE_LABEL)
9776 maybe_never = 1;
9777 else if (GET_CODE (p) == JUMP_INSN
9778 /* If we enter the loop in the middle, and scan
9779 around to the beginning, don't set maybe_never
9780 for that. This must be an unconditional jump,
9781 otherwise the code at the top of the loop might
9782 never be executed. Unconditional jumps are
9783 followed a by barrier then loop end. */
9784 && ! (GET_CODE (p) == JUMP_INSN
9785 && JUMP_LABEL (p) == loop->top
9786 && NEXT_INSN (NEXT_INSN (p)) == loop->end
9787 && any_uncondjump_p (p)))
9788 {
9789 /* If this is a jump outside of the loop but not right
9790 after the end of the loop, we would have to emit new fixup
9791 sequences for each such label. */
9792 if (/* If we can't tell where control might go when this
9793 JUMP_INSN is executed, we must be conservative. */
9794 !JUMP_LABEL (p)
9795 || (JUMP_LABEL (p) != end_label
9796 && (INSN_UID (JUMP_LABEL (p)) >= max_uid_for_loop
9797 || INSN_LUID (JUMP_LABEL (p)) < INSN_LUID (loop->start)
9798 || INSN_LUID (JUMP_LABEL (p)) > INSN_LUID (loop->end))))
9799 return;
9800
9801 if (!any_condjump_p (p))
9802 /* Something complicated. */
9803 maybe_never = 1;
9804 else
9805 /* If there are any more instructions in the loop, they
9806 might not be reached. */
9807 next_maybe_never = 1;
9808 }
9809 else if (next_maybe_never)
9810 maybe_never = 1;
9811 }
9812
9813 /* Find start of the extended basic block that enters the loop. */
9814 for (p = loop->start;
9815 PREV_INSN (p) && GET_CODE (p) != CODE_LABEL;
9816 p = PREV_INSN (p))
9817 ;
9818 prev_ebb_head = p;
9819
9820 cselib_init ();
9821
9822 /* Build table of mems that get set to constant values before the
9823 loop. */
9824 for (; p != loop->start; p = NEXT_INSN (p))
9825 cselib_process_insn (p);
9826
9827 /* Actually move the MEMs. */
9828 for (i = 0; i < loop_info->mems_idx; ++i)
9829 {
9830 regset_head load_copies;
9831 regset_head store_copies;
9832 int written = 0;
9833 rtx reg;
9834 rtx mem = loop_info->mems[i].mem;
9835 rtx mem_list_entry;
9836
9837 if (MEM_VOLATILE_P (mem)
9838 || loop_invariant_p (loop, XEXP (mem, 0)) != 1)
9839 /* There's no telling whether or not MEM is modified. */
9840 loop_info->mems[i].optimize = 0;
9841
9842 /* Go through the MEMs written to in the loop to see if this
9843 one is aliased by one of them. */
9844 mem_list_entry = loop_info->store_mems;
9845 while (mem_list_entry)
9846 {
9847 if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
9848 written = 1;
9849 else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
9850 mem, rtx_varies_p))
9851 {
9852 /* MEM is indeed aliased by this store. */
9853 loop_info->mems[i].optimize = 0;
9854 break;
9855 }
9856 mem_list_entry = XEXP (mem_list_entry, 1);
9857 }
9858
9859 if (flag_float_store && written
9860 && GET_MODE_CLASS (GET_MODE (mem)) == MODE_FLOAT)
9861 loop_info->mems[i].optimize = 0;
9862
9863 /* If this MEM is written to, we must be sure that there
9864 are no reads from another MEM that aliases this one. */
9865 if (loop_info->mems[i].optimize && written)
9866 {
9867 int j;
9868
9869 for (j = 0; j < loop_info->mems_idx; ++j)
9870 {
9871 if (j == i)
9872 continue;
9873 else if (true_dependence (mem,
9874 VOIDmode,
9875 loop_info->mems[j].mem,
9876 rtx_varies_p))
9877 {
9878 /* It's not safe to hoist loop_info->mems[i] out of
9879 the loop because writes to it might not be
9880 seen by reads from loop_info->mems[j]. */
9881 loop_info->mems[i].optimize = 0;
9882 break;
9883 }
9884 }
9885 }
9886
9887 if (maybe_never && may_trap_p (mem))
9888 /* We can't access the MEM outside the loop; it might
9889 cause a trap that wouldn't have happened otherwise. */
9890 loop_info->mems[i].optimize = 0;
9891
9892 if (!loop_info->mems[i].optimize)
9893 /* We thought we were going to lift this MEM out of the
9894 loop, but later discovered that we could not. */
9895 continue;
9896
9897 INIT_REG_SET (&load_copies);
9898 INIT_REG_SET (&store_copies);
9899
9900 /* Allocate a pseudo for this MEM. We set REG_USERVAR_P in
9901 order to keep scan_loop from moving stores to this MEM
9902 out of the loop just because this REG is neither a
9903 user-variable nor used in the loop test. */
9904 reg = gen_reg_rtx (GET_MODE (mem));
9905 REG_USERVAR_P (reg) = 1;
9906 loop_info->mems[i].reg = reg;
9907
9908 /* Now, replace all references to the MEM with the
9909 corresponding pseudos. */
9910 maybe_never = 0;
9911 for (p = next_insn_in_loop (loop, loop->scan_start);
9912 p != NULL_RTX;
9913 p = next_insn_in_loop (loop, p))
9914 {
9915 if (INSN_P (p))
9916 {
9917 rtx set;
9918
9919 set = single_set (p);
9920
9921 /* See if this copies the mem into a register that isn't
9922 modified afterwards. We'll try to do copy propagation
9923 a little further on. */
9924 if (set
9925 /* @@@ This test is _way_ too conservative. */
9926 && ! maybe_never
9927 && GET_CODE (SET_DEST (set)) == REG
9928 && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
9929 && REGNO (SET_DEST (set)) < last_max_reg
9930 && regs->array[REGNO (SET_DEST (set))].n_times_set == 1
9931 && rtx_equal_p (SET_SRC (set), mem))
9932 SET_REGNO_REG_SET (&load_copies, REGNO (SET_DEST (set)));
9933
9934 /* See if this copies the mem from a register that isn't
9935 modified afterwards. We'll try to remove the
9936 redundant copy later on by doing a little register
9937 renaming and copy propagation. This will help
9938 to untangle things for the BIV detection code. */
9939 if (set
9940 && ! maybe_never
9941 && GET_CODE (SET_SRC (set)) == REG
9942 && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER
9943 && REGNO (SET_SRC (set)) < last_max_reg
9944 && regs->array[REGNO (SET_SRC (set))].n_times_set == 1
9945 && rtx_equal_p (SET_DEST (set), mem))
9946 SET_REGNO_REG_SET (&store_copies, REGNO (SET_SRC (set)));
9947
9948 /* If this is a call which uses / clobbers this memory
9949 location, we must not change the interface here. */
9950 if (GET_CODE (p) == CALL_INSN
9951 && reg_mentioned_p (loop_info->mems[i].mem,
9952 CALL_INSN_FUNCTION_USAGE (p)))
9953 {
9954 cancel_changes (0);
9955 loop_info->mems[i].optimize = 0;
9956 break;
9957 }
9958 else
9959 /* Replace the memory reference with the shadow register. */
9960 replace_loop_mems (p, loop_info->mems[i].mem,
9961 loop_info->mems[i].reg, written);
9962 }
9963
9964 if (GET_CODE (p) == CODE_LABEL
9965 || GET_CODE (p) == JUMP_INSN)
9966 maybe_never = 1;
9967 }
9968
9969 if (! loop_info->mems[i].optimize)
9970 ; /* We found we couldn't do the replacement, so do nothing. */
9971 else if (! apply_change_group ())
9972 /* We couldn't replace all occurrences of the MEM. */
9973 loop_info->mems[i].optimize = 0;
9974 else
9975 {
9976 /* Load the memory immediately before LOOP->START, which is
9977 the NOTE_LOOP_BEG. */
9978 cselib_val *e = cselib_lookup (mem, VOIDmode, 0);
9979 rtx set;
9980 rtx best = mem;
9981 int j;
9982 struct elt_loc_list *const_equiv = 0;
9983
9984 if (e)
9985 {
9986 struct elt_loc_list *equiv;
9987 struct elt_loc_list *best_equiv = 0;
9988 for (equiv = e->locs; equiv; equiv = equiv->next)
9989 {
9990 if (CONSTANT_P (equiv->loc))
9991 const_equiv = equiv;
9992 else if (GET_CODE (equiv->loc) == REG
9993 /* Extending hard register lifetimes causes crash
9994 on SRC targets. Doing so on non-SRC is
9995 probably also not good idea, since we most
9996 probably have pseudoregister equivalence as
9997 well. */
9998 && REGNO (equiv->loc) >= FIRST_PSEUDO_REGISTER)
9999 best_equiv = equiv;
10000 }
10001 /* Use the constant equivalence if that is cheap enough. */
10002 if (! best_equiv)
10003 best_equiv = const_equiv;
10004 else if (const_equiv
10005 && (rtx_cost (const_equiv->loc, SET)
10006 <= rtx_cost (best_equiv->loc, SET)))
10007 {
10008 best_equiv = const_equiv;
10009 const_equiv = 0;
10010 }
10011
10012 /* If best_equiv is nonzero, we know that MEM is set to a
10013 constant or register before the loop. We will use this
10014 knowledge to initialize the shadow register with that
10015 constant or reg rather than by loading from MEM. */
10016 if (best_equiv)
10017 best = copy_rtx (best_equiv->loc);
10018 }
10019
10020 set = gen_move_insn (reg, best);
10021 set = loop_insn_hoist (loop, set);
10022 if (REG_P (best))
10023 {
10024 for (p = prev_ebb_head; p != loop->start; p = NEXT_INSN (p))
10025 if (REGNO_LAST_UID (REGNO (best)) == INSN_UID (p))
10026 {
10027 REGNO_LAST_UID (REGNO (best)) = INSN_UID (set);
10028 break;
10029 }
10030 }
10031
10032 if (const_equiv)
10033 set_unique_reg_note (set, REG_EQUAL, copy_rtx (const_equiv->loc));
10034
10035 if (written)
10036 {
10037 if (label == NULL_RTX)
10038 {
10039 label = gen_label_rtx ();
10040 emit_label_after (label, loop->end);
10041 }
10042
10043 /* Store the memory immediately after END, which is
10044 the NOTE_LOOP_END. */
10045 set = gen_move_insn (copy_rtx (mem), reg);
10046 loop_insn_emit_after (loop, 0, label, set);
10047 }
10048
10049 if (loop_dump_stream)
10050 {
10051 fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
10052 REGNO (reg), (written ? "r/w" : "r/o"));
10053 print_rtl (loop_dump_stream, mem);
10054 fputc ('\n', loop_dump_stream);
10055 }
10056
10057 /* Attempt a bit of copy propagation. This helps untangle the
10058 data flow, and enables {basic,general}_induction_var to find
10059 more bivs/givs. */
10060 EXECUTE_IF_SET_IN_REG_SET
10061 (&load_copies, FIRST_PSEUDO_REGISTER, j,
10062 {
10063 try_copy_prop (loop, reg, j);
10064 });
10065 CLEAR_REG_SET (&load_copies);
10066
10067 EXECUTE_IF_SET_IN_REG_SET
10068 (&store_copies, FIRST_PSEUDO_REGISTER, j,
10069 {
10070 try_swap_copy_prop (loop, reg, j);
10071 });
10072 CLEAR_REG_SET (&store_copies);
10073 }
10074 }
10075
10076 /* Now, we need to replace all references to the previous exit
10077 label with the new one. */
10078 if (label != NULL_RTX && end_label != NULL_RTX)
10079 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
10080 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == end_label)
10081 redirect_jump (p, label, false);
10082
10083 cselib_finish ();
10084 }
10085
10086 /* For communication between note_reg_stored and its caller. */
10087 struct note_reg_stored_arg
10088 {
10089 int set_seen;
10090 rtx reg;
10091 };
10092
10093 /* Called via note_stores, record in SET_SEEN whether X, which is written,
10094 is equal to ARG. */
10095 static void
10096 note_reg_stored (rtx x, rtx setter ATTRIBUTE_UNUSED, void *arg)
10097 {
10098 struct note_reg_stored_arg *t = (struct note_reg_stored_arg *) arg;
10099 if (t->reg == x)
10100 t->set_seen = 1;
10101 }
10102
10103 /* Try to replace every occurrence of pseudo REGNO with REPLACEMENT.
10104 There must be exactly one insn that sets this pseudo; it will be
10105 deleted if all replacements succeed and we can prove that the register
10106 is not used after the loop. */
10107
10108 static void
10109 try_copy_prop (const struct loop *loop, rtx replacement, unsigned int regno)
10110 {
10111 /* This is the reg that we are copying from. */
10112 rtx reg_rtx = regno_reg_rtx[regno];
10113 rtx init_insn = 0;
10114 rtx insn;
10115 /* These help keep track of whether we replaced all uses of the reg. */
10116 int replaced_last = 0;
10117 int store_is_first = 0;
10118
10119 for (insn = next_insn_in_loop (loop, loop->scan_start);
10120 insn != NULL_RTX;
10121 insn = next_insn_in_loop (loop, insn))
10122 {
10123 rtx set;
10124
10125 /* Only substitute within one extended basic block from the initializing
10126 insn. */
10127 if (GET_CODE (insn) == CODE_LABEL && init_insn)
10128 break;
10129
10130 if (! INSN_P (insn))
10131 continue;
10132
10133 /* Is this the initializing insn? */
10134 set = single_set (insn);
10135 if (set
10136 && GET_CODE (SET_DEST (set)) == REG
10137 && REGNO (SET_DEST (set)) == regno)
10138 {
10139 if (init_insn)
10140 abort ();
10141
10142 init_insn = insn;
10143 if (REGNO_FIRST_UID (regno) == INSN_UID (insn))
10144 store_is_first = 1;
10145 }
10146
10147 /* Only substitute after seeing the initializing insn. */
10148 if (init_insn && insn != init_insn)
10149 {
10150 struct note_reg_stored_arg arg;
10151
10152 replace_loop_regs (insn, reg_rtx, replacement);
10153 if (REGNO_LAST_UID (regno) == INSN_UID (insn))
10154 replaced_last = 1;
10155
10156 /* Stop replacing when REPLACEMENT is modified. */
10157 arg.reg = replacement;
10158 arg.set_seen = 0;
10159 note_stores (PATTERN (insn), note_reg_stored, &arg);
10160 if (arg.set_seen)
10161 {
10162 rtx note = find_reg_note (insn, REG_EQUAL, NULL);
10163
10164 /* It is possible that we've turned previously valid REG_EQUAL to
10165 invalid, as we change the REGNO to REPLACEMENT and unlike REGNO,
10166 REPLACEMENT is modified, we get different meaning. */
10167 if (note && reg_mentioned_p (replacement, XEXP (note, 0)))
10168 remove_note (insn, note);
10169 break;
10170 }
10171 }
10172 }
10173 if (! init_insn)
10174 abort ();
10175 if (apply_change_group ())
10176 {
10177 if (loop_dump_stream)
10178 fprintf (loop_dump_stream, " Replaced reg %d", regno);
10179 if (store_is_first && replaced_last)
10180 {
10181 rtx first;
10182 rtx retval_note;
10183
10184 /* Assume we're just deleting INIT_INSN. */
10185 first = init_insn;
10186 /* Look for REG_RETVAL note. If we're deleting the end of
10187 the libcall sequence, the whole sequence can go. */
10188 retval_note = find_reg_note (init_insn, REG_RETVAL, NULL_RTX);
10189 /* If we found a REG_RETVAL note, find the first instruction
10190 in the sequence. */
10191 if (retval_note)
10192 first = XEXP (retval_note, 0);
10193
10194 /* Delete the instructions. */
10195 loop_delete_insns (first, init_insn);
10196 }
10197 if (loop_dump_stream)
10198 fprintf (loop_dump_stream, ".\n");
10199 }
10200 }
10201
10202 /* Replace all the instructions from FIRST up to and including LAST
10203 with NOTE_INSN_DELETED notes. */
10204
10205 static void
10206 loop_delete_insns (rtx first, rtx last)
10207 {
10208 while (1)
10209 {
10210 if (loop_dump_stream)
10211 fprintf (loop_dump_stream, ", deleting init_insn (%d)",
10212 INSN_UID (first));
10213 delete_insn (first);
10214
10215 /* If this was the LAST instructions we're supposed to delete,
10216 we're done. */
10217 if (first == last)
10218 break;
10219
10220 first = NEXT_INSN (first);
10221 }
10222 }
10223
10224 /* Try to replace occurrences of pseudo REGNO with REPLACEMENT within
10225 loop LOOP if the order of the sets of these registers can be
10226 swapped. There must be exactly one insn within the loop that sets
10227 this pseudo followed immediately by a move insn that sets
10228 REPLACEMENT with REGNO. */
10229 static void
10230 try_swap_copy_prop (const struct loop *loop, rtx replacement,
10231 unsigned int regno)
10232 {
10233 rtx insn;
10234 rtx set = NULL_RTX;
10235 unsigned int new_regno;
10236
10237 new_regno = REGNO (replacement);
10238
10239 for (insn = next_insn_in_loop (loop, loop->scan_start);
10240 insn != NULL_RTX;
10241 insn = next_insn_in_loop (loop, insn))
10242 {
10243 /* Search for the insn that copies REGNO to NEW_REGNO? */
10244 if (INSN_P (insn)
10245 && (set = single_set (insn))
10246 && GET_CODE (SET_DEST (set)) == REG
10247 && REGNO (SET_DEST (set)) == new_regno
10248 && GET_CODE (SET_SRC (set)) == REG
10249 && REGNO (SET_SRC (set)) == regno)
10250 break;
10251 }
10252
10253 if (insn != NULL_RTX)
10254 {
10255 rtx prev_insn;
10256 rtx prev_set;
10257
10258 /* Some DEF-USE info would come in handy here to make this
10259 function more general. For now, just check the previous insn
10260 which is the most likely candidate for setting REGNO. */
10261
10262 prev_insn = PREV_INSN (insn);
10263
10264 if (INSN_P (insn)
10265 && (prev_set = single_set (prev_insn))
10266 && GET_CODE (SET_DEST (prev_set)) == REG
10267 && REGNO (SET_DEST (prev_set)) == regno)
10268 {
10269 /* We have:
10270 (set (reg regno) (expr))
10271 (set (reg new_regno) (reg regno))
10272
10273 so try converting this to:
10274 (set (reg new_regno) (expr))
10275 (set (reg regno) (reg new_regno))
10276
10277 The former construct is often generated when a global
10278 variable used for an induction variable is shadowed by a
10279 register (NEW_REGNO). The latter construct improves the
10280 chances of GIV replacement and BIV elimination. */
10281
10282 validate_change (prev_insn, &SET_DEST (prev_set),
10283 replacement, 1);
10284 validate_change (insn, &SET_DEST (set),
10285 SET_SRC (set), 1);
10286 validate_change (insn, &SET_SRC (set),
10287 replacement, 1);
10288
10289 if (apply_change_group ())
10290 {
10291 if (loop_dump_stream)
10292 fprintf (loop_dump_stream,
10293 " Swapped set of reg %d at %d with reg %d at %d.\n",
10294 regno, INSN_UID (insn),
10295 new_regno, INSN_UID (prev_insn));
10296
10297 /* Update first use of REGNO. */
10298 if (REGNO_FIRST_UID (regno) == INSN_UID (prev_insn))
10299 REGNO_FIRST_UID (regno) = INSN_UID (insn);
10300
10301 /* Now perform copy propagation to hopefully
10302 remove all uses of REGNO within the loop. */
10303 try_copy_prop (loop, replacement, regno);
10304 }
10305 }
10306 }
10307 }
10308
10309 /* Worker function for find_mem_in_note, called via for_each_rtx. */
10310
10311 static int
10312 find_mem_in_note_1 (rtx *x, void *data)
10313 {
10314 if (*x != NULL_RTX && GET_CODE (*x) == MEM)
10315 {
10316 rtx *res = (rtx *) data;
10317 *res = *x;
10318 return 1;
10319 }
10320 return 0;
10321 }
10322
10323 /* Returns the first MEM found in NOTE by depth-first search. */
10324
10325 static rtx
10326 find_mem_in_note (rtx note)
10327 {
10328 if (note && for_each_rtx (&note, find_mem_in_note_1, &note))
10329 return note;
10330 return NULL_RTX;
10331 }
10332
10333 /* Replace MEM with its associated pseudo register. This function is
10334 called from load_mems via for_each_rtx. DATA is actually a pointer
10335 to a structure describing the instruction currently being scanned
10336 and the MEM we are currently replacing. */
10337
10338 static int
10339 replace_loop_mem (rtx *mem, void *data)
10340 {
10341 loop_replace_args *args = (loop_replace_args *) data;
10342 rtx m = *mem;
10343
10344 if (m == NULL_RTX)
10345 return 0;
10346
10347 switch (GET_CODE (m))
10348 {
10349 case MEM:
10350 break;
10351
10352 case CONST_DOUBLE:
10353 /* We're not interested in the MEM associated with a
10354 CONST_DOUBLE, so there's no need to traverse into one. */
10355 return -1;
10356
10357 default:
10358 /* This is not a MEM. */
10359 return 0;
10360 }
10361
10362 if (!rtx_equal_p (args->match, m))
10363 /* This is not the MEM we are currently replacing. */
10364 return 0;
10365
10366 /* Actually replace the MEM. */
10367 validate_change (args->insn, mem, args->replacement, 1);
10368
10369 return 0;
10370 }
10371
10372 static void
10373 replace_loop_mems (rtx insn, rtx mem, rtx reg, int written)
10374 {
10375 loop_replace_args args;
10376
10377 args.insn = insn;
10378 args.match = mem;
10379 args.replacement = reg;
10380
10381 for_each_rtx (&insn, replace_loop_mem, &args);
10382
10383 /* If we hoist a mem write out of the loop, then REG_EQUAL
10384 notes referring to the mem are no longer valid. */
10385 if (written)
10386 {
10387 rtx note, sub;
10388 rtx *link;
10389
10390 for (link = &REG_NOTES (insn); (note = *link); link = &XEXP (note, 1))
10391 {
10392 if (REG_NOTE_KIND (note) == REG_EQUAL
10393 && (sub = find_mem_in_note (note))
10394 && true_dependence (mem, VOIDmode, sub, rtx_varies_p))
10395 {
10396 /* Remove the note. */
10397 validate_change (NULL_RTX, link, XEXP (note, 1), 1);
10398 break;
10399 }
10400 }
10401 }
10402 }
10403
10404 /* Replace one register with another. Called through for_each_rtx; PX points
10405 to the rtx being scanned. DATA is actually a pointer to
10406 a structure of arguments. */
10407
10408 static int
10409 replace_loop_reg (rtx *px, void *data)
10410 {
10411 rtx x = *px;
10412 loop_replace_args *args = (loop_replace_args *) data;
10413
10414 if (x == NULL_RTX)
10415 return 0;
10416
10417 if (x == args->match)
10418 validate_change (args->insn, px, args->replacement, 1);
10419
10420 return 0;
10421 }
10422
10423 static void
10424 replace_loop_regs (rtx insn, rtx reg, rtx replacement)
10425 {
10426 loop_replace_args args;
10427
10428 args.insn = insn;
10429 args.match = reg;
10430 args.replacement = replacement;
10431
10432 for_each_rtx (&insn, replace_loop_reg, &args);
10433 }
10434 \f
10435 /* Emit insn for PATTERN after WHERE_INSN in basic block WHERE_BB
10436 (ignored in the interim). */
10437
10438 static rtx
10439 loop_insn_emit_after (const struct loop *loop ATTRIBUTE_UNUSED,
10440 basic_block where_bb ATTRIBUTE_UNUSED, rtx where_insn,
10441 rtx pattern)
10442 {
10443 return emit_insn_after (pattern, where_insn);
10444 }
10445
10446
10447 /* If WHERE_INSN is nonzero emit insn for PATTERN before WHERE_INSN
10448 in basic block WHERE_BB (ignored in the interim) within the loop
10449 otherwise hoist PATTERN into the loop pre-header. */
10450
10451 rtx
10452 loop_insn_emit_before (const struct loop *loop,
10453 basic_block where_bb ATTRIBUTE_UNUSED,
10454 rtx where_insn, rtx pattern)
10455 {
10456 if (! where_insn)
10457 return loop_insn_hoist (loop, pattern);
10458 return emit_insn_before (pattern, where_insn);
10459 }
10460
10461
10462 /* Emit call insn for PATTERN before WHERE_INSN in basic block
10463 WHERE_BB (ignored in the interim) within the loop. */
10464
10465 static rtx
10466 loop_call_insn_emit_before (const struct loop *loop ATTRIBUTE_UNUSED,
10467 basic_block where_bb ATTRIBUTE_UNUSED,
10468 rtx where_insn, rtx pattern)
10469 {
10470 return emit_call_insn_before (pattern, where_insn);
10471 }
10472
10473
10474 /* Hoist insn for PATTERN into the loop pre-header. */
10475
10476 rtx
10477 loop_insn_hoist (const struct loop *loop, rtx pattern)
10478 {
10479 return loop_insn_emit_before (loop, 0, loop->start, pattern);
10480 }
10481
10482
10483 /* Hoist call insn for PATTERN into the loop pre-header. */
10484
10485 static rtx
10486 loop_call_insn_hoist (const struct loop *loop, rtx pattern)
10487 {
10488 return loop_call_insn_emit_before (loop, 0, loop->start, pattern);
10489 }
10490
10491
10492 /* Sink insn for PATTERN after the loop end. */
10493
10494 rtx
10495 loop_insn_sink (const struct loop *loop, rtx pattern)
10496 {
10497 return loop_insn_emit_before (loop, 0, loop->sink, pattern);
10498 }
10499
10500 /* bl->final_value can be either general_operand or PLUS of general_operand
10501 and constant. Emit sequence of instructions to load it into REG. */
10502 static rtx
10503 gen_load_of_final_value (rtx reg, rtx final_value)
10504 {
10505 rtx seq;
10506 start_sequence ();
10507 final_value = force_operand (final_value, reg);
10508 if (final_value != reg)
10509 emit_move_insn (reg, final_value);
10510 seq = get_insns ();
10511 end_sequence ();
10512 return seq;
10513 }
10514
10515 /* If the loop has multiple exits, emit insn for PATTERN before the
10516 loop to ensure that it will always be executed no matter how the
10517 loop exits. Otherwise, emit the insn for PATTERN after the loop,
10518 since this is slightly more efficient. */
10519
10520 static rtx
10521 loop_insn_sink_or_swim (const struct loop *loop, rtx pattern)
10522 {
10523 if (loop->exit_count)
10524 return loop_insn_hoist (loop, pattern);
10525 else
10526 return loop_insn_sink (loop, pattern);
10527 }
10528 \f
10529 static void
10530 loop_ivs_dump (const struct loop *loop, FILE *file, int verbose)
10531 {
10532 struct iv_class *bl;
10533 int iv_num = 0;
10534
10535 if (! loop || ! file)
10536 return;
10537
10538 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10539 iv_num++;
10540
10541 fprintf (file, "Loop %d: %d IV classes\n", loop->num, iv_num);
10542
10543 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10544 {
10545 loop_iv_class_dump (bl, file, verbose);
10546 fputc ('\n', file);
10547 }
10548 }
10549
10550
10551 static void
10552 loop_iv_class_dump (const struct iv_class *bl, FILE *file,
10553 int verbose ATTRIBUTE_UNUSED)
10554 {
10555 struct induction *v;
10556 rtx incr;
10557 int i;
10558
10559 if (! bl || ! file)
10560 return;
10561
10562 fprintf (file, "IV class for reg %d, benefit %d\n",
10563 bl->regno, bl->total_benefit);
10564
10565 fprintf (file, " Init insn %d", INSN_UID (bl->init_insn));
10566 if (bl->initial_value)
10567 {
10568 fprintf (file, ", init val: ");
10569 print_simple_rtl (file, bl->initial_value);
10570 }
10571 if (bl->initial_test)
10572 {
10573 fprintf (file, ", init test: ");
10574 print_simple_rtl (file, bl->initial_test);
10575 }
10576 fputc ('\n', file);
10577
10578 if (bl->final_value)
10579 {
10580 fprintf (file, " Final val: ");
10581 print_simple_rtl (file, bl->final_value);
10582 fputc ('\n', file);
10583 }
10584
10585 if ((incr = biv_total_increment (bl)))
10586 {
10587 fprintf (file, " Total increment: ");
10588 print_simple_rtl (file, incr);
10589 fputc ('\n', file);
10590 }
10591
10592 /* List the increments. */
10593 for (i = 0, v = bl->biv; v; v = v->next_iv, i++)
10594 {
10595 fprintf (file, " Inc%d: insn %d, incr: ", i, INSN_UID (v->insn));
10596 print_simple_rtl (file, v->add_val);
10597 fputc ('\n', file);
10598 }
10599
10600 /* List the givs. */
10601 for (i = 0, v = bl->giv; v; v = v->next_iv, i++)
10602 {
10603 fprintf (file, " Giv%d: insn %d, benefit %d, ",
10604 i, INSN_UID (v->insn), v->benefit);
10605 if (v->giv_type == DEST_ADDR)
10606 print_simple_rtl (file, v->mem);
10607 else
10608 print_simple_rtl (file, single_set (v->insn));
10609 fputc ('\n', file);
10610 }
10611 }
10612
10613
10614 static void
10615 loop_biv_dump (const struct induction *v, FILE *file, int verbose)
10616 {
10617 if (! v || ! file)
10618 return;
10619
10620 fprintf (file,
10621 "Biv %d: insn %d",
10622 REGNO (v->dest_reg), INSN_UID (v->insn));
10623 fprintf (file, " const ");
10624 print_simple_rtl (file, v->add_val);
10625
10626 if (verbose && v->final_value)
10627 {
10628 fputc ('\n', file);
10629 fprintf (file, " final ");
10630 print_simple_rtl (file, v->final_value);
10631 }
10632
10633 fputc ('\n', file);
10634 }
10635
10636
10637 static void
10638 loop_giv_dump (const struct induction *v, FILE *file, int verbose)
10639 {
10640 if (! v || ! file)
10641 return;
10642
10643 if (v->giv_type == DEST_REG)
10644 fprintf (file, "Giv %d: insn %d",
10645 REGNO (v->dest_reg), INSN_UID (v->insn));
10646 else
10647 fprintf (file, "Dest address: insn %d",
10648 INSN_UID (v->insn));
10649
10650 fprintf (file, " src reg %d benefit %d",
10651 REGNO (v->src_reg), v->benefit);
10652 fprintf (file, " lifetime %d",
10653 v->lifetime);
10654
10655 if (v->replaceable)
10656 fprintf (file, " replaceable");
10657
10658 if (v->no_const_addval)
10659 fprintf (file, " ncav");
10660
10661 if (v->ext_dependent)
10662 {
10663 switch (GET_CODE (v->ext_dependent))
10664 {
10665 case SIGN_EXTEND:
10666 fprintf (file, " ext se");
10667 break;
10668 case ZERO_EXTEND:
10669 fprintf (file, " ext ze");
10670 break;
10671 case TRUNCATE:
10672 fprintf (file, " ext tr");
10673 break;
10674 default:
10675 abort ();
10676 }
10677 }
10678
10679 fputc ('\n', file);
10680 fprintf (file, " mult ");
10681 print_simple_rtl (file, v->mult_val);
10682
10683 fputc ('\n', file);
10684 fprintf (file, " add ");
10685 print_simple_rtl (file, v->add_val);
10686
10687 if (verbose && v->final_value)
10688 {
10689 fputc ('\n', file);
10690 fprintf (file, " final ");
10691 print_simple_rtl (file, v->final_value);
10692 }
10693
10694 fputc ('\n', file);
10695 }
10696
10697
10698 void
10699 debug_ivs (const struct loop *loop)
10700 {
10701 loop_ivs_dump (loop, stderr, 1);
10702 }
10703
10704
10705 void
10706 debug_iv_class (const struct iv_class *bl)
10707 {
10708 loop_iv_class_dump (bl, stderr, 1);
10709 }
10710
10711
10712 void
10713 debug_biv (const struct induction *v)
10714 {
10715 loop_biv_dump (v, stderr, 1);
10716 }
10717
10718
10719 void
10720 debug_giv (const struct induction *v)
10721 {
10722 loop_giv_dump (v, stderr, 1);
10723 }
10724
10725
10726 #define LOOP_BLOCK_NUM_1(INSN) \
10727 ((INSN) ? (BLOCK_FOR_INSN (INSN) ? BLOCK_NUM (INSN) : - 1) : -1)
10728
10729 /* The notes do not have an assigned block, so look at the next insn. */
10730 #define LOOP_BLOCK_NUM(INSN) \
10731 ((INSN) ? (GET_CODE (INSN) == NOTE \
10732 ? LOOP_BLOCK_NUM_1 (next_nonnote_insn (INSN)) \
10733 : LOOP_BLOCK_NUM_1 (INSN)) \
10734 : -1)
10735
10736 #define LOOP_INSN_UID(INSN) ((INSN) ? INSN_UID (INSN) : -1)
10737
10738 static void
10739 loop_dump_aux (const struct loop *loop, FILE *file,
10740 int verbose ATTRIBUTE_UNUSED)
10741 {
10742 rtx label;
10743
10744 if (! loop || ! file)
10745 return;
10746
10747 /* Print diagnostics to compare our concept of a loop with
10748 what the loop notes say. */
10749 if (! PREV_INSN (BB_HEAD (loop->first))
10750 || GET_CODE (PREV_INSN (BB_HEAD (loop->first))) != NOTE
10751 || NOTE_LINE_NUMBER (PREV_INSN (BB_HEAD (loop->first)))
10752 != NOTE_INSN_LOOP_BEG)
10753 fprintf (file, ";; No NOTE_INSN_LOOP_BEG at %d\n",
10754 INSN_UID (PREV_INSN (BB_HEAD (loop->first))));
10755 if (! NEXT_INSN (BB_END (loop->last))
10756 || GET_CODE (NEXT_INSN (BB_END (loop->last))) != NOTE
10757 || NOTE_LINE_NUMBER (NEXT_INSN (BB_END (loop->last)))
10758 != NOTE_INSN_LOOP_END)
10759 fprintf (file, ";; No NOTE_INSN_LOOP_END at %d\n",
10760 INSN_UID (NEXT_INSN (BB_END (loop->last))));
10761
10762 if (loop->start)
10763 {
10764 fprintf (file,
10765 ";; start %d (%d), cont dom %d (%d), cont %d (%d), vtop %d (%d), end %d (%d)\n",
10766 LOOP_BLOCK_NUM (loop->start),
10767 LOOP_INSN_UID (loop->start),
10768 LOOP_BLOCK_NUM (loop->cont),
10769 LOOP_INSN_UID (loop->cont),
10770 LOOP_BLOCK_NUM (loop->cont),
10771 LOOP_INSN_UID (loop->cont),
10772 LOOP_BLOCK_NUM (loop->vtop),
10773 LOOP_INSN_UID (loop->vtop),
10774 LOOP_BLOCK_NUM (loop->end),
10775 LOOP_INSN_UID (loop->end));
10776 fprintf (file, ";; top %d (%d), scan start %d (%d)\n",
10777 LOOP_BLOCK_NUM (loop->top),
10778 LOOP_INSN_UID (loop->top),
10779 LOOP_BLOCK_NUM (loop->scan_start),
10780 LOOP_INSN_UID (loop->scan_start));
10781 fprintf (file, ";; exit_count %d", loop->exit_count);
10782 if (loop->exit_count)
10783 {
10784 fputs (", labels:", file);
10785 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
10786 {
10787 fprintf (file, " %d ",
10788 LOOP_INSN_UID (XEXP (label, 0)));
10789 }
10790 }
10791 fputs ("\n", file);
10792
10793 /* This can happen when a marked loop appears as two nested loops,
10794 say from while (a || b) {}. The inner loop won't match
10795 the loop markers but the outer one will. */
10796 if (LOOP_BLOCK_NUM (loop->cont) != loop->latch->index)
10797 fprintf (file, ";; NOTE_INSN_LOOP_CONT not in loop latch\n");
10798 }
10799 }
10800
10801 /* Call this function from the debugger to dump LOOP. */
10802
10803 void
10804 debug_loop (const struct loop *loop)
10805 {
10806 flow_loop_dump (loop, stderr, loop_dump_aux, 1);
10807 }
10808
10809 /* Call this function from the debugger to dump LOOPS. */
10810
10811 void
10812 debug_loops (const struct loops *loops)
10813 {
10814 flow_loops_dump (loops, stderr, loop_dump_aux, 1);
10815 }