1 /* Perform various loop optimizations, including strength reduction.
2 Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
3 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 /* This is the loop optimization pass of the compiler.
23 It finds invariant computations within loops and moves them
24 to the beginning of the loop. Then it identifies basic and
25 general induction variables.
26
27 Basic induction variables (BIVs) are pseudo registers that are set within
28 a loop only by incrementing or decrementing their values. General induction
29 variables (GIVs) are pseudo registers whose value is a linear function
30 of a basic induction variable. BIVs are recognized by `basic_induction_var';
31 GIVs by `general_induction_var'.
32
33 Once induction variables are identified, strength reduction is applied to the
34 general induction variables, and induction variable elimination is applied to
35 the basic induction variables.
36
37 It also finds cases where
38 a register is set within the loop by zero-extending a narrower value
39 and changes these to zero the entire register once before the loop
40 and merely copy the low part within the loop.
41
42 Most of the complexity is in heuristics to decide when it is
43 worthwhile to do these things. */
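
/* An illustrative example (not part of the pass itself): given a source
   loop such as

     for (i = 0; i < n; i++)
       a[i] = x * y;

   the invariant computation x * y is hoisted in front of the loop; the
   counter i is a BIV, since inside the loop it is only ever incremented
   by a constant; and the address of a[i], a + i * sizeof (*a), is a GIV,
   a linear function of that BIV. Strength reduction replaces the implied
   multiplication by a pointer that is bumped by sizeof (*a) every
   iteration, and if i then has no other use the BIV can be eliminated in
   favor of a comparison against the final pointer value. */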
44
45 #include "config.h"
46 #include "system.h"
47 #include "coretypes.h"
48 #include "tm.h"
49 #include "rtl.h"
50 #include "tm_p.h"
51 #include "function.h"
52 #include "expr.h"
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
55 #include "insn-config.h"
56 #include "regs.h"
57 #include "recog.h"
58 #include "flags.h"
59 #include "real.h"
60 #include "loop.h"
61 #include "cselib.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "predict.h"
65 #include "insn-flags.h"
66 #include "optabs.h"
67 #include "cfgloop.h"
68 #include "ggc.h"
69
70 /* Not really meaningful values, but at least something. */
71 #ifndef SIMULTANEOUS_PREFETCHES
72 #define SIMULTANEOUS_PREFETCHES 3
73 #endif
74 #ifndef PREFETCH_BLOCK
75 #define PREFETCH_BLOCK 32
76 #endif
77 #ifndef HAVE_prefetch
78 #define HAVE_prefetch 0
79 #define CODE_FOR_prefetch 0
80 #define gen_prefetch(a,b,c) (abort(), NULL_RTX)
81 #endif
82
83 /* Give up the prefetch optimizations once we exceed a given threshold.
84 It is unlikely that we would be able to optimize something in a loop
85 with so many detected prefetches. */
86 #define MAX_PREFETCHES 100
87 /* The number of prefetch blocks that are beneficial to fetch at once before
88 a loop with a known (and low) iteration count. */
89 #define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
90 /* For very tiny loops it is not worthwhile to prefetch even before the loop,
91 since it is likely that the data are already in the cache. */
92 #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2
93
94 /* Parameterize some prefetch heuristics so they can be turned on and off
95 easily for performance testing on new architectures. These can be
96 defined in target-dependent files. */
97
98 /* Prefetch is worthwhile only when loads/stores are dense. */
99 #ifndef PREFETCH_ONLY_DENSE_MEM
100 #define PREFETCH_ONLY_DENSE_MEM 1
101 #endif
102
103 /* Define what we mean by "dense" loads and stores; this value divided by 256
104 is the minimum fraction of memory references that are worth prefetching. */
105 #ifndef PREFETCH_DENSE_MEM
106 #define PREFETCH_DENSE_MEM 220
107 #endif
108
109 /* Do not prefetch for a loop whose iteration count is known to be low. */
110 #ifndef PREFETCH_NO_LOW_LOOPCNT
111 #define PREFETCH_NO_LOW_LOOPCNT 1
112 #endif
113
114 /* Define what we mean by a "low" iteration count. */
115 #ifndef PREFETCH_LOW_LOOPCNT
116 #define PREFETCH_LOW_LOOPCNT 32
117 #endif
118
119 /* Do not prefetch for a loop that contains a function call; such a loop is
120 probably not an internal loop. */
121 #ifndef PREFETCH_NO_CALL
122 #define PREFETCH_NO_CALL 1
123 #endif
124
125 /* Do not prefetch accesses with an extreme stride. */
126 #ifndef PREFETCH_NO_EXTREME_STRIDE
127 #define PREFETCH_NO_EXTREME_STRIDE 1
128 #endif
129
130 /* Define what we mean by an "extreme" stride. */
131 #ifndef PREFETCH_EXTREME_STRIDE
132 #define PREFETCH_EXTREME_STRIDE 4096
133 #endif
134
135 /* Define a limit to how far apart indices can be and still be merged
136 into a single prefetch. */
137 #ifndef PREFETCH_EXTREME_DIFFERENCE
138 #define PREFETCH_EXTREME_DIFFERENCE 4096
139 #endif
140
141 /* Issue prefetch instructions before the loop to fetch data to be used
142 in the first few loop iterations. */
143 #ifndef PREFETCH_BEFORE_LOOP
144 #define PREFETCH_BEFORE_LOOP 1
145 #endif
146
147 /* Do not handle reversed order prefetches (negative stride). */
148 #ifndef PREFETCH_NO_REVERSE_ORDER
149 #define PREFETCH_NO_REVERSE_ORDER 1
150 #endif
151
152 /* Prefetch even if the GIV is in conditional code. */
153 #ifndef PREFETCH_CONDITIONAL
154 #define PREFETCH_CONDITIONAL 1
155 #endif
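
/* Illustrative only: because of the #ifndef guards above, a target that
   wants different prefetch tuning can simply pre-define any of these
   macros in its target-dependent headers, e.g. (hypothetical values)

     #define PREFETCH_LOW_LOOPCNT 16
     #define PREFETCH_CONDITIONAL 0

   and the defaults given here are then not used. */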
156
157 #define LOOP_REG_LIFETIME(LOOP, REGNO) \
158 ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))
159
160 #define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
161 ((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
162 || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))
163
164 #define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
165 ((REGNO) < FIRST_PSEUDO_REGISTER \
166 ? (int) hard_regno_nregs[(REGNO)][GET_MODE (SET_DEST)] : 1)
167
168
169 /* Vector mapping INSN_UIDs to luids.
170 The luids are like uids but always increase monotonically.
171 We use them to see whether a jump comes from outside a given loop. */
172
173 int *uid_luid;
174
175 /* Indexed by INSN_UID, contains a pointer to the (innermost) loop
176 that the insn is contained in. */
177
178 struct loop **uid_loop;
179
180 /* 1 + largest uid of any insn. */
181
182 int max_uid_for_loop;
183
184 /* Number of loops detected in current function. Used as index to the
185 next few tables. */
186
187 static int max_loop_num;
188
189 /* Bound on pseudo register number before loop optimization.
190 A pseudo has valid regscan info if its number is < max_reg_before_loop. */
191 unsigned int max_reg_before_loop;
192
193 /* The value to pass to the next call of reg_scan_update. */
194 static int loop_max_reg;
195 \f
196 /* During the analysis of a loop, a chain of `struct movable's
197 is made to record all the movable insns found.
198 Then the entire chain can be scanned to decide which to move. */
199
200 struct movable
201 {
202 rtx insn; /* A movable insn */
203 rtx set_src; /* The expression this reg is set from. */
204 rtx set_dest; /* The destination of this SET. */
205 rtx dependencies; /* When INSN is a libcall, this is an EXPR_LIST
206 of any registers used within the LIBCALL. */
207 int consec; /* Number of consecutive following insns
208 that must be moved with this one. */
209 unsigned int regno; /* The register it sets */
210 short lifetime; /* lifetime of that register;
211 may be adjusted when matching movables
212 that load the same value are found. */
213 short savings; /* Number of insns we can move for this reg,
214 including other movables that force this
215 or match this one. */
216 ENUM_BITFIELD(machine_mode) savemode : 8; /* Nonzero means it is a mode for
217 a low part that we should avoid changing when
218 clearing the rest of the reg. */
219 unsigned int cond : 1; /* 1 if only conditionally movable */
220 unsigned int force : 1; /* 1 means MUST move this insn */
221 unsigned int global : 1; /* 1 means reg is live outside this loop */
222 /* If PARTIAL is 1, GLOBAL means something different:
223 that the reg is live outside the range from where it is set
224 to the following label. */
225 unsigned int done : 1; /* 1 inhibits further processing of this */
226
227 unsigned int partial : 1; /* 1 means this reg is used for zero-extending.
228 In particular, moving it does not make it
229 invariant. */
230 unsigned int move_insn : 1; /* 1 means that we call emit_move_insn to
231 load SRC, rather than copying INSN. */
232 unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
233 first insn of a consecutive sets group. */
234 unsigned int is_equiv : 1; /* 1 means a REG_EQUIV is present on INSN. */
235 unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace
236 the original insn with a copy from that
237 pseudo, rather than deleting it. */
238 struct movable *match; /* First entry for same value */
239 struct movable *forces; /* An insn that must be moved if this is */
240 struct movable *next;
241 };
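
/* A movable is allocated and filled in by scan_loop below and chained onto
   the loop's list with loop_movables_add; the chain is then filtered and
   annotated by ignore_some_movables, force_movables and combine_movables
   before move_movables decides which entries are actually worth hoisting
   out of the loop. */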
242
243
244 FILE *loop_dump_stream;
245
246 /* Forward declarations. */
247
248 static void invalidate_loops_containing_label (rtx);
249 static void find_and_verify_loops (rtx, struct loops *);
250 static void mark_loop_jump (rtx, struct loop *);
251 static void prescan_loop (struct loop *);
252 static int reg_in_basic_block_p (rtx, rtx);
253 static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx);
254 static int labels_in_range_p (rtx, int);
255 static void count_one_set (struct loop_regs *, rtx, rtx, rtx *);
256 static void note_addr_stored (rtx, rtx, void *);
257 static void note_set_pseudo_multiple_uses (rtx, rtx, void *);
258 static int loop_reg_used_before_p (const struct loop *, rtx, rtx);
259 static rtx find_regs_nested (rtx, rtx);
260 static void scan_loop (struct loop*, int);
261 #if 0
262 static void replace_call_address (rtx, rtx, rtx);
263 #endif
264 static rtx skip_consec_insns (rtx, int);
265 static int libcall_benefit (rtx);
266 static rtx libcall_other_reg (rtx, rtx);
267 static void record_excess_regs (rtx, rtx, rtx *);
268 static void ignore_some_movables (struct loop_movables *);
269 static void force_movables (struct loop_movables *);
270 static void combine_movables (struct loop_movables *, struct loop_regs *);
271 static int num_unmoved_movables (const struct loop *);
272 static int regs_match_p (rtx, rtx, struct loop_movables *);
273 static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *,
274 struct loop_regs *);
275 static void add_label_notes (rtx, rtx);
276 static void move_movables (struct loop *loop, struct loop_movables *, int,
277 int);
278 static void loop_movables_add (struct loop_movables *, struct movable *);
279 static void loop_movables_free (struct loop_movables *);
280 static int count_nonfixed_reads (const struct loop *, rtx);
281 static void loop_bivs_find (struct loop *);
282 static void loop_bivs_init_find (struct loop *);
283 static void loop_bivs_check (struct loop *);
284 static void loop_givs_find (struct loop *);
285 static void loop_givs_check (struct loop *);
286 static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int);
287 static int loop_giv_reduce_benefit (struct loop *, struct iv_class *,
288 struct induction *, rtx);
289 static void loop_givs_dead_check (struct loop *, struct iv_class *);
290 static void loop_givs_reduce (struct loop *, struct iv_class *);
291 static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *);
292 static void loop_ivs_free (struct loop *);
293 static void strength_reduce (struct loop *, int);
294 static void find_single_use_in_loop (struct loop_regs *, rtx, rtx);
295 static int valid_initial_value_p (rtx, rtx, int, rtx);
296 static void find_mem_givs (const struct loop *, rtx, rtx, int, int);
297 static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx,
298 rtx, rtx *, int, int);
299 static void check_final_value (const struct loop *, struct induction *);
300 static void loop_ivs_dump (const struct loop *, FILE *, int);
301 static void loop_iv_class_dump (const struct iv_class *, FILE *, int);
302 static void loop_biv_dump (const struct induction *, FILE *, int);
303 static void loop_giv_dump (const struct induction *, FILE *, int);
304 static void record_giv (const struct loop *, struct induction *, rtx, rtx,
305 rtx, rtx, rtx, rtx, int, enum g_types, int, int,
306 rtx *);
307 static void update_giv_derive (const struct loop *, rtx);
308 static void check_ext_dependent_givs (const struct loop *, struct iv_class *);
309 static int basic_induction_var (const struct loop *, rtx, enum machine_mode,
310 rtx, rtx, rtx *, rtx *, rtx **);
311 static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *);
312 static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *,
313 rtx *, rtx *, int, int *, enum machine_mode);
314 static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *,
315 rtx *, rtx *, rtx *);
316 static int check_dbra_loop (struct loop *, int);
317 static rtx express_from_1 (rtx, rtx, rtx);
318 static rtx combine_givs_p (struct induction *, struct induction *);
319 static int cmp_combine_givs_stats (const void *, const void *);
320 static void combine_givs (struct loop_regs *, struct iv_class *);
321 static int product_cheap_p (rtx, rtx);
322 static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int,
323 int, int);
324 static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx,
325 struct iv_class *, int, basic_block, rtx);
326 static int last_use_this_basic_block (rtx, rtx);
327 static void record_initial (rtx, rtx, void *);
328 static void update_reg_last_use (rtx, rtx);
329 static rtx next_insn_in_loop (const struct loop *, rtx);
330 static void loop_regs_scan (const struct loop *, int);
331 static int count_insns_in_loop (const struct loop *);
332 static int find_mem_in_note_1 (rtx *, void *);
333 static rtx find_mem_in_note (rtx);
334 static void load_mems (const struct loop *);
335 static int insert_loop_mem (rtx *, void *);
336 static int replace_loop_mem (rtx *, void *);
337 static void replace_loop_mems (rtx, rtx, rtx, int);
338 static int replace_loop_reg (rtx *, void *);
339 static void replace_loop_regs (rtx insn, rtx, rtx);
340 static void note_reg_stored (rtx, rtx, void *);
341 static void try_copy_prop (const struct loop *, rtx, unsigned int);
342 static void try_swap_copy_prop (const struct loop *, rtx, unsigned int);
343 static rtx check_insn_for_givs (struct loop *, rtx, int, int);
344 static rtx check_insn_for_bivs (struct loop *, rtx, int, int);
345 static rtx gen_add_mult (rtx, rtx, rtx, rtx);
346 static void loop_regs_update (const struct loop *, rtx);
347 static int iv_add_mult_cost (rtx, rtx, rtx, rtx);
348
349 static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx);
350 static rtx loop_call_insn_emit_before (const struct loop *, basic_block,
351 rtx, rtx);
352 static rtx loop_call_insn_hoist (const struct loop *, rtx);
353 static rtx loop_insn_sink_or_swim (const struct loop *, rtx);
354
355 static void loop_dump_aux (const struct loop *, FILE *, int);
356 static void loop_delete_insns (rtx, rtx);
357 static HOST_WIDE_INT remove_constant_addition (rtx *);
358 static rtx gen_load_of_final_value (rtx, rtx);
359 void debug_ivs (const struct loop *);
360 void debug_iv_class (const struct iv_class *);
361 void debug_biv (const struct induction *);
362 void debug_giv (const struct induction *);
363 void debug_loop (const struct loop *);
364 void debug_loops (const struct loops *);
365
366 typedef struct loop_replace_args
367 {
368 rtx match;
369 rtx replacement;
370 rtx insn;
371 } loop_replace_args;
372
373 /* Nonzero iff INSN is between START and END, inclusive. */
374 #define INSN_IN_RANGE_P(INSN, START, END) \
375 (INSN_UID (INSN) < max_uid_for_loop \
376 && INSN_LUID (INSN) >= INSN_LUID (START) \
377 && INSN_LUID (INSN) <= INSN_LUID (END))
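
/* For example, scan_loop below uses

     INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end)

   to verify that a loop's entry jump targets a label inside the loop. */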
378
379 /* Indirect_jump_in_function is computed once per function. */
380 static int indirect_jump_in_function;
381 static int indirect_jump_in_function_p (rtx);
382
383 static int compute_luids (rtx, rtx, int);
384
385 static int biv_elimination_giv_has_0_offset (struct induction *,
386 struct induction *, rtx);
387 \f
388 /* Benefit penalty if a giv is not replaceable, i.e. we must emit an insn to
389 copy the value of the strength-reduced giv to its original register. */
390 static int copy_cost;
391
392 /* Cost of using a register, to normalize the benefits of a giv. */
393 static int reg_address_cost;
394
395 void
396 init_loop (void)
397 {
398 rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
399
400 reg_address_cost = address_cost (reg, SImode);
401
402 copy_cost = COSTS_N_INSNS (1);
403 }
404 \f
405 /* Compute the mapping from uids to luids.
406 LUIDs are numbers assigned to insns, like uids,
407 except that luids increase monotonically through the code.
408 Start at insn START and stop just before END. Assign LUIDs
409 starting with PREV_LUID + 1. Return the last assigned LUID + 1. */
410 static int
411 compute_luids (rtx start, rtx end, int prev_luid)
412 {
413 int i;
414 rtx insn;
415
416 for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn))
417 {
418 if (INSN_UID (insn) >= max_uid_for_loop)
419 continue;
420 /* Don't assign luids to line-number NOTEs, so that the distance in
421 luids between two insns is not affected by -g. */
422 if (GET_CODE (insn) != NOTE
423 || NOTE_LINE_NUMBER (insn) <= 0)
424 uid_luid[INSN_UID (insn)] = ++i;
425 else
426 /* Give a line number note the same luid as preceding insn. */
427 uid_luid[INSN_UID (insn)] = i;
428 }
429 return i + 1;
430 }
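
/* For example, loop_optimize below recomputes the luids of the whole
   function, after find_and_verify_loops may have rearranged insns, with

     compute_luids (f, NULL_RTX, 0);

   where F is the first insn of the function. */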
431 \f
432 /* Entry point of this file. Perform loop optimization on the current
433 function. F is the first insn of the function, DUMPFILE is a stream for
434 output of a trace of actions taken (or 0 if none should be output), and
435 FLAGS selects the optional transformations; it is passed on to scan_loop. */
436
437 void
438 loop_optimize (rtx f, FILE *dumpfile, int flags)
439 {
440 rtx insn;
441 int i;
442 struct loops loops_data;
443 struct loops *loops = &loops_data;
444 struct loop_info *loops_info;
445
446 loop_dump_stream = dumpfile;
447
448 init_recog_no_volatile ();
449
450 max_reg_before_loop = max_reg_num ();
451 loop_max_reg = max_reg_before_loop;
452
453 regs_may_share = 0;
454
455 /* Count the number of loops. */
456
457 max_loop_num = 0;
458 for (insn = f; insn; insn = NEXT_INSN (insn))
459 {
460 if (GET_CODE (insn) == NOTE
461 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
462 max_loop_num++;
463 }
464
465 /* Don't waste time if no loops. */
466 if (max_loop_num == 0)
467 return;
468
469 loops->num = max_loop_num;
470
471 /* Get size to use for tables indexed by uids.
472 Leave some space for labels allocated by find_and_verify_loops. */
473 max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;
474
475 uid_luid = xcalloc (max_uid_for_loop, sizeof (int));
476 uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *));
477
478 /* Allocate storage for array of loops. */
479 loops->array = xcalloc (loops->num, sizeof (struct loop));
480
481 /* Find and process each loop.
482 First, find them, and record them in order of their beginnings. */
483 find_and_verify_loops (f, loops);
484
485 /* Allocate and initialize auxiliary loop information. */
486 loops_info = xcalloc (loops->num, sizeof (struct loop_info));
487 for (i = 0; i < (int) loops->num; i++)
488 loops->array[i].aux = loops_info + i;
489
490 /* Now find all register lifetimes. This must be done after
491 find_and_verify_loops, because it might reorder the insns in the
492 function. */
493 reg_scan (f, max_reg_before_loop, 1);
494
495 /* This must occur after reg_scan so that registers created by gcse
496 will have entries in the register tables.
497
498 We could have added a call to reg_scan after gcse_main in toplev.c,
499 but moving this call to init_alias_analysis is more efficient. */
500 init_alias_analysis ();
501
502 /* See if we went too far. Note that get_max_uid already returns
503 one more than the maximum uid of all insns. */
504 if (get_max_uid () > max_uid_for_loop)
505 abort ();
506 /* Now reset it to the actual size we need. See above. */
507 max_uid_for_loop = get_max_uid ();
508
509 /* find_and_verify_loops has already called compute_luids, but it
510 might have rearranged code afterwards, so we need to recompute
511 the luids now. */
512 compute_luids (f, NULL_RTX, 0);
513
514 /* Don't leave gaps in uid_luid for insns that have been
515 deleted. It is possible that the first or last insn
516 using some register has been deleted by cross-jumping.
517 Make sure that uid_luid for that former insn's uid
518 points to the general area where that insn used to be. */
519 for (i = 0; i < max_uid_for_loop; i++)
520 {
521 uid_luid[0] = uid_luid[i];
522 if (uid_luid[0] != 0)
523 break;
524 }
525 for (i = 0; i < max_uid_for_loop; i++)
526 if (uid_luid[i] == 0)
527 uid_luid[i] = uid_luid[i - 1];
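  /* For instance, if the luids recorded so far were {0, 0, 5, 0, 6, ...}
     because the insns with uids 0, 1 and 3 were deleted, the first loop
     above sets uid_luid[0] to 5 and the second then fills the gaps to give
     {5, 5, 5, 5, 6, ...}, so a stale uid still maps near the place where
     its insn used to be. */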
528
529 /* Determine if the function has an indirect jump. On some systems
530 this prevents low-overhead loop instructions from being used. */
531 indirect_jump_in_function = indirect_jump_in_function_p (f);
532
533 /* Now scan the loops, last ones first, since this means inner ones are done
534 before outer ones. */
535 for (i = max_loop_num - 1; i >= 0; i--)
536 {
537 struct loop *loop = &loops->array[i];
538
539 if (! loop->invalid && loop->end)
540 {
541 scan_loop (loop, flags);
542 ggc_collect ();
543 }
544 }
545
546 end_alias_analysis ();
547
548 /* Clean up. */
549 for (i = 0; i < (int) loops->num; i++)
550 free (loops_info[i].mems);
551
552 free (uid_luid);
553 free (uid_loop);
554 free (loops_info);
555 free (loops->array);
556 }
557 \f
558 /* Returns the next insn, in execution order, after INSN. START and
559 END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
560 respectively. LOOP->TOP, if non-NULL, is the top of the loop in the
561 insn-stream; it is used with loops that are entered near the
562 bottom. */
563
564 static rtx
565 next_insn_in_loop (const struct loop *loop, rtx insn)
566 {
567 insn = NEXT_INSN (insn);
568
569 if (insn == loop->end)
570 {
571 if (loop->top)
572 /* Go to the top of the loop, and continue there. */
573 insn = loop->top;
574 else
575 /* We're done. */
576 insn = NULL_RTX;
577 }
578
579 if (insn == loop->scan_start)
580 /* We're done. */
581 insn = NULL_RTX;
582
583 return insn;
584 }
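
/* The usual way to walk a loop's insns in execution order with the
   function above is the idiom used in scan_loop below:

     for (p = next_insn_in_loop (loop, loop->scan_start);
          p != NULL_RTX;
          p = next_insn_in_loop (loop, p))
       ...  */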
585
586 /* Find any register references hidden inside X and add them to
587 the dependency list DEPS. This is used to look inside CLOBBER (MEM ...)
588 patterns when checking whether a PARALLEL can be pulled out of a loop. */
589
590 static rtx
591 find_regs_nested (rtx deps, rtx x)
592 {
593 enum rtx_code code = GET_CODE (x);
594 if (code == REG)
595 deps = gen_rtx_EXPR_LIST (VOIDmode, x, deps);
596 else
597 {
598 const char *fmt = GET_RTX_FORMAT (code);
599 int i, j;
600 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
601 {
602 if (fmt[i] == 'e')
603 deps = find_regs_nested (deps, XEXP (x, i));
604 else if (fmt[i] == 'E')
605 for (j = 0; j < XVECLEN (x, i); j++)
606 deps = find_regs_nested (deps, XVECEXP (x, i, j));
607 }
608 }
609 return deps;
610 }
611
612 /* Optimize one loop described by LOOP. */
613
614 /* ??? Could also move memory writes out of loops if the destination address
615 is invariant, the source is invariant, the memory write is not volatile,
616 and if we can prove that no read inside the loop can read this address
617 before the write occurs. If there is a read of this address after the
618 write, then we can also mark the memory read as invariant. */
619
620 static void
621 scan_loop (struct loop *loop, int flags)
622 {
623 struct loop_info *loop_info = LOOP_INFO (loop);
624 struct loop_regs *regs = LOOP_REGS (loop);
625 int i;
626 rtx loop_start = loop->start;
627 rtx loop_end = loop->end;
628 rtx p;
629 /* 1 if we are scanning insns that could be executed zero times. */
630 int maybe_never = 0;
631 /* 1 if we are scanning insns that might never be executed
632 due to a subroutine call which might exit before they are reached. */
633 int call_passed = 0;
634 /* Number of insns in the loop. */
635 int insn_count;
636 int tem;
637 rtx temp, update_start, update_end;
638 /* The SET from an insn, if it is the only SET in the insn. */
639 rtx set, set1;
640 /* Chain describing insns movable in current loop. */
641 struct loop_movables *movables = LOOP_MOVABLES (loop);
642 /* Ratio of extra register life span we can justify
643 for saving an instruction. More if loop doesn't call subroutines
644 since in that case saving an insn makes more difference
645 and more registers are available. */
646 int threshold;
647 /* Nonzero if we are scanning instructions in a sub-loop. */
648 int loop_depth = 0;
649 int in_libcall;
650
651 loop->top = 0;
652
653 movables->head = 0;
654 movables->last = 0;
655
656 /* Determine whether this loop starts with a jump down to a test at
657 the end. This will occur for a small number of loops with a test
658 that is too complex to duplicate in front of the loop.
659
660 We search for the first insn or label in the loop, skipping NOTEs.
661 However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
662 (because we might have a loop executed only once that contains a
663 loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
664 (in case we have a degenerate loop).
665
666 Note that if we mistakenly think that a loop is entered at the top
667 when, in fact, it is entered at the exit test, the only effect will be
668 slightly poorer optimization. Making the opposite error can generate
669 incorrect code. Since very few loops now start with a jump to the
670 exit test, the code here to detect that case is very conservative. */
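
  /* Schematically, a loop entered at its exit test looks roughly like this
     in the insn stream (illustrative layout):

         NOTE_INSN_LOOP_BEG
         jump LTEST              <- the entry jump P found below
       LTOP:                     <- becomes LOOP->TOP
         ... loop body ...
       LTEST:                    <- becomes LOOP->SCAN_START
         ... exit test; conditional jump back to LTOP ...
         NOTE_INSN_LOOP_END  */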
671
672 for (p = NEXT_INSN (loop_start);
673 p != loop_end
674 && GET_CODE (p) != CODE_LABEL && ! INSN_P (p)
675 && (GET_CODE (p) != NOTE
676 || (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
677 && NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
678 p = NEXT_INSN (p))
679 ;
680
681 loop->scan_start = p;
682
683 /* If loop end is the end of the current function, then emit a
684 NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
685 note insn. This is the position we use when sinking insns out of
686 the loop. */
687 if (NEXT_INSN (loop->end) != 0)
688 loop->sink = NEXT_INSN (loop->end);
689 else
690 loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);
691
692 /* Set up variables describing this loop. */
693 prescan_loop (loop);
694 threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);
695
696 /* If loop has a jump before the first label,
697 the true entry is the target of that jump.
698 Start scan from there.
699 But record in LOOP->TOP the place where the end-test jumps
700 back to so we can scan that after the end of the loop. */
701 if (GET_CODE (p) == JUMP_INSN
702 /* Loop entry must be unconditional jump (and not a RETURN) */
703 && any_uncondjump_p (p)
704 && JUMP_LABEL (p) != 0
705 /* Check to see whether the jump actually
706 jumps out of the loop (meaning it's no loop).
707 This case can happen for things like
708 do {..} while (0). If this label was generated previously
709 by loop, we can't tell anything about it and have to reject
710 the loop. */
711 && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
712 {
713 loop->top = next_label (loop->scan_start);
714 loop->scan_start = JUMP_LABEL (p);
715 }
716
717 /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
718 as required by loop_reg_used_before_p. So skip such loops. (This
719 test may never be true, but it's best to play it safe.)
720
721 Also, skip loops where we do not start scanning at a label. This
722 test also rejects loops starting with a JUMP_INSN that failed the
723 test above. */
724
725 if (INSN_UID (loop->scan_start) >= max_uid_for_loop
726 || GET_CODE (loop->scan_start) != CODE_LABEL)
727 {
728 if (loop_dump_stream)
729 fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
730 INSN_UID (loop_start), INSN_UID (loop_end));
731 return;
732 }
733
734 /* Allocate extra space for REGs that might be created by load_mems.
735 We allocate a little extra slop as well, in the hopes that we
736 won't have to reallocate the regs array. */
737 loop_regs_scan (loop, loop_info->mems_idx + 16);
738 insn_count = count_insns_in_loop (loop);
739
740 if (loop_dump_stream)
741 {
742 fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
743 INSN_UID (loop_start), INSN_UID (loop_end), insn_count);
744 if (loop->cont)
745 fprintf (loop_dump_stream, "Continue at insn %d.\n",
746 INSN_UID (loop->cont));
747 }
748
749 /* Scan through the loop finding insns that are safe to move.
750 Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
751 this reg will be considered invariant for subsequent insns.
752 We consider whether subsequent insns use the reg
753 in deciding whether it is worth actually moving.
754
755 MAYBE_NEVER is nonzero if we have passed a conditional jump insn
756 and therefore it is possible that the insns we are scanning
757 would never be executed. At such times, we must make sure
758 that it is safe to execute the insn once instead of zero times.
759 When MAYBE_NEVER is 0, all insns will be executed at least once
760 so that is not a problem. */
761
762 for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
763 p != NULL_RTX;
764 p = next_insn_in_loop (loop, p))
765 {
766 if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
767 in_libcall--;
768 if (GET_CODE (p) == INSN)
769 {
770 temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
771 if (temp)
772 in_libcall++;
773 if (! in_libcall
774 && (set = single_set (p))
775 && REG_P (SET_DEST (set))
776 #ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
777 && SET_DEST (set) != pic_offset_table_rtx
778 #endif
779 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
780 {
781 int tem1 = 0;
782 int tem2 = 0;
783 int move_insn = 0;
784 int insert_temp = 0;
785 rtx src = SET_SRC (set);
786 rtx dependencies = 0;
787
788 /* Figure out what to use as a source of this insn. If a
789 REG_EQUIV note is given or if a REG_EQUAL note with a
790 constant operand is specified, use it as the source and
791 mark that we should move this insn by calling
792 emit_move_insn rather than duplicating the insn.
793
794 Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
795 note is present. */
796 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
797 if (temp)
798 src = XEXP (temp, 0), move_insn = 1;
799 else
800 {
801 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
802 if (temp && CONSTANT_P (XEXP (temp, 0)))
803 src = XEXP (temp, 0), move_insn = 1;
804 if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
805 {
806 src = XEXP (temp, 0);
807 /* A libcall block can use regs that don't appear in
808 the equivalent expression. To move the libcall,
809 we must move those regs too. */
810 dependencies = libcall_other_reg (p, src);
811 }
812 }
813
814 /* For parallels, add any possible uses to the dependencies, as
815 we can't move the insn without resolving them first.
816 MEMs inside CLOBBERs may also reference registers; these
817 count as implicit uses. */
818 if (GET_CODE (PATTERN (p)) == PARALLEL)
819 {
820 for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
821 {
822 rtx x = XVECEXP (PATTERN (p), 0, i);
823 if (GET_CODE (x) == USE)
824 dependencies
825 = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
826 dependencies);
827 else if (GET_CODE (x) == CLOBBER
828 && MEM_P (XEXP (x, 0)))
829 dependencies = find_regs_nested (dependencies,
830 XEXP (XEXP (x, 0), 0));
831 }
832 }
833
834 if (/* The register is used in basic blocks other
835 than the one where it is set (meaning that
836 something after this point in the loop might
837 depend on its value before the set). */
838 ! reg_in_basic_block_p (p, SET_DEST (set))
839 /* And the set is not guaranteed to be executed once
840 the loop starts, or the value before the set is
841 needed before the set occurs...
842
843 ??? Note we have quadratic behavior here, mitigated
844 by the fact that the previous test will often fail for
845 large loops. Rather than re-scanning the entire loop
846 each time for register usage, we should build tables
847 of the register usage and use them here instead. */
848 && (maybe_never
849 || loop_reg_used_before_p (loop, set, p)))
850 /* It is unsafe to move the set. However, it may be OK to
851 move the source into a new pseudo, and substitute a
852 reg-to-reg copy for the original insn.
853
854 This code used to consider it OK to move a set of a variable
855 which was not created by the user and not used in an exit
856 test.
857 That behavior is incorrect and was removed. */
858 insert_temp = 1;
859
860 /* Don't try to optimize a MODE_CC set with a constant
861 source. It probably will be combined with a conditional
862 jump. */
863 if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC
864 && CONSTANT_P (src))
865 ;
866 /* Don't try to optimize a register that was made
867 by loop-optimization for an inner loop.
868 We don't know its life-span, so we can't compute
869 the benefit. */
870 else if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
871 ;
872 /* Don't move the source and add a reg-to-reg copy:
873 - with -Os (this certainly increases size),
874 - if the mode doesn't support copy operations (obviously),
875 - if the source is already a reg (the motion will gain nothing),
876 - if the source is a legitimate constant (likewise). */
877 else if (insert_temp
878 && (optimize_size
879 || ! can_copy_p (GET_MODE (SET_SRC (set)))
880 || REG_P (SET_SRC (set))
881 || (CONSTANT_P (SET_SRC (set))
882 && LEGITIMATE_CONSTANT_P (SET_SRC (set)))))
883 ;
884 else if ((tem = loop_invariant_p (loop, src))
885 && (dependencies == 0
886 || (tem2
887 = loop_invariant_p (loop, dependencies)) != 0)
888 && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
889 || (tem1
890 = consec_sets_invariant_p
891 (loop, SET_DEST (set),
892 regs->array[REGNO (SET_DEST (set))].set_in_loop,
893 p)))
894 /* If the insn can cause a trap (such as divide by zero),
895 we can't move it unless it's guaranteed to be executed
896 once the loop is entered. Even a function call might
897 prevent the trap insn from being reached
898 (since it might exit!) */
899 && ! ((maybe_never || call_passed)
900 && may_trap_p (src)))
901 {
902 struct movable *m;
903 int regno = REGNO (SET_DEST (set));
904
905 /* A potential lossage is the case where two insns
906 can be combined as long as they are both in the loop, but
907 we move one of them outside the loop. For large loops,
908 this can lose. The most common case of this is the address
909 of a function being called.
910
911 Therefore, if this register is marked as being used
912 exactly once and we are in a loop with calls
913 (a "large loop"), see if we can replace the usage of
914 this register with the source of this SET. If we can,
915 delete this insn.
916
917 Don't do this if P has a REG_RETVAL note or if we have
918 SMALL_REGISTER_CLASSES and SET_SRC is a hard register. */
919
920 if (loop_info->has_call
921 && regs->array[regno].single_usage != 0
922 && regs->array[regno].single_usage != const0_rtx
923 && REGNO_FIRST_UID (regno) == INSN_UID (p)
924 && (REGNO_LAST_UID (regno)
925 == INSN_UID (regs->array[regno].single_usage))
926 && regs->array[regno].set_in_loop == 1
927 && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
928 && ! side_effects_p (SET_SRC (set))
929 && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
930 && (! SMALL_REGISTER_CLASSES
931 || (! (REG_P (SET_SRC (set))
932 && (REGNO (SET_SRC (set))
933 < FIRST_PSEUDO_REGISTER))))
934 && regno >= FIRST_PSEUDO_REGISTER
935 /* This test is not redundant; SET_SRC (set) might be
936 a call-clobbered register and the life of REGNO
937 might span a call. */
938 && ! modified_between_p (SET_SRC (set), p,
939 regs->array[regno].single_usage)
940 && no_labels_between_p (p,
941 regs->array[regno].single_usage)
942 && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
943 regs->array[regno].single_usage))
944 {
945 /* Replace any usage in a REG_EQUAL note. Must copy
946 the new source, so that we don't get rtx sharing
947 between the SET_SOURCE and REG_NOTES of insn p. */
948 REG_NOTES (regs->array[regno].single_usage)
949 = (replace_rtx
950 (REG_NOTES (regs->array[regno].single_usage),
951 SET_DEST (set), copy_rtx (SET_SRC (set))));
952
953 delete_insn (p);
954 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
955 i++)
956 regs->array[regno+i].set_in_loop = 0;
957 continue;
958 }
959
960 m = xmalloc (sizeof (struct movable));
961 m->next = 0;
962 m->insn = p;
963 m->set_src = src;
964 m->dependencies = dependencies;
965 m->set_dest = SET_DEST (set);
966 m->force = 0;
967 m->consec
968 = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
969 m->done = 0;
970 m->forces = 0;
971 m->partial = 0;
972 m->move_insn = move_insn;
973 m->move_insn_first = 0;
974 m->insert_temp = insert_temp;
975 m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
976 m->savemode = VOIDmode;
977 m->regno = regno;
978 /* Set M->cond if either loop_invariant_p
979 or consec_sets_invariant_p returned 2
980 (only conditionally invariant). */
981 m->cond = ((tem | tem1 | tem2) > 1);
982 m->global = LOOP_REG_GLOBAL_P (loop, regno);
983 m->match = 0;
984 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
985 m->savings = regs->array[regno].n_times_set;
986 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
987 m->savings += libcall_benefit (p);
988 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
989 regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
990 /* Add M to the end of the chain MOVABLES. */
991 loop_movables_add (movables, m);
992
993 if (m->consec > 0)
994 {
995 /* It is possible for the first instruction to have a
996 REG_EQUAL note but a non-invariant SET_SRC, so we must
997 remember the status of the first instruction in case
998 the last instruction doesn't have a REG_EQUAL note. */
999 m->move_insn_first = m->move_insn;
1000
1001 /* Skip this insn, not checking REG_LIBCALL notes. */
1002 p = next_nonnote_insn (p);
1003 /* Skip the consecutive insns, if there are any. */
1004 p = skip_consec_insns (p, m->consec);
1005 /* Back up to the last insn of the consecutive group. */
1006 p = prev_nonnote_insn (p);
1007
1008 /* We must now reset m->move_insn, m->is_equiv, and
1009 possibly m->set_src to correspond to the effects of
1010 all the insns. */
1011 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
1012 if (temp)
1013 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1014 else
1015 {
1016 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
1017 if (temp && CONSTANT_P (XEXP (temp, 0)))
1018 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1019 else
1020 m->move_insn = 0;
1021
1022 }
1023 m->is_equiv
1024 = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
1025 }
1026 }
1027 /* If this register is always set within a STRICT_LOW_PART
1028 or set to zero, then its high bytes are constant.
1029 So clear them outside the loop and within the loop
1030 just load the low bytes.
1031 We must check that the machine has an instruction to do so.
1032 Also, if the value loaded into the register
1033 depends on the same register, this cannot be done. */
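         /* Schematically (modes and the narrow source are illustrative),
            the insn pair looked for here is

              (set (reg:SI R) (const_int 0))
              (set (strict_low_part (subreg:HI (reg:SI R) 0)) (mem:HI ...))

            i.e. a clear of the whole register followed by a load into just
            its low part; it is the clear that can be hoisted out of the
            loop. */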
1034 else if (SET_SRC (set) == const0_rtx
1035 && GET_CODE (NEXT_INSN (p)) == INSN
1036 && (set1 = single_set (NEXT_INSN (p)))
1037 && GET_CODE (set1) == SET
1038 && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
1039 && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
1040 && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
1041 == SET_DEST (set))
1042 && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
1043 {
1044 int regno = REGNO (SET_DEST (set));
1045 if (regs->array[regno].set_in_loop == 2)
1046 {
1047 struct movable *m;
1048 m = xmalloc (sizeof (struct movable));
1049 m->next = 0;
1050 m->insn = p;
1051 m->set_dest = SET_DEST (set);
1052 m->dependencies = 0;
1053 m->force = 0;
1054 m->consec = 0;
1055 m->done = 0;
1056 m->forces = 0;
1057 m->move_insn = 0;
1058 m->move_insn_first = 0;
1059 m->insert_temp = insert_temp;
1060 m->partial = 1;
1061 /* If the insn may not be executed on some cycles,
1062 we can't clear the whole reg; clear just high part.
1063 Not even if the reg is used only within this loop.
1064 Consider this:
1065 while (1)
1066 while (s != t) {
1067 if (foo ()) x = *s;
1068 use (x);
1069 }
1070 Clearing x before the inner loop could clobber a value
1071 being saved from the last time around the outer loop.
1072 However, if the reg is not used outside this loop
1073 and all uses of the register are in the same
1074 basic block as the store, there is no problem.
1075
1076 If this insn was made by loop, we don't know its
1077 INSN_LUID and hence must make a conservative
1078 assumption. */
1079 m->global = (INSN_UID (p) >= max_uid_for_loop
1080 || LOOP_REG_GLOBAL_P (loop, regno)
1081 || (labels_in_range_p
1082 (p, REGNO_FIRST_LUID (regno))));
1083 if (maybe_never && m->global)
1084 m->savemode = GET_MODE (SET_SRC (set1));
1085 else
1086 m->savemode = VOIDmode;
1087 m->regno = regno;
1088 m->cond = 0;
1089 m->match = 0;
1090 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
1091 m->savings = 1;
1092 for (i = 0;
1093 i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
1094 i++)
1095 regs->array[regno+i].set_in_loop = -1;
1096 /* Add M to the end of the chain MOVABLES. */
1097 loop_movables_add (movables, m);
1098 }
1099 }
1100 }
1101 }
1102 /* Past a call insn, we get to insns which might not be executed
1103 because the call might exit. This matters for insns that trap.
1104 Constant and pure call insns always return, so they don't count. */
1105 else if (GET_CODE (p) == CALL_INSN && ! CONST_OR_PURE_CALL_P (p))
1106 call_passed = 1;
1107 /* Past a label or a jump, we get to insns for which we
1108 can't count on whether or how many times they will be
1109 executed during each iteration. Therefore, we can
1110 only move out sets of trivial variables
1111 (those not used after the loop). */
1112 /* Similar code appears twice in strength_reduce. */
1113 else if ((GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN)
1114 /* If we enter the loop in the middle, and scan around to the
1115 beginning, don't set maybe_never for that. This must be an
1116 unconditional jump, otherwise the code at the top of the
1117 loop might never be executed. Unconditional jumps are
1118 followed by a barrier then the loop_end. */
1119 && ! (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == loop->top
1120 && NEXT_INSN (NEXT_INSN (p)) == loop_end
1121 && any_uncondjump_p (p)))
1122 maybe_never = 1;
1123 else if (GET_CODE (p) == NOTE)
1124 {
1125 /* At the virtual top of a converted loop, insns are again known to
1126 be executed: logically, the loop begins here even though the exit
1127 code has been duplicated. */
1128 if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP && loop_depth == 0)
1129 maybe_never = call_passed = 0;
1130 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
1131 loop_depth++;
1132 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
1133 loop_depth--;
1134 }
1135 }
1136
1137 /* If one movable subsumes another, ignore that other. */
1138
1139 ignore_some_movables (movables);
1140
1141 /* For each movable insn, see if the reg that it loads
1142 has its last use (where it dies) in another conditionally movable insn.
1143 If so, record that the second insn "forces" the first one,
1144 since the second can be moved only if the first is. */
1145
1146 force_movables (movables);
1147
1148 /* See if there are multiple movable insns that load the same value.
1149 If there are, make all but the first point at the first one
1150 through the `match' field, and add the priorities of them
1151 all together as the priority of the first. */
1152
1153 combine_movables (movables, regs);
1154
1155 /* Now consider each movable insn to decide whether it is worth moving.
1156 Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
1157
1158 For machines with few registers this increases code size, so do not
1159 move movables when optimizing for code size on such machines.
1160 (The 18 below is the value for i386.) */
1161
1162 if (!optimize_size
1163 || (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
1164 {
1165 move_movables (loop, movables, threshold, insn_count);
1166
1167 /* Recalculate regs->array if move_movables has created new
1168 registers. */
1169 if (max_reg_num () > regs->num)
1170 {
1171 loop_regs_scan (loop, 0);
1172 for (update_start = loop_start;
1173 PREV_INSN (update_start)
1174 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1175 update_start = PREV_INSN (update_start))
1176 ;
1177 update_end = NEXT_INSN (loop_end);
1178
1179 reg_scan_update (update_start, update_end, loop_max_reg);
1180 loop_max_reg = max_reg_num ();
1181 }
1182 }
1183
1184 /* Now candidates that still are negative are those not moved.
1185 Change regs->array[I].set_in_loop to indicate that those are not actually
1186 invariant. */
1187 for (i = 0; i < regs->num; i++)
1188 if (regs->array[i].set_in_loop < 0)
1189 regs->array[i].set_in_loop = regs->array[i].n_times_set;
1190
1191 /* Now that we've moved some things out of the loop, we might be able to
1192 hoist even more memory references. */
1193 load_mems (loop);
1194
1195 /* Recalculate regs->array if load_mems has created new registers. */
1196 if (max_reg_num () > regs->num)
1197 loop_regs_scan (loop, 0);
1198
1199 for (update_start = loop_start;
1200 PREV_INSN (update_start)
1201 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1202 update_start = PREV_INSN (update_start))
1203 ;
1204 update_end = NEXT_INSN (loop_end);
1205
1206 reg_scan_update (update_start, update_end, loop_max_reg);
1207 loop_max_reg = max_reg_num ();
1208
1209 if (flag_strength_reduce)
1210 {
1211 if (update_end && GET_CODE (update_end) == CODE_LABEL)
1212 /* Ensure our label doesn't go away. */
1213 LABEL_NUSES (update_end)++;
1214
1215 strength_reduce (loop, flags);
1216
1217 reg_scan_update (update_start, update_end, loop_max_reg);
1218 loop_max_reg = max_reg_num ();
1219
1220 if (update_end && GET_CODE (update_end) == CODE_LABEL
1221 && --LABEL_NUSES (update_end) == 0)
1222 delete_related_insns (update_end);
1223 }
1224
1225
1226 /* The movable information was required by strength reduction above; free it now. */
1227 loop_movables_free (movables);
1228
1229 free (regs->array);
1230 regs->array = 0;
1231 regs->num = 0;
1232 }
1233 \f
1234 /* Add elements to *OUTPUT to record all the pseudo-regs
1235 mentioned in IN_THIS but not mentioned in NOT_IN_THIS. */
1236
1237 static void
1238 record_excess_regs (rtx in_this, rtx not_in_this, rtx *output)
1239 {
1240 enum rtx_code code;
1241 const char *fmt;
1242 int i;
1243
1244 code = GET_CODE (in_this);
1245
1246 switch (code)
1247 {
1248 case PC:
1249 case CC0:
1250 case CONST_INT:
1251 case CONST_DOUBLE:
1252 case CONST:
1253 case SYMBOL_REF:
1254 case LABEL_REF:
1255 return;
1256
1257 case REG:
1258 if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
1259 && ! reg_mentioned_p (in_this, not_in_this))
1260 *output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
1261 return;
1262
1263 default:
1264 break;
1265 }
1266
1267 fmt = GET_RTX_FORMAT (code);
1268 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1269 {
1270 int j;
1271
1272 switch (fmt[i])
1273 {
1274 case 'E':
1275 for (j = 0; j < XVECLEN (in_this, i); j++)
1276 record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output);
1277 break;
1278
1279 case 'e':
1280 record_excess_regs (XEXP (in_this, i), not_in_this, output);
1281 break;
1282 }
1283 }
1284 }
1285 \f
1286 /* Check what regs are referred to in the libcall block ending with INSN,
1287 aside from those mentioned in the equivalent value.
1288 If there are none, return 0.
1289 If there are one or more, return an EXPR_LIST containing all of them. */
1290
1291 static rtx
1292 libcall_other_reg (rtx insn, rtx equiv)
1293 {
1294 rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
1295 rtx p = XEXP (note, 0);
1296 rtx output = 0;
1297
1298 /* First, find all the regs used in the libcall block
1299 that are not mentioned as inputs to the result. */
1300
1301 while (p != insn)
1302 {
1303 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
1304 || GET_CODE (p) == CALL_INSN)
1305 record_excess_regs (PATTERN (p), equiv, &output);
1306 p = NEXT_INSN (p);
1307 }
1308
1309 return output;
1310 }
1311 \f
1312 /* Return 1 if all uses of REG
1313 are between INSN and the end of the basic block. */
1314
1315 static int
1316 reg_in_basic_block_p (rtx insn, rtx reg)
1317 {
1318 int regno = REGNO (reg);
1319 rtx p;
1320
1321 if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
1322 return 0;
1323
1324 /* Search this basic block for the already recorded last use of the reg. */
1325 for (p = insn; p; p = NEXT_INSN (p))
1326 {
1327 switch (GET_CODE (p))
1328 {
1329 case NOTE:
1330 break;
1331
1332 case INSN:
1333 case CALL_INSN:
1334 /* Ordinary insn: if this is the last use, we win. */
1335 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1336 return 1;
1337 break;
1338
1339 case JUMP_INSN:
1340 /* Jump insn: if this is the last use, we win. */
1341 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1342 return 1;
1343 /* Otherwise, it's the end of the basic block, so we lose. */
1344 return 0;
1345
1346 case CODE_LABEL:
1347 case BARRIER:
1348 /* It's the end of the basic block, so we lose. */
1349 return 0;
1350
1351 default:
1352 break;
1353 }
1354 }
1355
1356 /* The "last use" that was recorded can't be found after the first
1357 use. This can happen when the last use was deleted while
1358 processing an inner loop, this inner loop was then completely
1359 unrolled, and the outer loop is always exited after the inner loop,
1360 so that everything after the first use becomes a single basic block. */
1361 return 1;
1362 }
1363 \f
1364 /* Compute the benefit of eliminating the insns in the block whose
1365 last insn is LAST. This may be a group of insns used to compute a
1366 value directly or can contain a library call. */
1367
1368 static int
1369 libcall_benefit (rtx last)
1370 {
1371 rtx insn;
1372 int benefit = 0;
1373
1374 for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
1375 insn != last; insn = NEXT_INSN (insn))
1376 {
1377 if (GET_CODE (insn) == CALL_INSN)
1378 benefit += 10; /* Assume at least this many insns in a library
1379 routine. */
1380 else if (GET_CODE (insn) == INSN
1381 && GET_CODE (PATTERN (insn)) != USE
1382 && GET_CODE (PATTERN (insn)) != CLOBBER)
1383 benefit++;
1384 }
1385
1386 return benefit;
1387 }
1388 \f
1389 /* Skip COUNT insns from INSN, counting library calls as 1 insn. */
1390
1391 static rtx
1392 skip_consec_insns (rtx insn, int count)
1393 {
1394 for (; count > 0; count--)
1395 {
1396 rtx temp;
1397
1398 /* If first insn of libcall sequence, skip to end. */
1399 /* Do this at start of loop, since INSN is guaranteed to
1400 be an insn here. */
1401 if (GET_CODE (insn) != NOTE
1402 && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX)))
1403 insn = XEXP (temp, 0);
1404
1405 do
1406 insn = NEXT_INSN (insn);
1407 while (GET_CODE (insn) == NOTE);
1408 }
1409
1410 return insn;
1411 }
1412
1413 /* Ignore any movable whose insn falls within a libcall
1414 which is part of another movable.
1415 We make use of the fact that the movable for the libcall value
1416 was made later and so appears later on the chain. */
1417
1418 static void
1419 ignore_some_movables (struct loop_movables *movables)
1420 {
1421 struct movable *m, *m1;
1422
1423 for (m = movables->head; m; m = m->next)
1424 {
1425 /* Is this a movable for the value of a libcall? */
1426 rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX);
1427 if (note)
1428 {
1429 rtx insn;
1430 /* Check for earlier movables inside that range,
1431 and mark them invalid. We cannot use LUIDs here because
1432 insns created by loop.c for prior loops don't have LUIDs.
1433 Rather than reject all such insns from movables, we just
1434 explicitly check each insn in the libcall (since invariant
1435 libcalls aren't that common). */
1436 for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn))
1437 for (m1 = movables->head; m1 != m; m1 = m1->next)
1438 if (m1->insn == insn)
1439 m1->done = 1;
1440 }
1441 }
1442 }
1443
1444 /* For each movable insn, see if the reg that it loads
1445 has its last use (where it dies) in another conditionally movable insn.
1446 If so, record that the second insn "forces" the first one,
1447 since the second can be moved only if the first is. */
1448
1449 static void
1450 force_movables (struct loop_movables *movables)
1451 {
1452 struct movable *m, *m1;
1453
1454 for (m1 = movables->head; m1; m1 = m1->next)
1455 /* Omit this if moving just the (SET (REG) 0) of a zero-extend. */
1456 if (!m1->partial && !m1->done)
1457 {
1458 int regno = m1->regno;
1459 for (m = m1->next; m; m = m->next)
1460 /* ??? Could this be a bug? What if CSE caused the
1461 register of M1 to be used after this insn?
1462 Since CSE does not update regno_last_uid,
1463 this insn M->insn might not be where it dies.
1464 But very likely this doesn't matter; what matters is
1465 that M's reg is computed from M1's reg. */
1466 if (INSN_UID (m->insn) == REGNO_LAST_UID (regno)
1467 && !m->done)
1468 break;
1469 if (m != 0 && m->set_src == m1->set_dest
1470 /* If m->consec, m->set_src isn't valid. */
1471 && m->consec == 0)
1472 m = 0;
1473
1474 /* Increase the priority of moving the first insn
1475 since it permits the second to be moved as well.
1476 Likewise for insns already forced by the first insn. */
1477 if (m != 0)
1478 {
1479 struct movable *m2;
1480
1481 m->forces = m1;
1482 for (m2 = m1; m2; m2 = m2->forces)
1483 {
1484 m2->lifetime += m->lifetime;
1485 m2->savings += m->savings;
1486 }
1487 }
1488 }
1489 }
1490 \f
1491 /* Find invariant expressions that are equal and can be combined into
1492 one register. */
1493
1494 static void
1495 combine_movables (struct loop_movables *movables, struct loop_regs *regs)
1496 {
1497 struct movable *m;
1498 char *matched_regs = xmalloc (regs->num);
1499 enum machine_mode mode;
1500
1501 /* Regs that are set more than once are not allowed to match
1502 or be matched. I'm no longer sure why not. */
1503 /* Only pseudo registers are allowed to match or be matched,
1504 since move_movables does not validate the change. */
1505 /* Perhaps testing m->consec_sets would be more appropriate here? */
1506
1507 for (m = movables->head; m; m = m->next)
1508 if (m->match == 0 && regs->array[m->regno].n_times_set == 1
1509 && m->regno >= FIRST_PSEUDO_REGISTER
1510 && !m->insert_temp
1511 && !m->partial)
1512 {
1513 struct movable *m1;
1514 int regno = m->regno;
1515
1516 memset (matched_regs, 0, regs->num);
1517 matched_regs[regno] = 1;
1518
1519 /* We want later insns to match the first one. Don't make the first
1520 one match any later ones. So start this loop at m->next. */
1521 for (m1 = m->next; m1; m1 = m1->next)
1522 if (m != m1 && m1->match == 0
1523 && !m1->insert_temp
1524 && regs->array[m1->regno].n_times_set == 1
1525 && m1->regno >= FIRST_PSEUDO_REGISTER
1526 /* A reg used outside the loop mustn't be eliminated. */
1527 && !m1->global
1528 /* A reg used for zero-extending mustn't be eliminated. */
1529 && !m1->partial
1530 && (matched_regs[m1->regno]
1531 ||
1532 (
1533 /* Can combine regs with different modes loaded from the
1534 same constant only if the modes are the same or
1535 if both are integer modes with M wider or the same
1536 width as M1. The check for integer is redundant, but
1537 safe, since the only case of differing destination
1538 modes with equal sources is when both sources are
1539 VOIDmode, i.e., CONST_INT. */
1540 (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
1541 || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT
1542 && GET_MODE_CLASS (GET_MODE (m1->set_dest)) == MODE_INT
1543 && (GET_MODE_BITSIZE (GET_MODE (m->set_dest))
1544 >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest)))))
1545 /* See if the source of M1 says it matches M. */
1546 && ((REG_P (m1->set_src)
1547 && matched_regs[REGNO (m1->set_src)])
1548 || rtx_equal_for_loop_p (m->set_src, m1->set_src,
1549 movables, regs))))
1550 && ((m->dependencies == m1->dependencies)
1551 || rtx_equal_p (m->dependencies, m1->dependencies)))
1552 {
1553 m->lifetime += m1->lifetime;
1554 m->savings += m1->savings;
1555 m1->done = 1;
1556 m1->match = m;
1557 matched_regs[m1->regno] = 1;
1558 }
1559 }
1560
1561 /* Now combine the regs used for zero-extension.
1562 This can be done for those not marked `global'
1563 provided their lives don't overlap. */
1564
1565 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1566 mode = GET_MODE_WIDER_MODE (mode))
1567 {
1568 struct movable *m0 = 0;
1569
1570 /* Combine all the registers for extension from mode MODE.
1571 Don't combine any that are used outside this loop. */
1572 for (m = movables->head; m; m = m->next)
1573 if (m->partial && ! m->global
1574 && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
1575 {
1576 struct movable *m1;
1577
1578 int first = REGNO_FIRST_LUID (m->regno);
1579 int last = REGNO_LAST_LUID (m->regno);
1580
1581 if (m0 == 0)
1582 {
1583 /* First one: don't check for overlap, just record it. */
1584 m0 = m;
1585 continue;
1586 }
1587
1588 /* Make sure they extend to the same mode.
1589 (Almost always true.) */
1590 if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
1591 continue;
1592
1593 /* We already have one: check for overlap with those
1594 already combined together. */
1595 for (m1 = movables->head; m1 != m; m1 = m1->next)
1596 if (m1 == m0 || (m1->partial && m1->match == m0))
1597 if (! (REGNO_FIRST_LUID (m1->regno) > last
1598 || REGNO_LAST_LUID (m1->regno) < first))
1599 goto overlap;
1600
1601 /* No overlap: we can combine this with the others. */
1602 m0->lifetime += m->lifetime;
1603 m0->savings += m->savings;
1604 m->done = 1;
1605 m->match = m0;
1606
1607 overlap:
1608 ;
1609 }
1610 }
1611
1612 /* Clean up. */
1613 free (matched_regs);
1614 }
1615
1616 /* Returns the number of movable instructions in LOOP that were not
1617 moved outside the loop. */
1618
1619 static int
1620 num_unmoved_movables (const struct loop *loop)
1621 {
1622 int num = 0;
1623 struct movable *m;
1624
1625 for (m = LOOP_MOVABLES (loop)->head; m; m = m->next)
1626 if (!m->done)
1627 ++num;
1628
1629 return num;
1630 }
1631
1632 \f
1633 /* Return 1 if regs X and Y will become the same if moved. */
1634
1635 static int
1636 regs_match_p (rtx x, rtx y, struct loop_movables *movables)
1637 {
1638 unsigned int xn = REGNO (x);
1639 unsigned int yn = REGNO (y);
1640 struct movable *mx, *my;
1641
1642 for (mx = movables->head; mx; mx = mx->next)
1643 if (mx->regno == xn)
1644 break;
1645
1646 for (my = movables->head; my; my = my->next)
1647 if (my->regno == yn)
1648 break;
1649
1650 return (mx && my
1651 && ((mx->match == my->match && mx->match != 0)
1652 || mx->match == my
1653 || mx == my->match));
1654 }
1655
1656 /* Return 1 if X and Y are identical-looking rtx's.
1657 This is the Lisp function EQUAL for rtx arguments.
1658
1659 If two registers are matching movables or a movable register and an
1660 equivalent constant, consider them equal. */
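
/* For instance (a hedged sketch with invented register numbers): if
   reg 101 and reg 102 are matching movables, then

	(plus:SI (reg:SI 101) (const_int 4))
	(plus:SI (reg:SI 102) (const_int 4))

   compare equal here even though plain rtx_equal_p would reject them,
   because the REG case defers to regs_match_p.  Likewise a movable
   register whose set_in_loop is -2 can compare equal to the constant
   it is loaded from.  */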
1661
1662 static int
1663 rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables,
1664 struct loop_regs *regs)
1665 {
1666 int i;
1667 int j;
1668 struct movable *m;
1669 enum rtx_code code;
1670 const char *fmt;
1671
1672 if (x == y)
1673 return 1;
1674 if (x == 0 || y == 0)
1675 return 0;
1676
1677 code = GET_CODE (x);
1678
1679 /* If we have a register and a constant, they may sometimes be
1680 equal. */
1681 if (REG_P (x) && regs->array[REGNO (x)].set_in_loop == -2
1682 && CONSTANT_P (y))
1683 {
1684 for (m = movables->head; m; m = m->next)
1685 if (m->move_insn && m->regno == REGNO (x)
1686 && rtx_equal_p (m->set_src, y))
1687 return 1;
1688 }
1689 else if (REG_P (y) && regs->array[REGNO (y)].set_in_loop == -2
1690 && CONSTANT_P (x))
1691 {
1692 for (m = movables->head; m; m = m->next)
1693 if (m->move_insn && m->regno == REGNO (y)
1694 && rtx_equal_p (m->set_src, x))
1695 return 1;
1696 }
1697
1698 /* Otherwise, rtx's of different codes cannot be equal. */
1699 if (code != GET_CODE (y))
1700 return 0;
1701
1702 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
1703 (REG:SI x) and (REG:HI x) are NOT equivalent. */
1704
1705 if (GET_MODE (x) != GET_MODE (y))
1706 return 0;
1707
1708 /* These three types of rtx's can be compared nonrecursively. */
1709 if (code == REG)
1710 return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));
1711
1712 if (code == LABEL_REF)
1713 return XEXP (x, 0) == XEXP (y, 0);
1714 if (code == SYMBOL_REF)
1715 return XSTR (x, 0) == XSTR (y, 0);
1716
1717 /* Compare the elements. If any pair of corresponding elements
1718      fails to match, return 0 for the whole thing.
1719
1720 fmt = GET_RTX_FORMAT (code);
1721 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1722 {
1723 switch (fmt[i])
1724 {
1725 case 'w':
1726 if (XWINT (x, i) != XWINT (y, i))
1727 return 0;
1728 break;
1729
1730 case 'i':
1731 if (XINT (x, i) != XINT (y, i))
1732 return 0;
1733 break;
1734
1735 case 'E':
1736 /* Two vectors must have the same length. */
1737 if (XVECLEN (x, i) != XVECLEN (y, i))
1738 return 0;
1739
1740 /* And the corresponding elements must match. */
1741 for (j = 0; j < XVECLEN (x, i); j++)
1742 if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j),
1743 movables, regs) == 0)
1744 return 0;
1745 break;
1746
1747 case 'e':
1748 if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs)
1749 == 0)
1750 return 0;
1751 break;
1752
1753 case 's':
1754 if (strcmp (XSTR (x, i), XSTR (y, i)))
1755 return 0;
1756 break;
1757
1758 case 'u':
1759 /* These are just backpointers, so they don't matter. */
1760 break;
1761
1762 case '0':
1763 break;
1764
1765 /* It is believed that rtx's at this level will never
1766 contain anything but integers and other rtx's,
1767 except for within LABEL_REFs and SYMBOL_REFs. */
1768 default:
1769 abort ();
1770 }
1771 }
1772 return 1;
1773 }
1774 \f
1775 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to all
1776 insns in INSNS which use the reference. LABEL_NUSES for CODE_LABEL
1777 references is incremented once for each added note. */
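
/* A minimal sketch of why this is needed (hypothetical insn, label
   number invented): after hoisting something like

	(set (reg 103) (label_ref 27))

   out of the loop, each insn in the emitted sequence that mentions the
   label gets a REG_LABEL note and LABEL_NUSES is bumped, so later
   passes do not delete the label as apparently unreferenced.  */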
1778
1779 static void
1780 add_label_notes (rtx x, rtx insns)
1781 {
1782 enum rtx_code code = GET_CODE (x);
1783 int i, j;
1784 const char *fmt;
1785 rtx insn;
1786
1787 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
1788 {
1789 /* This code used to ignore labels that referred to dispatch tables to
1790 avoid flow generating (slightly) worse code.
1791
1792 We no longer ignore such label references (see LABEL_REF handling in
1793 mark_jump_label for additional information). */
1794 for (insn = insns; insn; insn = NEXT_INSN (insn))
1795 if (reg_mentioned_p (XEXP (x, 0), insn))
1796 {
1797 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
1798 REG_NOTES (insn));
1799 if (LABEL_P (XEXP (x, 0)))
1800 LABEL_NUSES (XEXP (x, 0))++;
1801 }
1802 }
1803
1804 fmt = GET_RTX_FORMAT (code);
1805 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1806 {
1807 if (fmt[i] == 'e')
1808 add_label_notes (XEXP (x, i), insns);
1809 else if (fmt[i] == 'E')
1810 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1811 add_label_notes (XVECEXP (x, i, j), insns);
1812 }
1813 }
1814 \f
1815 /* Scan MOVABLES, and move the insns that deserve to be moved.
1816 If two matching movables are combined, replace one reg with the
1817 other throughout. */
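
/* At the source level the effect is roughly this (hypothetical
   fragment, not taken from any test case):

	for (i = 0; i < n; i++)
	  a[i] = x * y;	    (x and y are unchanged in the loop)

   becomes

	t = x * y;
	for (i = 0; i < n; i++)
	  a[i] = t;

   i.e. the invariant computation is emitted once before the loop and
   the loop body reuses the register that now holds its value.  */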
1818
1819 static void
1820 move_movables (struct loop *loop, struct loop_movables *movables,
1821 int threshold, int insn_count)
1822 {
1823 struct loop_regs *regs = LOOP_REGS (loop);
1824 int nregs = regs->num;
1825 rtx new_start = 0;
1826 struct movable *m;
1827 rtx p;
1828 rtx loop_start = loop->start;
1829 rtx loop_end = loop->end;
1830 /* Map of pseudo-register replacements to handle combining
1831 when we move several insns that load the same value
1832 into different pseudo-registers. */
1833 rtx *reg_map = xcalloc (nregs, sizeof (rtx));
1834 char *already_moved = xcalloc (nregs, sizeof (char));
1835
1836 for (m = movables->head; m; m = m->next)
1837 {
1838 /* Describe this movable insn. */
1839
1840 if (loop_dump_stream)
1841 {
1842 fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
1843 INSN_UID (m->insn), m->regno, m->lifetime);
1844 if (m->consec > 0)
1845 fprintf (loop_dump_stream, "consec %d, ", m->consec);
1846 if (m->cond)
1847 fprintf (loop_dump_stream, "cond ");
1848 if (m->force)
1849 fprintf (loop_dump_stream, "force ");
1850 if (m->global)
1851 fprintf (loop_dump_stream, "global ");
1852 if (m->done)
1853 fprintf (loop_dump_stream, "done ");
1854 if (m->move_insn)
1855 fprintf (loop_dump_stream, "move-insn ");
1856 if (m->match)
1857 fprintf (loop_dump_stream, "matches %d ",
1858 INSN_UID (m->match->insn));
1859 if (m->forces)
1860 fprintf (loop_dump_stream, "forces %d ",
1861 INSN_UID (m->forces->insn));
1862 }
1863
1864 /* Ignore the insn if it's already done (it matched something else).
1865 Otherwise, see if it is now safe to move. */
1866
1867 if (!m->done
1868 && (! m->cond
1869 || (1 == loop_invariant_p (loop, m->set_src)
1870 && (m->dependencies == 0
1871 || 1 == loop_invariant_p (loop, m->dependencies))
1872 && (m->consec == 0
1873 || 1 == consec_sets_invariant_p (loop, m->set_dest,
1874 m->consec + 1,
1875 m->insn))))
1876 && (! m->forces || m->forces->done))
1877 {
1878 int regno;
1879 rtx p;
1880 int savings = m->savings;
1881
1882 /* We have an insn that is safe to move.
1883 Compute its desirability. */
1884
1885 p = m->insn;
1886 regno = m->regno;
1887
1888 if (loop_dump_stream)
1889 fprintf (loop_dump_stream, "savings %d ", savings);
1890
1891 if (regs->array[regno].moved_once && loop_dump_stream)
1892 fprintf (loop_dump_stream, "halved since already moved ");
1893
1894 /* An insn MUST be moved if we already moved something else
1895 which is safe only if this one is moved too: that is,
1896 if already_moved[REGNO] is nonzero. */
1897
1898 /* An insn is desirable to move if the new lifetime of the
1899 register is no more than THRESHOLD times the old lifetime.
1900 If it's not desirable, it means the loop is so big
1901 that moving won't speed things up much,
1902 and it is liable to make register usage worse. */
1903
1904 /* It is also desirable to move if it can be moved at no
1905 extra cost because something else was already moved. */
1906
1907 if (already_moved[regno]
1908 || flag_move_all_movables
1909 || (threshold * savings * m->lifetime) >=
1910 (regs->array[regno].moved_once ? insn_count * 2 : insn_count)
1911 || (m->forces && m->forces->done
1912 && regs->array[m->forces->regno].n_times_set == 1))
1913 {
1914 int count;
1915 struct movable *m1;
1916 rtx first = NULL_RTX;
1917 rtx newreg = NULL_RTX;
1918
1919 if (m->insert_temp)
1920 newreg = gen_reg_rtx (GET_MODE (m->set_dest));
1921
1922 /* Now move the insns that set the reg. */
1923
1924 if (m->partial && m->match)
1925 {
1926 rtx newpat, i1;
1927 rtx r1, r2;
1928 /* Find the end of this chain of matching regs.
1929 Thus, we load each reg in the chain from that one reg.
1930 And that reg is loaded with 0 directly,
1931 since it has ->match == 0. */
1932 for (m1 = m; m1->match; m1 = m1->match);
1933 newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
1934 SET_DEST (PATTERN (m1->insn)));
1935 i1 = loop_insn_hoist (loop, newpat);
1936
1937 /* Mark the moved, invariant reg as being allowed to
1938 share a hard reg with the other matching invariant. */
1939 REG_NOTES (i1) = REG_NOTES (m->insn);
1940 r1 = SET_DEST (PATTERN (m->insn));
1941 r2 = SET_DEST (PATTERN (m1->insn));
1942 regs_may_share
1943 = gen_rtx_EXPR_LIST (VOIDmode, r1,
1944 gen_rtx_EXPR_LIST (VOIDmode, r2,
1945 regs_may_share));
1946 delete_insn (m->insn);
1947
1948 if (new_start == 0)
1949 new_start = i1;
1950
1951 if (loop_dump_stream)
1952 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
1953 }
1954 /* If we are to re-generate the item being moved with a
1955 new move insn, first delete what we have and then emit
1956 the move insn before the loop. */
1957 else if (m->move_insn)
1958 {
1959 rtx i1, temp, seq;
1960
1961 for (count = m->consec; count >= 0; count--)
1962 {
1963 /* If this is the first insn of a library call sequence,
1964 something is very wrong. */
1965 if (GET_CODE (p) != NOTE
1966 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
1967 abort ();
1968
1969 /* If this is the last insn of a libcall sequence, then
1970 delete every insn in the sequence except the last.
1971 The last insn is handled in the normal manner. */
1972 if (GET_CODE (p) != NOTE
1973 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
1974 {
1975 temp = XEXP (temp, 0);
1976 while (temp != p)
1977 temp = delete_insn (temp);
1978 }
1979
1980 temp = p;
1981 p = delete_insn (p);
1982
1983 /* simplify_giv_expr expects that it can walk the insns
1984 at m->insn forwards and see this old sequence we are
1985 tossing here. delete_insn does preserve the next
1986 pointers, but when we skip over a NOTE we must fix
1987 it up. Otherwise that code walks into the non-deleted
1988 insn stream. */
1989 while (p && GET_CODE (p) == NOTE)
1990 p = NEXT_INSN (temp) = NEXT_INSN (p);
1991
1992 if (m->insert_temp)
1993 {
1994 /* Replace the original insn with a move from
1995 our newly created temp. */
1996 start_sequence ();
1997 emit_move_insn (m->set_dest, newreg);
1998 seq = get_insns ();
1999 end_sequence ();
2000 emit_insn_before (seq, p);
2001 }
2002 }
2003
2004 start_sequence ();
2005 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2006 m->set_src);
2007 seq = get_insns ();
2008 end_sequence ();
2009
2010 add_label_notes (m->set_src, seq);
2011
2012 i1 = loop_insn_hoist (loop, seq);
2013 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2014 set_unique_reg_note (i1,
2015 m->is_equiv ? REG_EQUIV : REG_EQUAL,
2016 m->set_src);
2017
2018 if (loop_dump_stream)
2019 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
2020
2021 /* The more regs we move, the less we like moving them. */
2022 threshold -= 3;
2023 }
2024 else
2025 {
2026 for (count = m->consec; count >= 0; count--)
2027 {
2028 rtx i1, temp;
2029
2030 /* If first insn of libcall sequence, skip to end. */
2031 /* Do this at start of loop, since p is guaranteed to
2032 be an insn here. */
2033 if (GET_CODE (p) != NOTE
2034 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
2035 p = XEXP (temp, 0);
2036
2037 /* If last insn of libcall sequence, move all
2038 insns except the last before the loop. The last
2039 insn is handled in the normal manner. */
2040 if (GET_CODE (p) != NOTE
2041 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
2042 {
2043 rtx fn_address = 0;
2044 rtx fn_reg = 0;
2045 rtx fn_address_insn = 0;
2046
2047 first = 0;
2048 for (temp = XEXP (temp, 0); temp != p;
2049 temp = NEXT_INSN (temp))
2050 {
2051 rtx body;
2052 rtx n;
2053 rtx next;
2054
2055 if (GET_CODE (temp) == NOTE)
2056 continue;
2057
2058 body = PATTERN (temp);
2059
2060 /* Find the next insn after TEMP,
2061 not counting USE or NOTE insns. */
2062 for (next = NEXT_INSN (temp); next != p;
2063 next = NEXT_INSN (next))
2064 if (! (GET_CODE (next) == INSN
2065 && GET_CODE (PATTERN (next)) == USE)
2066 && GET_CODE (next) != NOTE)
2067 break;
2068
2069 /* If that is the call, this may be the insn
2070 that loads the function address.
2071
2072 Extract the function address from the insn
2073 that loads it into a register.
2074 			     If this insn was cse'd, we would get incorrect code.
2075
2076 So emit a new move insn that copies the
2077 function address into the register that the
2078 call insn will use. flow.c will delete any
2079 redundant stores that we have created. */
2080 if (GET_CODE (next) == CALL_INSN
2081 && GET_CODE (body) == SET
2082 && REG_P (SET_DEST (body))
2083 && (n = find_reg_note (temp, REG_EQUAL,
2084 NULL_RTX)))
2085 {
2086 fn_reg = SET_SRC (body);
2087 if (!REG_P (fn_reg))
2088 fn_reg = SET_DEST (body);
2089 fn_address = XEXP (n, 0);
2090 fn_address_insn = temp;
2091 }
2092 /* We have the call insn.
2093 If it uses the register we suspect it might,
2094 load it with the correct address directly. */
2095 if (GET_CODE (temp) == CALL_INSN
2096 && fn_address != 0
2097 && reg_referenced_p (fn_reg, body))
2098 loop_insn_emit_after (loop, 0, fn_address_insn,
2099 gen_move_insn
2100 (fn_reg, fn_address));
2101
2102 if (GET_CODE (temp) == CALL_INSN)
2103 {
2104 i1 = loop_call_insn_hoist (loop, body);
2105 /* Because the USAGE information potentially
2106 contains objects other than hard registers
2107 we need to copy it. */
2108 if (CALL_INSN_FUNCTION_USAGE (temp))
2109 CALL_INSN_FUNCTION_USAGE (i1)
2110 = copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
2111 }
2112 else
2113 i1 = loop_insn_hoist (loop, body);
2114 if (first == 0)
2115 first = i1;
2116 if (temp == fn_address_insn)
2117 fn_address_insn = i1;
2118 REG_NOTES (i1) = REG_NOTES (temp);
2119 REG_NOTES (temp) = NULL;
2120 delete_insn (temp);
2121 }
2122 if (new_start == 0)
2123 new_start = first;
2124 }
2125 if (m->savemode != VOIDmode)
2126 {
2127 /* P sets REG to zero; but we should clear only
2128 the bits that are not covered by the mode
2129 m->savemode. */
2130 rtx reg = m->set_dest;
2131 rtx sequence;
2132 rtx tem;
2133
2134 start_sequence ();
2135 tem = expand_simple_binop
2136 (GET_MODE (reg), AND, reg,
2137 GEN_INT ((((HOST_WIDE_INT) 1
2138 << GET_MODE_BITSIZE (m->savemode)))
2139 - 1),
2140 reg, 1, OPTAB_LIB_WIDEN);
2141 if (tem == 0)
2142 abort ();
2143 if (tem != reg)
2144 emit_move_insn (reg, tem);
2145 sequence = get_insns ();
2146 end_sequence ();
2147 i1 = loop_insn_hoist (loop, sequence);
2148 }
2149 else if (GET_CODE (p) == CALL_INSN)
2150 {
2151 i1 = loop_call_insn_hoist (loop, PATTERN (p));
2152 /* Because the USAGE information potentially
2153 contains objects other than hard registers
2154 we need to copy it. */
2155 if (CALL_INSN_FUNCTION_USAGE (p))
2156 CALL_INSN_FUNCTION_USAGE (i1)
2157 = copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
2158 }
2159 else if (count == m->consec && m->move_insn_first)
2160 {
2161 rtx seq;
2162 /* The SET_SRC might not be invariant, so we must
2163 use the REG_EQUAL note. */
2164 start_sequence ();
2165 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2166 m->set_src);
2167 seq = get_insns ();
2168 end_sequence ();
2169
2170 add_label_notes (m->set_src, seq);
2171
2172 i1 = loop_insn_hoist (loop, seq);
2173 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2174 set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
2175 : REG_EQUAL, m->set_src);
2176 }
2177 else if (m->insert_temp)
2178 {
2179 rtx *reg_map2 = xcalloc (REGNO (newreg),
2180 						     sizeof (rtx));
2181 		  reg_map2[m->regno] = newreg;
2182
2183 i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
2184 replace_regs (i1, reg_map2, REGNO (newreg), 1);
2185 free (reg_map2);
2186 }
2187 else
2188 i1 = loop_insn_hoist (loop, PATTERN (p));
2189
2190 if (REG_NOTES (i1) == 0)
2191 {
2192 REG_NOTES (i1) = REG_NOTES (p);
2193 REG_NOTES (p) = NULL;
2194
2195 /* If there is a REG_EQUAL note present whose value
2196 is not loop invariant, then delete it, since it
2197 may cause problems with later optimization passes.
2198 It is possible for cse to create such notes
2199 like this as a result of record_jump_cond. */
2200
2201 if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
2202 && ! loop_invariant_p (loop, XEXP (temp, 0)))
2203 remove_note (i1, temp);
2204 }
2205
2206 if (new_start == 0)
2207 new_start = i1;
2208
2209 if (loop_dump_stream)
2210 fprintf (loop_dump_stream, " moved to %d",
2211 INSN_UID (i1));
2212
2213 /* If library call, now fix the REG_NOTES that contain
2214 insn pointers, namely REG_LIBCALL on FIRST
2215 and REG_RETVAL on I1. */
2216 if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
2217 {
2218 XEXP (temp, 0) = first;
2219 temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
2220 XEXP (temp, 0) = i1;
2221 }
2222
2223 temp = p;
2224 delete_insn (p);
2225 p = NEXT_INSN (p);
2226
2227 /* simplify_giv_expr expects that it can walk the insns
2228 at m->insn forwards and see this old sequence we are
2229 tossing here. delete_insn does preserve the next
2230 pointers, but when we skip over a NOTE we must fix
2231 it up. Otherwise that code walks into the non-deleted
2232 insn stream. */
2233 while (p && GET_CODE (p) == NOTE)
2234 p = NEXT_INSN (temp) = NEXT_INSN (p);
2235
2236 if (m->insert_temp)
2237 {
2238 rtx seq;
2239 /* Replace the original insn with a move from
2240 our newly created temp. */
2241 start_sequence ();
2242 emit_move_insn (m->set_dest, newreg);
2243 seq = get_insns ();
2244 end_sequence ();
2245 emit_insn_before (seq, p);
2246 }
2247 }
2248
2249 /* The more regs we move, the less we like moving them. */
2250 threshold -= 3;
2251 }
2252
2253 m->done = 1;
2254
2255 if (!m->insert_temp)
2256 {
2257 /* Any other movable that loads the same register
2258 MUST be moved. */
2259 already_moved[regno] = 1;
2260
2261 /* This reg has been moved out of one loop. */
2262 regs->array[regno].moved_once = 1;
2263
2264 /* The reg set here is now invariant. */
2265 if (! m->partial)
2266 {
2267 int i;
2268 for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
2269 regs->array[regno+i].set_in_loop = 0;
2270 }
2271
2272 /* Change the length-of-life info for the register
2273 to say it lives at least the full length of this loop.
2274 This will help guide optimizations in outer loops. */
2275
2276 if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
2277 /* This is the old insn before all the moved insns.
2278 We can't use the moved insn because it is out of range
2279 in uid_luid. Only the old insns have luids. */
2280 REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
2281 if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
2282 REGNO_LAST_UID (regno) = INSN_UID (loop_end);
2283 }
2284
2285 /* Combine with this moved insn any other matching movables. */
2286
2287 if (! m->partial)
2288 for (m1 = movables->head; m1; m1 = m1->next)
2289 if (m1->match == m)
2290 {
2291 rtx temp;
2292
2293 /* Schedule the reg loaded by M1
2294 		     for replacement so that it shares the reg of M.
2295 		     If the modes differ (only possible in restricted
2296 		     circumstances), make a SUBREG.
2297
2298 Note this assumes that the target dependent files
2299 treat REG and SUBREG equally, including within
2300 GO_IF_LEGITIMATE_ADDRESS and in all the
2301 predicates since we never verify that replacing the
2302 original register with a SUBREG results in a
2303 recognizable insn. */
2304 if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest))
2305 reg_map[m1->regno] = m->set_dest;
2306 else
2307 reg_map[m1->regno]
2308 = gen_lowpart_common (GET_MODE (m1->set_dest),
2309 m->set_dest);
2310
2311 /* Get rid of the matching insn
2312 and prevent further processing of it. */
2313 m1->done = 1;
2314
2315 /* If library call, delete all insns. */
2316 if ((temp = find_reg_note (m1->insn, REG_RETVAL,
2317 NULL_RTX)))
2318 delete_insn_chain (XEXP (temp, 0), m1->insn);
2319 else
2320 delete_insn (m1->insn);
2321
2322 /* Any other movable that loads the same register
2323 MUST be moved. */
2324 already_moved[m1->regno] = 1;
2325
2326 /* The reg merged here is now invariant,
2327 if the reg it matches is invariant. */
2328 if (! m->partial)
2329 {
2330 int i;
2331 for (i = 0;
2332 i < LOOP_REGNO_NREGS (regno, m1->set_dest);
2333 i++)
2334 regs->array[m1->regno+i].set_in_loop = 0;
2335 }
2336 }
2337 }
2338 else if (loop_dump_stream)
2339 fprintf (loop_dump_stream, "not desirable");
2340 }
2341 else if (loop_dump_stream && !m->match)
2342 fprintf (loop_dump_stream, "not safe");
2343
2344 if (loop_dump_stream)
2345 fprintf (loop_dump_stream, "\n");
2346 }
2347
2348 if (new_start == 0)
2349 new_start = loop_start;
2350
2351 /* Go through all the instructions in the loop, making
2352 all the register substitutions scheduled in REG_MAP. */
2353 for (p = new_start; p != loop_end; p = NEXT_INSN (p))
2354 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
2355 || GET_CODE (p) == CALL_INSN)
2356 {
2357 replace_regs (PATTERN (p), reg_map, nregs, 0);
2358 replace_regs (REG_NOTES (p), reg_map, nregs, 0);
2359 INSN_CODE (p) = -1;
2360 }
2361
2362 /* Clean up. */
2363 free (reg_map);
2364 free (already_moved);
2365 }
2366
2367
2368 static void
2369 loop_movables_add (struct loop_movables *movables, struct movable *m)
2370 {
2371 if (movables->head == 0)
2372 movables->head = m;
2373 else
2374 movables->last->next = m;
2375 movables->last = m;
2376 }
2377
2378
2379 static void
2380 loop_movables_free (struct loop_movables *movables)
2381 {
2382 struct movable *m;
2383 struct movable *m_next;
2384
2385 for (m = movables->head; m; m = m_next)
2386 {
2387 m_next = m->next;
2388 free (m);
2389 }
2390 }
2391 \f
2392 #if 0
2393 /* Scan X and replace the address of any MEM in it with ADDR.
2394 REG is the address that MEM should have before the replacement. */
2395
2396 static void
2397 replace_call_address (rtx x, rtx reg, rtx addr)
2398 {
2399 enum rtx_code code;
2400 int i;
2401 const char *fmt;
2402
2403 if (x == 0)
2404 return;
2405 code = GET_CODE (x);
2406 switch (code)
2407 {
2408 case PC:
2409 case CC0:
2410 case CONST_INT:
2411 case CONST_DOUBLE:
2412 case CONST:
2413 case SYMBOL_REF:
2414 case LABEL_REF:
2415 case REG:
2416 return;
2417
2418 case SET:
2419 /* Short cut for very common case. */
2420 replace_call_address (XEXP (x, 1), reg, addr);
2421 return;
2422
2423 case CALL:
2424 /* Short cut for very common case. */
2425 replace_call_address (XEXP (x, 0), reg, addr);
2426 return;
2427
2428 case MEM:
2429 /* If this MEM uses a reg other than the one we expected,
2430 something is wrong. */
2431 if (XEXP (x, 0) != reg)
2432 abort ();
2433 XEXP (x, 0) = addr;
2434 return;
2435
2436 default:
2437 break;
2438 }
2439
2440 fmt = GET_RTX_FORMAT (code);
2441 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2442 {
2443 if (fmt[i] == 'e')
2444 replace_call_address (XEXP (x, i), reg, addr);
2445 else if (fmt[i] == 'E')
2446 {
2447 int j;
2448 for (j = 0; j < XVECLEN (x, i); j++)
2449 replace_call_address (XVECEXP (x, i, j), reg, addr);
2450 }
2451 }
2452 }
2453 #endif
2454 \f
2455 /* Return the number of memory refs to addresses that vary
2456 in the rtx X. */
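
/* For example (a hedged sketch, register numbers invented), in

	(mem:SI (plus:SI (reg:SI 100) (reg:SI 101)))

   the MEM contributes 1 if its address is not loop invariant, and any
   MEMs nested inside the address expression are counted recursively in
   the same way.  */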
2457
2458 static int
2459 count_nonfixed_reads (const struct loop *loop, rtx x)
2460 {
2461 enum rtx_code code;
2462 int i;
2463 const char *fmt;
2464 int value;
2465
2466 if (x == 0)
2467 return 0;
2468
2469 code = GET_CODE (x);
2470 switch (code)
2471 {
2472 case PC:
2473 case CC0:
2474 case CONST_INT:
2475 case CONST_DOUBLE:
2476 case CONST:
2477 case SYMBOL_REF:
2478 case LABEL_REF:
2479 case REG:
2480 return 0;
2481
2482 case MEM:
2483 return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
2484 + count_nonfixed_reads (loop, XEXP (x, 0)));
2485
2486 default:
2487 break;
2488 }
2489
2490 value = 0;
2491 fmt = GET_RTX_FORMAT (code);
2492 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2493 {
2494 if (fmt[i] == 'e')
2495 value += count_nonfixed_reads (loop, XEXP (x, i));
2496 if (fmt[i] == 'E')
2497 {
2498 int j;
2499 for (j = 0; j < XVECLEN (x, i); j++)
2500 value += count_nonfixed_reads (loop, XVECEXP (x, i, j));
2501 }
2502 }
2503 return value;
2504 }
2505 \f
2506 /* Scan a loop setting the elements `cont', `vtop', `loops_enclosed',
2507 `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
2508 `unknown_address_altered', `unknown_constant_address_altered', and
2509 `num_mem_sets' in LOOP. Also, fill in the array `mems' and the
2510 list `store_mems' in LOOP. */
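
/* For instance (a hypothetical loop body): a CALL_INSN to an ordinary
   function sets has_call and has_nonconst_call and makes
   unknown_address_altered nonzero; a store through a pointer adds its
   MEM to store_mems via note_addr_stored; and a jump to a label
   outside the loop, other than the normal exit, sets
   has_multiple_exit_targets.  */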
2511
2512 static void
2513 prescan_loop (struct loop *loop)
2514 {
2515 int level = 1;
2516 rtx insn;
2517 struct loop_info *loop_info = LOOP_INFO (loop);
2518 rtx start = loop->start;
2519 rtx end = loop->end;
2520 /* The label after END. Jumping here is just like falling off the
2521 end of the loop. We use next_nonnote_insn instead of next_label
2522 as a hedge against the (pathological) case where some actual insn
2523 might end up between the two. */
2524 rtx exit_target = next_nonnote_insn (end);
2525
2526 loop_info->has_indirect_jump = indirect_jump_in_function;
2527 loop_info->pre_header_has_call = 0;
2528 loop_info->has_call = 0;
2529 loop_info->has_nonconst_call = 0;
2530 loop_info->has_prefetch = 0;
2531 loop_info->has_volatile = 0;
2532 loop_info->has_tablejump = 0;
2533 loop_info->has_multiple_exit_targets = 0;
2534 loop->level = 1;
2535
2536 loop_info->unknown_address_altered = 0;
2537 loop_info->unknown_constant_address_altered = 0;
2538 loop_info->store_mems = NULL_RTX;
2539 loop_info->first_loop_store_insn = NULL_RTX;
2540 loop_info->mems_idx = 0;
2541 loop_info->num_mem_sets = 0;
2542   /* If the loop optimizer runs twice, this was set on the first pass for the second.  */
2543 loop_info->preconditioned = NOTE_PRECONDITIONED (end);
2544
2545 for (insn = start; insn && GET_CODE (insn) != CODE_LABEL;
2546 insn = PREV_INSN (insn))
2547 {
2548 if (GET_CODE (insn) == CALL_INSN)
2549 {
2550 loop_info->pre_header_has_call = 1;
2551 break;
2552 }
2553 }
2554
2555 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2556 insn = NEXT_INSN (insn))
2557 {
2558 switch (GET_CODE (insn))
2559 {
2560 case NOTE:
2561 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
2562 {
2563 ++level;
2564 /* Count number of loops contained in this one. */
2565 loop->level++;
2566 }
2567 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
2568 --level;
2569 break;
2570
2571 case CALL_INSN:
2572 if (! CONST_OR_PURE_CALL_P (insn))
2573 {
2574 loop_info->unknown_address_altered = 1;
2575 loop_info->has_nonconst_call = 1;
2576 }
2577 else if (pure_call_p (insn))
2578 loop_info->has_nonconst_call = 1;
2579 loop_info->has_call = 1;
2580 if (can_throw_internal (insn))
2581 loop_info->has_multiple_exit_targets = 1;
2582
2583 /* Calls initializing constant objects have CLOBBER of MEM /u in the
2584 attached FUNCTION_USAGE expression list, not accounted for by the
2585 code above. We should note these to avoid missing dependencies in
2586 later references. */
2587 {
2588 rtx fusage_entry;
2589
2590 for (fusage_entry = CALL_INSN_FUNCTION_USAGE (insn);
2591 fusage_entry; fusage_entry = XEXP (fusage_entry, 1))
2592 {
2593 rtx fusage = XEXP (fusage_entry, 0);
2594
2595 if (GET_CODE (fusage) == CLOBBER
2596 && MEM_P (XEXP (fusage, 0))
2597 && RTX_UNCHANGING_P (XEXP (fusage, 0)))
2598 {
2599 note_stores (fusage, note_addr_stored, loop_info);
2600 if (! loop_info->first_loop_store_insn
2601 && loop_info->store_mems)
2602 loop_info->first_loop_store_insn = insn;
2603 }
2604 }
2605 }
2606 break;
2607
2608 case JUMP_INSN:
2609 if (! loop_info->has_multiple_exit_targets)
2610 {
2611 rtx set = pc_set (insn);
2612
2613 if (set)
2614 {
2615 rtx src = SET_SRC (set);
2616 rtx label1, label2;
2617
2618 if (GET_CODE (src) == IF_THEN_ELSE)
2619 {
2620 label1 = XEXP (src, 1);
2621 label2 = XEXP (src, 2);
2622 }
2623 else
2624 {
2625 label1 = src;
2626 label2 = NULL_RTX;
2627 }
2628
2629 do
2630 {
2631 if (label1 && label1 != pc_rtx)
2632 {
2633 if (GET_CODE (label1) != LABEL_REF)
2634 {
2635 /* Something tricky. */
2636 loop_info->has_multiple_exit_targets = 1;
2637 break;
2638 }
2639 else if (XEXP (label1, 0) != exit_target
2640 && LABEL_OUTSIDE_LOOP_P (label1))
2641 {
2642 /* A jump outside the current loop. */
2643 loop_info->has_multiple_exit_targets = 1;
2644 break;
2645 }
2646 }
2647
2648 label1 = label2;
2649 label2 = NULL_RTX;
2650 }
2651 while (label1);
2652 }
2653 else
2654 {
2655 /* A return, or something tricky. */
2656 loop_info->has_multiple_exit_targets = 1;
2657 }
2658 }
2659 /* Fall through. */
2660
2661 case INSN:
2662 if (volatile_refs_p (PATTERN (insn)))
2663 loop_info->has_volatile = 1;
2664
2665 if (GET_CODE (insn) == JUMP_INSN
2666 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2667 || GET_CODE (PATTERN (insn)) == ADDR_VEC))
2668 loop_info->has_tablejump = 1;
2669
2670 note_stores (PATTERN (insn), note_addr_stored, loop_info);
2671 if (! loop_info->first_loop_store_insn && loop_info->store_mems)
2672 loop_info->first_loop_store_insn = insn;
2673
2674 if (flag_non_call_exceptions && can_throw_internal (insn))
2675 loop_info->has_multiple_exit_targets = 1;
2676 break;
2677
2678 default:
2679 break;
2680 }
2681 }
2682
2683 /* Now, rescan the loop, setting up the LOOP_MEMS array. */
2684 if (/* An exception thrown by a called function might land us
2685 anywhere. */
2686 ! loop_info->has_nonconst_call
2687 /* We don't want loads for MEMs moved to a location before the
2688 one at which their stack memory becomes allocated. (Note
2689 that this is not a problem for malloc, etc., since those
2690 	 require actual function calls.) */
2691 && ! current_function_calls_alloca
2692 /* There are ways to leave the loop other than falling off the
2693 end. */
2694 && ! loop_info->has_multiple_exit_targets)
2695 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2696 insn = NEXT_INSN (insn))
2697 for_each_rtx (&insn, insert_loop_mem, loop_info);
2698
2699 /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
2700 that loop_invariant_p and load_mems can use true_dependence
2701 to determine what is really clobbered. */
2702 if (loop_info->unknown_address_altered)
2703 {
2704 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2705
2706 loop_info->store_mems
2707 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2708 }
2709 if (loop_info->unknown_constant_address_altered)
2710 {
2711 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2712
2713 RTX_UNCHANGING_P (mem) = 1;
2714 loop_info->store_mems
2715 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2716 }
2717 }
2718 \f
2719 /* Invalidate all loops containing LABEL. */
2720
2721 static void
2722 invalidate_loops_containing_label (rtx label)
2723 {
2724 struct loop *loop;
2725 for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer)
2726 loop->invalid = 1;
2727 }
2728
2729 /* Scan the function looking for loops. Record the start and end of each loop.
2730 Also mark as invalid loops any loops that contain a setjmp or are branched
2731 to from outside the loop. */
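
/* For example (hypothetical insn stream), the note sequence

	NOTE_INSN_LOOP_BEG ... NOTE_INSN_LOOP_BEG ... NOTE_INSN_LOOP_END
	... NOTE_INSN_LOOP_END

   describes an inner loop nested in an outer one; each gets an entry
   in LOOPS->array and uid_loop[] maps every insn to its innermost
   enclosing loop.  A setjmp call marks its loop and every enclosing
   loop invalid, as does a label that can be reached from outside the
   loop containing it.  */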
2732
2733 static void
2734 find_and_verify_loops (rtx f, struct loops *loops)
2735 {
2736 rtx insn;
2737 rtx label;
2738 int num_loops;
2739 struct loop *current_loop;
2740 struct loop *next_loop;
2741 struct loop *loop;
2742
2743 num_loops = loops->num;
2744
2745 compute_luids (f, NULL_RTX, 0);
2746
2747 /* If there are jumps to undefined labels,
2748 treat them as jumps out of any/all loops.
2749 This also avoids writing past end of tables when there are no loops. */
2750 uid_loop[0] = NULL;
2751
2752 /* Find boundaries of loops, mark which loops are contained within
2753 loops, and invalidate loops that have setjmp. */
2754
2755 num_loops = 0;
2756 current_loop = NULL;
2757 for (insn = f; insn; insn = NEXT_INSN (insn))
2758 {
2759 if (GET_CODE (insn) == NOTE)
2760 switch (NOTE_LINE_NUMBER (insn))
2761 {
2762 case NOTE_INSN_LOOP_BEG:
2763 next_loop = loops->array + num_loops;
2764 next_loop->num = num_loops;
2765 num_loops++;
2766 next_loop->start = insn;
2767 next_loop->outer = current_loop;
2768 current_loop = next_loop;
2769 break;
2770
2771 case NOTE_INSN_LOOP_CONT:
2772 current_loop->cont = insn;
2773 break;
2774
2775 case NOTE_INSN_LOOP_VTOP:
2776 current_loop->vtop = insn;
2777 break;
2778
2779 case NOTE_INSN_LOOP_END:
2780 if (! current_loop)
2781 abort ();
2782
2783 current_loop->end = insn;
2784 current_loop = current_loop->outer;
2785 break;
2786
2787 default:
2788 break;
2789 }
2790
2791 if (GET_CODE (insn) == CALL_INSN
2792 && find_reg_note (insn, REG_SETJMP, NULL))
2793 {
2794 /* In this case, we must invalidate our current loop and any
2795 enclosing loop. */
2796 for (loop = current_loop; loop; loop = loop->outer)
2797 {
2798 loop->invalid = 1;
2799 if (loop_dump_stream)
2800 fprintf (loop_dump_stream,
2801 "\nLoop at %d ignored due to setjmp.\n",
2802 INSN_UID (loop->start));
2803 }
2804 }
2805
2806 /* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
2807 enclosing loop, but this doesn't matter. */
2808 uid_loop[INSN_UID (insn)] = current_loop;
2809 }
2810
2811 /* Any loop containing a label used in an initializer must be invalidated,
2812 because it can be jumped into from anywhere. */
2813 for (label = forced_labels; label; label = XEXP (label, 1))
2814 invalidate_loops_containing_label (XEXP (label, 0));
2815
2816 /* Any loop containing a label used for an exception handler must be
2817 invalidated, because it can be jumped into from anywhere. */
2818 for_each_eh_label (invalidate_loops_containing_label);
2819
2820 /* Now scan all insn's in the function. If any JUMP_INSN branches into a
2821 loop that it is not contained within, that loop is marked invalid.
2822 If any INSN or CALL_INSN uses a label's address, then the loop containing
2823 that label is marked invalid, because it could be jumped into from
2824 anywhere.
2825
2826 Also look for blocks of code ending in an unconditional branch that
2827 exits the loop. If such a block is surrounded by a conditional
2828 branch around the block, move the block elsewhere (see below) and
2829 invert the jump to point to the code block. This may eliminate a
2830 label in our loop and will simplify processing by both us and a
2831 possible second cse pass. */
2832
2833 for (insn = f; insn; insn = NEXT_INSN (insn))
2834 if (INSN_P (insn))
2835 {
2836 struct loop *this_loop = uid_loop[INSN_UID (insn)];
2837
2838 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
2839 {
2840 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
2841 if (note)
2842 invalidate_loops_containing_label (XEXP (note, 0));
2843 }
2844
2845 if (GET_CODE (insn) != JUMP_INSN)
2846 continue;
2847
2848 mark_loop_jump (PATTERN (insn), this_loop);
2849
2850 /* See if this is an unconditional branch outside the loop. */
2851 if (this_loop
2852 && (GET_CODE (PATTERN (insn)) == RETURN
2853 || (any_uncondjump_p (insn)
2854 && onlyjump_p (insn)
2855 && (uid_loop[INSN_UID (JUMP_LABEL (insn))]
2856 != this_loop)))
2857 && get_max_uid () < max_uid_for_loop)
2858 {
2859 rtx p;
2860 rtx our_next = next_real_insn (insn);
2861 rtx last_insn_to_move = NEXT_INSN (insn);
2862 struct loop *dest_loop;
2863 struct loop *outer_loop = NULL;
2864
2865 /* Go backwards until we reach the start of the loop, a label,
2866 or a JUMP_INSN. */
2867 for (p = PREV_INSN (insn);
2868 GET_CODE (p) != CODE_LABEL
2869 && ! (GET_CODE (p) == NOTE
2870 && NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
2871 && GET_CODE (p) != JUMP_INSN;
2872 p = PREV_INSN (p))
2873 ;
2874
2875 /* Check for the case where we have a jump to an inner nested
2876 loop, and do not perform the optimization in that case. */
2877
2878 if (JUMP_LABEL (insn))
2879 {
2880 dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
2881 if (dest_loop)
2882 {
2883 for (outer_loop = dest_loop; outer_loop;
2884 outer_loop = outer_loop->outer)
2885 if (outer_loop == this_loop)
2886 break;
2887 }
2888 }
2889
2890 /* Make sure that the target of P is within the current loop. */
2891
2892 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
2893 && uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
2894 outer_loop = this_loop;
2895
2896 /* If we stopped on a JUMP_INSN to the next insn after INSN,
2897 we have a block of code to try to move.
2898
2899 We look backward and then forward from the target of INSN
2900 to find a BARRIER at the same loop depth as the target.
2901 If we find such a BARRIER, we make a new label for the start
2902 of the block, invert the jump in P and point it to that label,
2903 and move the block of code to the spot we found. */
2904
2905 if (! outer_loop
2906 && GET_CODE (p) == JUMP_INSN
2907 && JUMP_LABEL (p) != 0
2908 /* Just ignore jumps to labels that were never emitted.
2909 These always indicate compilation errors. */
2910 && INSN_UID (JUMP_LABEL (p)) != 0
2911 && any_condjump_p (p) && onlyjump_p (p)
2912 && next_real_insn (JUMP_LABEL (p)) == our_next
2913 /* If it's not safe to move the sequence, then we
2914 mustn't try. */
2915 && insns_safe_to_move_p (p, NEXT_INSN (insn),
2916 &last_insn_to_move))
2917 {
2918 rtx target
2919 = JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
2920 struct loop *target_loop = uid_loop[INSN_UID (target)];
2921 rtx loc, loc2;
2922 rtx tmp;
2923
2924 /* Search for possible garbage past the conditional jumps
2925 and look for the last barrier. */
2926 for (tmp = last_insn_to_move;
2927 tmp && GET_CODE (tmp) != CODE_LABEL; tmp = NEXT_INSN (tmp))
2928 if (GET_CODE (tmp) == BARRIER)
2929 last_insn_to_move = tmp;
2930
2931 for (loc = target; loc; loc = PREV_INSN (loc))
2932 if (GET_CODE (loc) == BARRIER
2933 /* Don't move things inside a tablejump. */
2934 && ((loc2 = next_nonnote_insn (loc)) == 0
2935 || GET_CODE (loc2) != CODE_LABEL
2936 || (loc2 = next_nonnote_insn (loc2)) == 0
2937 || GET_CODE (loc2) != JUMP_INSN
2938 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2939 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2940 && uid_loop[INSN_UID (loc)] == target_loop)
2941 break;
2942
2943 if (loc == 0)
2944 for (loc = target; loc; loc = NEXT_INSN (loc))
2945 if (GET_CODE (loc) == BARRIER
2946 /* Don't move things inside a tablejump. */
2947 && ((loc2 = next_nonnote_insn (loc)) == 0
2948 || GET_CODE (loc2) != CODE_LABEL
2949 || (loc2 = next_nonnote_insn (loc2)) == 0
2950 || GET_CODE (loc2) != JUMP_INSN
2951 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2952 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2953 && uid_loop[INSN_UID (loc)] == target_loop)
2954 break;
2955
2956 if (loc)
2957 {
2958 rtx cond_label = JUMP_LABEL (p);
2959 rtx new_label = get_label_after (p);
2960
2961 /* Ensure our label doesn't go away. */
2962 LABEL_NUSES (cond_label)++;
2963
2964 /* Verify that uid_loop is large enough and that
2965 we can invert P. */
2966 if (invert_jump (p, new_label, 1))
2967 {
2968 rtx q, r;
2969
2970 /* If no suitable BARRIER was found, create a suitable
2971 one before TARGET. Since TARGET is a fall through
2972 path, we'll need to insert a jump around our block
2973 and add a BARRIER before TARGET.
2974
2975 This creates an extra unconditional jump outside
2976 the loop. However, the benefits of removing rarely
2977 executed instructions from inside the loop usually
2978 			     outweigh the cost of the extra unconditional jump
2979 outside the loop. */
2980 if (loc == 0)
2981 {
2982 rtx temp;
2983
2984 temp = gen_jump (JUMP_LABEL (insn));
2985 temp = emit_jump_insn_before (temp, target);
2986 JUMP_LABEL (temp) = JUMP_LABEL (insn);
2987 LABEL_NUSES (JUMP_LABEL (insn))++;
2988 loc = emit_barrier_before (target);
2989 }
2990
2991 /* Include the BARRIER after INSN and copy the
2992 block after LOC. */
2993 if (squeeze_notes (&new_label, &last_insn_to_move))
2994 abort ();
2995 reorder_insns (new_label, last_insn_to_move, loc);
2996
2997 /* All those insns are now in TARGET_LOOP. */
2998 for (q = new_label;
2999 q != NEXT_INSN (last_insn_to_move);
3000 q = NEXT_INSN (q))
3001 uid_loop[INSN_UID (q)] = target_loop;
3002
3003 /* The label jumped to by INSN is no longer a loop
3004 exit. Unless INSN does not have a label (e.g.,
3005 it is a RETURN insn), search loop->exit_labels
3006 to find its label_ref, and remove it. Also turn
3007 off LABEL_OUTSIDE_LOOP_P bit. */
3008 if (JUMP_LABEL (insn))
3009 {
3010 for (q = 0, r = this_loop->exit_labels;
3011 r;
3012 q = r, r = LABEL_NEXTREF (r))
3013 if (XEXP (r, 0) == JUMP_LABEL (insn))
3014 {
3015 LABEL_OUTSIDE_LOOP_P (r) = 0;
3016 if (q)
3017 LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
3018 else
3019 this_loop->exit_labels = LABEL_NEXTREF (r);
3020 break;
3021 }
3022
3023 for (loop = this_loop; loop && loop != target_loop;
3024 loop = loop->outer)
3025 loop->exit_count--;
3026
3027 /* If we didn't find it, then something is
3028 wrong. */
3029 if (! r)
3030 abort ();
3031 }
3032
3033 /* P is now a jump outside the loop, so it must be put
3034 in loop->exit_labels, and marked as such.
3035 The easiest way to do this is to just call
3036 mark_loop_jump again for P. */
3037 mark_loop_jump (PATTERN (p), this_loop);
3038
3039 /* If INSN now jumps to the insn after it,
3040 delete INSN. */
3041 if (JUMP_LABEL (insn) != 0
3042 && (next_real_insn (JUMP_LABEL (insn))
3043 == next_real_insn (insn)))
3044 delete_related_insns (insn);
3045 }
3046
3047 /* Continue the loop after where the conditional
3048 branch used to jump, since the only branch insn
3049 in the block (if it still remains) is an inter-loop
3050 branch and hence needs no processing. */
3051 insn = NEXT_INSN (cond_label);
3052
3053 if (--LABEL_NUSES (cond_label) == 0)
3054 delete_related_insns (cond_label);
3055
3056 /* This loop will be continued with NEXT_INSN (insn). */
3057 insn = PREV_INSN (insn);
3058 }
3059 }
3060 }
3061 }
3062 }
3063
3064 /* If any label in X jumps to a loop different from LOOP and any of the
3065 loops it is contained in, mark the target loop invalid.
3066
3067 For speed, we assume that X is part of a pattern of a JUMP_INSN. */
3068
3069 static void
3070 mark_loop_jump (rtx x, struct loop *loop)
3071 {
3072 struct loop *dest_loop;
3073 struct loop *outer_loop;
3074 int i;
3075
3076 switch (GET_CODE (x))
3077 {
3078 case PC:
3079 case USE:
3080 case CLOBBER:
3081 case REG:
3082 case MEM:
3083 case CONST_INT:
3084 case CONST_DOUBLE:
3085 case RETURN:
3086 return;
3087
3088 case CONST:
3089 /* There could be a label reference in here. */
3090 mark_loop_jump (XEXP (x, 0), loop);
3091 return;
3092
3093 case PLUS:
3094 case MINUS:
3095 case MULT:
3096 mark_loop_jump (XEXP (x, 0), loop);
3097 mark_loop_jump (XEXP (x, 1), loop);
3098 return;
3099
3100 case LO_SUM:
3101 /* This may refer to a LABEL_REF or SYMBOL_REF. */
3102 mark_loop_jump (XEXP (x, 1), loop);
3103 return;
3104
3105 case SIGN_EXTEND:
3106 case ZERO_EXTEND:
3107 mark_loop_jump (XEXP (x, 0), loop);
3108 return;
3109
3110 case LABEL_REF:
3111 dest_loop = uid_loop[INSN_UID (XEXP (x, 0))];
3112
3113 /* Link together all labels that branch outside the loop. This
3114 is used by final_[bg]iv_value and the loop unrolling code. Also
3115 mark this LABEL_REF so we know that this branch should predict
3116 false. */
3117
3118 /* A check to make sure the label is not in an inner nested loop,
3119 since this does not count as a loop exit. */
3120 if (dest_loop)
3121 {
3122 for (outer_loop = dest_loop; outer_loop;
3123 outer_loop = outer_loop->outer)
3124 if (outer_loop == loop)
3125 break;
3126 }
3127 else
3128 outer_loop = NULL;
3129
3130 if (loop && ! outer_loop)
3131 {
3132 LABEL_OUTSIDE_LOOP_P (x) = 1;
3133 LABEL_NEXTREF (x) = loop->exit_labels;
3134 loop->exit_labels = x;
3135
3136 for (outer_loop = loop;
3137 outer_loop && outer_loop != dest_loop;
3138 outer_loop = outer_loop->outer)
3139 outer_loop->exit_count++;
3140 }
3141
3142 /* If this is inside a loop, but not in the current loop or one enclosed
3143 by it, it invalidates at least one loop. */
3144
3145 if (! dest_loop)
3146 return;
3147
3148 /* We must invalidate every nested loop containing the target of this
3149 label, except those that also contain the jump insn. */
3150
3151 for (; dest_loop; dest_loop = dest_loop->outer)
3152 {
3153 /* Stop when we reach a loop that also contains the jump insn. */
3154 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3155 if (dest_loop == outer_loop)
3156 return;
3157
3158 /* If we get here, we know we need to invalidate a loop. */
3159 if (loop_dump_stream && ! dest_loop->invalid)
3160 fprintf (loop_dump_stream,
3161 "\nLoop at %d ignored due to multiple entry points.\n",
3162 INSN_UID (dest_loop->start));
3163
3164 dest_loop->invalid = 1;
3165 }
3166 return;
3167
3168 case SET:
3169 /* If this is not setting pc, ignore. */
3170 if (SET_DEST (x) == pc_rtx)
3171 mark_loop_jump (SET_SRC (x), loop);
3172 return;
3173
3174 case IF_THEN_ELSE:
3175 mark_loop_jump (XEXP (x, 1), loop);
3176 mark_loop_jump (XEXP (x, 2), loop);
3177 return;
3178
3179 case PARALLEL:
3180 case ADDR_VEC:
3181 for (i = 0; i < XVECLEN (x, 0); i++)
3182 mark_loop_jump (XVECEXP (x, 0, i), loop);
3183 return;
3184
3185 case ADDR_DIFF_VEC:
3186 for (i = 0; i < XVECLEN (x, 1); i++)
3187 mark_loop_jump (XVECEXP (x, 1, i), loop);
3188 return;
3189
3190 default:
3191 /* Strictly speaking this is not a jump into the loop, only a possible
3192 jump out of the loop. However, we have no way to link the destination
3193 of this jump onto the list of exit labels. To be safe we mark this
3194 loop and any containing loops as invalid. */
3195 if (loop)
3196 {
3197 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3198 {
3199 if (loop_dump_stream && ! outer_loop->invalid)
3200 fprintf (loop_dump_stream,
3201 "\nLoop at %d ignored due to unknown exit jump.\n",
3202 INSN_UID (outer_loop->start));
3203 outer_loop->invalid = 1;
3204 }
3205 }
3206 return;
3207 }
3208 }
3209 \f
3210 /* Return nonzero if there is a label in the range from
3211    insn INSN to and including the insn whose luid is END.
3212 INSN must have an assigned luid (i.e., it must not have
3213 been previously created by loop.c). */
3214
3215 static int
3216 labels_in_range_p (rtx insn, int end)
3217 {
3218 while (insn && INSN_LUID (insn) <= end)
3219 {
3220 if (GET_CODE (insn) == CODE_LABEL)
3221 return 1;
3222 insn = NEXT_INSN (insn);
3223 }
3224
3225 return 0;
3226 }
3227
3228 /* Record that a memory reference X is being set. */
3229
3230 static void
3231 note_addr_stored (rtx x, rtx y ATTRIBUTE_UNUSED,
3232 void *data ATTRIBUTE_UNUSED)
3233 {
3234 struct loop_info *loop_info = data;
3235
3236 if (x == 0 || !MEM_P (x))
3237 return;
3238
3239 /* Count number of memory writes.
3240 This affects heuristics in strength_reduce. */
3241 loop_info->num_mem_sets++;
3242
3243 /* BLKmode MEM means all memory is clobbered. */
3244 if (GET_MODE (x) == BLKmode)
3245 {
3246 if (RTX_UNCHANGING_P (x))
3247 loop_info->unknown_constant_address_altered = 1;
3248 else
3249 loop_info->unknown_address_altered = 1;
3250
3251 return;
3252 }
3253
3254 loop_info->store_mems = gen_rtx_EXPR_LIST (VOIDmode, x,
3255 loop_info->store_mems);
3256 }
3257
3258 /* X is a value modified by an INSN that references a biv inside a loop
3259    exit test (i.e., X is somehow related to the value of the biv). If X
3260 is a pseudo that is used more than once, then the biv is (effectively)
3261 used more than once. DATA is a pointer to a loop_regs structure. */
3262
3263 static void
3264 note_set_pseudo_multiple_uses (rtx x, rtx y ATTRIBUTE_UNUSED, void *data)
3265 {
3266 struct loop_regs *regs = (struct loop_regs *) data;
3267
3268 if (x == 0)
3269 return;
3270
3271 while (GET_CODE (x) == STRICT_LOW_PART
3272 || GET_CODE (x) == SIGN_EXTRACT
3273 || GET_CODE (x) == ZERO_EXTRACT
3274 || GET_CODE (x) == SUBREG)
3275 x = XEXP (x, 0);
3276
3277 if (!REG_P (x) || REGNO (x) < FIRST_PSEUDO_REGISTER)
3278 return;
3279
3280 /* If we do not have usage information, or if we know the register
3281 is used more than once, note that fact for check_dbra_loop. */
3282 if (REGNO (x) >= max_reg_before_loop
3283 || ! regs->array[REGNO (x)].single_usage
3284 || regs->array[REGNO (x)].single_usage == const0_rtx)
3285 regs->multiple_uses = 1;
3286 }
3287 \f
3288 /* Return nonzero if the rtx X is invariant over the current loop.
3289
3290 The value is 2 if we refer to something only conditionally invariant.
3291
3292 A memory ref is invariant if it is not volatile and does not conflict
3293 with anything stored in `loop_info->store_mems'. */
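
/* A few concrete cases (a hedged sketch, register and symbol names
   invented): (const_int 4) and (symbol_ref "x") are always invariant;
   (reg 100) is invariant when its set_in_loop count is zero and
   conditionally invariant (value 2) when the count is negative; and
   (mem (reg 100)) is invariant only if it is not volatile, its address
   is invariant, and no MEM recorded in store_mems may alias it.  */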
3294
3295 int
3296 loop_invariant_p (const struct loop *loop, rtx x)
3297 {
3298 struct loop_info *loop_info = LOOP_INFO (loop);
3299 struct loop_regs *regs = LOOP_REGS (loop);
3300 int i;
3301 enum rtx_code code;
3302 const char *fmt;
3303 int conditional = 0;
3304 rtx mem_list_entry;
3305
3306 if (x == 0)
3307 return 1;
3308 code = GET_CODE (x);
3309 switch (code)
3310 {
3311 case CONST_INT:
3312 case CONST_DOUBLE:
3313 case SYMBOL_REF:
3314 case CONST:
3315 return 1;
3316
3317 case LABEL_REF:
3318 /* A LABEL_REF is normally invariant, however, if we are unrolling
3319 loops, and this label is inside the loop, then it isn't invariant.
3320 This is because each unrolled copy of the loop body will have
3321 a copy of this label. If this was invariant, then an insn loading
3322 the address of this label into a register might get moved outside
3323 the loop, and then each loop body would end up using the same label.
3324
3325 We don't know the loop bounds here though, so just fail for all
3326 labels. */
3327 if (flag_old_unroll_loops)
3328 return 0;
3329 else
3330 return 1;
3331
3332 case PC:
3333 case CC0:
3334 case UNSPEC_VOLATILE:
3335 return 0;
3336
3337 case REG:
3338 /* We used to check RTX_UNCHANGING_P (x) here, but that is invalid
3339 since the reg might be set by initialization within the loop. */
3340
3341 if ((x == frame_pointer_rtx || x == hard_frame_pointer_rtx
3342 || x == arg_pointer_rtx || x == pic_offset_table_rtx)
3343 && ! current_function_has_nonlocal_goto)
3344 return 1;
3345
3346 if (LOOP_INFO (loop)->has_call
3347 && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
3348 return 0;
3349
3350 /* Out-of-range regs can occur when we are called from unrolling.
3351 These registers created by the unroller are set in the loop,
3352 hence are never invariant.
3353 Other out-of-range regs can be generated by load_mems; those that
3354 are written to in the loop are not invariant, while those that are
3355 not written to are invariant. It would be easy for load_mems
3356 to set n_times_set correctly for these registers, however, there
3357 is no easy way to distinguish them from registers created by the
3358 unroller. */
3359
3360 if (REGNO (x) >= (unsigned) regs->num)
3361 return 0;
3362
3363 if (regs->array[REGNO (x)].set_in_loop < 0)
3364 return 2;
3365
3366 return regs->array[REGNO (x)].set_in_loop == 0;
3367
3368 case MEM:
3369 /* Volatile memory references must be rejected. Do this before
3370 checking for read-only items, so that volatile read-only items
3371 will be rejected also. */
3372 if (MEM_VOLATILE_P (x))
3373 return 0;
3374
3375 /* See if there is any dependence between a store and this load. */
3376 mem_list_entry = loop_info->store_mems;
3377 while (mem_list_entry)
3378 {
3379 if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
3380 x, rtx_varies_p))
3381 return 0;
3382
3383 mem_list_entry = XEXP (mem_list_entry, 1);
3384 }
3385
3386 /* It's not invalidated by a store in memory
3387 but we must still verify the address is invariant. */
3388 break;
3389
3390 case ASM_OPERANDS:
3391 /* Don't mess with insns declared volatile. */
3392 if (MEM_VOLATILE_P (x))
3393 return 0;
3394 break;
3395
3396 default:
3397 break;
3398 }
3399
3400 fmt = GET_RTX_FORMAT (code);
3401 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3402 {
3403 if (fmt[i] == 'e')
3404 {
3405 int tem = loop_invariant_p (loop, XEXP (x, i));
3406 if (tem == 0)
3407 return 0;
3408 if (tem == 2)
3409 conditional = 1;
3410 }
3411 else if (fmt[i] == 'E')
3412 {
3413 int j;
3414 for (j = 0; j < XVECLEN (x, i); j++)
3415 {
3416 int tem = loop_invariant_p (loop, XVECEXP (x, i, j));
3417 if (tem == 0)
3418 return 0;
3419 if (tem == 2)
3420 conditional = 1;
3421 }
3422
3423 }
3424 }
3425
3426 return 1 + conditional;
3427 }
3428 \f
3429 /* Return nonzero if all the insns in the loop that set REG
3430 are INSN and the immediately following insns,
3431 and if each of those insns sets REG in an invariant way
3432 (not counting uses of REG in them).
3433
3434 The value is 2 if some of these insns are only conditionally invariant.
3435
3436 We assume that INSN itself is the first set of REG
3437 and that its source is invariant. */
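
/* A typical shape (hypothetical, in the style of a RISC constant load):

	(set (reg:SI 100) (high:SI (symbol_ref "x")))
	(set (reg:SI 100) (lo_sum:SI (reg:SI 100) (symbol_ref "x")))

   REG 100 is set twice, so loop_invariant_p alone cannot call it
   invariant; this function instead checks that both consecutive sets
   have invariant sources, temporarily zeroing set_in_loop for REG 100
   so that the use of the register in the second insn does not count
   against it.  */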
3438
3439 static int
3440 consec_sets_invariant_p (const struct loop *loop, rtx reg, int n_sets,
3441 rtx insn)
3442 {
3443 struct loop_regs *regs = LOOP_REGS (loop);
3444 rtx p = insn;
3445 unsigned int regno = REGNO (reg);
3446 rtx temp;
3447 /* Number of sets we have to insist on finding after INSN. */
3448 int count = n_sets - 1;
3449 int old = regs->array[regno].set_in_loop;
3450 int value = 0;
3451 int this;
3452
3453 /* If N_SETS hit the limit, we can't rely on its value. */
3454 if (n_sets == 127)
3455 return 0;
3456
3457 regs->array[regno].set_in_loop = 0;
3458
3459 while (count > 0)
3460 {
3461 enum rtx_code code;
3462 rtx set;
3463
3464 p = NEXT_INSN (p);
3465 code = GET_CODE (p);
3466
3467 /* If library call, skip to end of it. */
3468 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
3469 p = XEXP (temp, 0);
3470
3471 this = 0;
3472 if (code == INSN
3473 && (set = single_set (p))
3474 && REG_P (SET_DEST (set))
3475 && REGNO (SET_DEST (set)) == regno)
3476 {
3477 this = loop_invariant_p (loop, SET_SRC (set));
3478 if (this != 0)
3479 value |= this;
3480 else if ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX)))
3481 {
3482 /* If this is a libcall, then any invariant REG_EQUAL note is OK.
3483 If this is an ordinary insn, then only CONSTANT_P REG_EQUAL
3484 notes are OK. */
3485 this = (CONSTANT_P (XEXP (temp, 0))
3486 || (find_reg_note (p, REG_RETVAL, NULL_RTX)
3487 && loop_invariant_p (loop, XEXP (temp, 0))));
3488 if (this != 0)
3489 value |= this;
3490 }
3491 }
3492 if (this != 0)
3493 count--;
3494 else if (code != NOTE)
3495 {
3496 regs->array[regno].set_in_loop = old;
3497 return 0;
3498 }
3499 }
3500
3501 regs->array[regno].set_in_loop = old;
3502 /* If loop_invariant_p ever returned 2, we return 2. */
3503 return 1 + (value & 2);
3504 }
3505 \f
3506 /* Look at all uses (not sets) of registers in X. For each, if it is
3507 the single use, set USAGE[REGNO] to INSN; if there was a previous use in
3508 a different insn, set USAGE[REGNO] to const0_rtx. */
3509
3510 static void
3511 find_single_use_in_loop (struct loop_regs *regs, rtx insn, rtx x)
3512 {
3513 enum rtx_code code = GET_CODE (x);
3514 const char *fmt = GET_RTX_FORMAT (code);
3515 int i, j;
3516
3517 if (code == REG)
3518 regs->array[REGNO (x)].single_usage
3519 = (regs->array[REGNO (x)].single_usage != 0
3520 && regs->array[REGNO (x)].single_usage != insn)
3521 ? const0_rtx : insn;
3522
3523 else if (code == SET)
3524 {
3525 /* Don't count SET_DEST if it is a REG; otherwise count things
3526 in SET_DEST because if a register is partially modified, it won't
3527 show up as a potential movable so we don't care how USAGE is set
3528 for it. */
3529 if (!REG_P (SET_DEST (x)))
3530 find_single_use_in_loop (regs, insn, SET_DEST (x));
3531 find_single_use_in_loop (regs, insn, SET_SRC (x));
3532 }
3533 else
3534 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3535 {
3536 if (fmt[i] == 'e' && XEXP (x, i) != 0)
3537 find_single_use_in_loop (regs, insn, XEXP (x, i));
3538 else if (fmt[i] == 'E')
3539 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3540 find_single_use_in_loop (regs, insn, XVECEXP (x, i, j));
3541 }
3542 }
3543 \f
3544 /* Count and record any set in X which is contained in INSN. Update
3545 REGS->array[I].MAY_NOT_OPTIMIZE and LAST_SET for any register I set
3546 in X. */
3547
3548 static void
3549 count_one_set (struct loop_regs *regs, rtx insn, rtx x, rtx *last_set)
3550 {
3551 if (GET_CODE (x) == CLOBBER && REG_P (XEXP (x, 0)))
3552 /* Don't move a reg that has an explicit clobber.
3553 It's not worth the pain to try to do it correctly. */
3554 regs->array[REGNO (XEXP (x, 0))].may_not_optimize = 1;
3555
3556 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
3557 {
3558 rtx dest = SET_DEST (x);
3559 while (GET_CODE (dest) == SUBREG
3560 || GET_CODE (dest) == ZERO_EXTRACT
3561 || GET_CODE (dest) == SIGN_EXTRACT
3562 || GET_CODE (dest) == STRICT_LOW_PART)
3563 dest = XEXP (dest, 0);
3564 if (REG_P (dest))
3565 {
3566 int i;
3567 int regno = REGNO (dest);
3568 for (i = 0; i < LOOP_REGNO_NREGS (regno, dest); i++)
3569 {
3570 /* If this is the first setting of this reg
3571 in current basic block, and it was set before,
3572 it must be set in two basic blocks, so it cannot
3573 be moved out of the loop. */
3574 if (regs->array[regno].set_in_loop > 0
3575 && last_set[regno] == 0)
3576 regs->array[regno+i].may_not_optimize = 1;
3577 /* If this is not the first setting in the current basic block,
3578 see if the reg was used between the previous setting and this one.
3579 If so, neither one can be moved. */
3580 if (last_set[regno] != 0
3581 && reg_used_between_p (dest, last_set[regno], insn))
3582 regs->array[regno+i].may_not_optimize = 1;
3583 if (regs->array[regno+i].set_in_loop < 127)
3584 ++regs->array[regno+i].set_in_loop;
3585 last_set[regno+i] = insn;
3586 }
3587 }
3588 }
3589 }
3590 \f
3591 /* Given a loop that is bounded by LOOP->START and LOOP->END and that
3592 is entered at LOOP->SCAN_START, return 1 if the register set in SET
3593 contained in insn INSN is used by any insn that precedes INSN in
3594 cyclic order starting from the loop entry point.
3595
3596 We don't want to use INSN_LUID here because if we restrict INSN to those
3597 that have a valid INSN_LUID, it means we cannot move an invariant out
3598 from an inner loop past two loops. */
3599
3600 static int
3601 loop_reg_used_before_p (const struct loop *loop, rtx set, rtx insn)
3602 {
3603 rtx reg = SET_DEST (set);
3604 rtx p;
3605
3606 /* Scan forward checking for register usage. If we hit INSN, we
3607 are done. Otherwise, if we hit LOOP->END, wrap around to LOOP->START. */
3608 for (p = loop->scan_start; p != insn; p = NEXT_INSN (p))
3609 {
3610 if (INSN_P (p) && reg_overlap_mentioned_p (reg, PATTERN (p)))
3611 return 1;
3612
3613 if (p == loop->end)
3614 p = loop->start;
3615 }
3616
3617 return 0;
3618 }
3619 \f
3620
3621 /* Information we collect about arrays that we might want to prefetch. */
3622 struct prefetch_info
3623 {
3624 struct iv_class *class; /* Class this prefetch is based on. */
3625 struct induction *giv; /* GIV this prefetch is based on. */
3626 rtx base_address; /* Start prefetching from this address plus
3627 index. */
3628 HOST_WIDE_INT index;
3629 HOST_WIDE_INT stride; /* Prefetch stride in bytes in each
3630 iteration. */
3631 unsigned int bytes_accessed; /* Sum of sizes of all accesses to this
3632 prefetch area in one iteration. */
3633 unsigned int total_bytes; /* Total bytes loop will access in this block.
3634 This is set only for loops with known
3635 iteration counts and is 0xffffffff
3636 otherwise. */
3637 int prefetch_in_loop; /* Number of prefetch insns in loop. */
3638 int prefetch_before_loop; /* Number of prefetch insns before loop. */
3639 unsigned int write : 1; /* 1 for read/write prefetches. */
3640 };
3641
3642 /* Data used by check_store function. */
3643 struct check_store_data
3644 {
3645 rtx mem_address;
3646 int mem_write;
3647 };
3648
3649 static void check_store (rtx, rtx, void *);
3650 static void emit_prefetch_instructions (struct loop *);
3651 static int rtx_equal_for_prefetch_p (rtx, rtx);
3652
3653 /* Set mem_write when mem_address is found. Used as callback to
3654 note_stores. */
3655 static void
3656 check_store (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
3657 {
3658 struct check_store_data *d = (struct check_store_data *) data;
3659
3660 if ((MEM_P (x)) && rtx_equal_p (d->mem_address, XEXP (x, 0)))
3661 d->mem_write = 1;
3662 }
3663 \f
3664 /* Like rtx_equal_p, but attempts to swap commutative operands. This is
3665 important to get some addresses combined. Later more sophisticated
3666 transformations can be added when necessary.
3667
3668 ??? The same trick of swapping operands is done in several other places.
3669 It would be nice to develop some common way to handle this. */
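/* For illustration (a hypothetical sketch, with made-up register
   numbers): with commutative operand swapping, an address computed as
   (plus (reg 100) (reg 101)) compares equal to one computed as
   (plus (reg 101) (reg 100)), so two such prefetch candidates can be
   merged. */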
3670
3671 static int
3672 rtx_equal_for_prefetch_p (rtx x, rtx y)
3673 {
3674 int i;
3675 int j;
3676 enum rtx_code code = GET_CODE (x);
3677 const char *fmt;
3678
3679 if (x == y)
3680 return 1;
3681 if (code != GET_CODE (y))
3682 return 0;
3683
3684 if (COMMUTATIVE_ARITH_P (x))
3685 {
3686 return ((rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 0))
3687 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 1)))
3688 || (rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 1))
3689 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 0))));
3690 }
3691
3692 /* Compare the elements. If any pair of corresponding elements fails to
3693 match, return 0 for the whole thing. */
3694
3695 fmt = GET_RTX_FORMAT (code);
3696 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3697 {
3698 switch (fmt[i])
3699 {
3700 case 'w':
3701 if (XWINT (x, i) != XWINT (y, i))
3702 return 0;
3703 break;
3704
3705 case 'i':
3706 if (XINT (x, i) != XINT (y, i))
3707 return 0;
3708 break;
3709
3710 case 'E':
3711 /* Two vectors must have the same length. */
3712 if (XVECLEN (x, i) != XVECLEN (y, i))
3713 return 0;
3714
3715 /* And the corresponding elements must match. */
3716 for (j = 0; j < XVECLEN (x, i); j++)
3717 if (rtx_equal_for_prefetch_p (XVECEXP (x, i, j),
3718 XVECEXP (y, i, j)) == 0)
3719 return 0;
3720 break;
3721
3722 case 'e':
3723 if (rtx_equal_for_prefetch_p (XEXP (x, i), XEXP (y, i)) == 0)
3724 return 0;
3725 break;
3726
3727 case 's':
3728 if (strcmp (XSTR (x, i), XSTR (y, i)))
3729 return 0;
3730 break;
3731
3732 case 'u':
3733 /* These are just backpointers, so they don't matter. */
3734 break;
3735
3736 case '0':
3737 break;
3738
3739 /* It is believed that rtx's at this level will never
3740 contain anything but integers and other rtx's,
3741 except for within LABEL_REFs and SYMBOL_REFs. */
3742 default:
3743 abort ();
3744 }
3745 }
3746 return 1;
3747 }
3748 \f
3749 /* Remove constant addition value from the expression X (when present)
3750 and return it. */
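/* A hypothetical example (register number made up): given
   *X == (plus (reg 100) (const_int 16)), the value 16 is returned and
   *X is left as (reg 100); given a bare (const_int 8), 8 is returned
   and *X becomes const0_rtx. */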
3751
3752 static HOST_WIDE_INT
3753 remove_constant_addition (rtx *x)
3754 {
3755 HOST_WIDE_INT addval = 0;
3756 rtx exp = *x;
3757
3758 /* Avoid clobbering a shared CONST expression. */
3759 if (GET_CODE (exp) == CONST)
3760 {
3761 if (GET_CODE (XEXP (exp, 0)) == PLUS
3762 && GET_CODE (XEXP (XEXP (exp, 0), 0)) == SYMBOL_REF
3763 && GET_CODE (XEXP (XEXP (exp, 0), 1)) == CONST_INT)
3764 {
3765 *x = XEXP (XEXP (exp, 0), 0);
3766 return INTVAL (XEXP (XEXP (exp, 0), 1));
3767 }
3768 return 0;
3769 }
3770
3771 if (GET_CODE (exp) == CONST_INT)
3772 {
3773 addval = INTVAL (exp);
3774 *x = const0_rtx;
3775 }
3776
3777 /* For a PLUS expression, recurse on both operands. */
3778 else if (GET_CODE (exp) == PLUS)
3779 {
3780 addval += remove_constant_addition (&XEXP (exp, 0));
3781 addval += remove_constant_addition (&XEXP (exp, 1));
3782
3783 /* If either operand turned out to be constant, remove the leftover
3784 zero from the expression. */
3785 if (XEXP (exp, 0) == const0_rtx)
3786 *x = XEXP (exp, 1);
3787 else if (XEXP (exp, 1) == const0_rtx)
3788 *x = XEXP (exp, 0);
3789 }
3790
3791 return addval;
3792 }
3793
3794 /* Attempt to identify accesses to arrays that are most likely to cause cache
3795 misses, and emit prefetch instructions a few prefetch blocks forward.
3796
3797 To detect the arrays we use the GIV information that was collected by the
3798 strength reduction pass.
3799
3800 The prefetch instructions are generated after the GIV information is done
3801 and before the strength reduction process. The new GIVs are injected into
3802 the strength reduction tables, so the prefetch addresses are optimized as
3803 well.
3804
3805 GIVs are split into base address, stride, and constant addition values.
3806 GIVs with the same address, stride and close addition values are combined
3807 into a single prefetch. Also writes to GIVs are detected, so that prefetch
3808 for write instructions can be used for the block we write to, on machines
3809 that support write prefetches.
3810
3811 Several heuristics are used to determine when to prefetch. They are
3812 controlled by defined symbols that can be overridden for each target. */
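/* As a rough illustration (hypothetical source, not taken from any
   particular target): in a loop such as

       for (i = 0; i < n; i++)
         sum += a[i];

   the DEST_ADDR giv for a[i] advances by sizeof (a[0]) bytes per
   iteration, so the pass may emit prefetches of the address lying
   AHEAD * PREFETCH_BLOCK bytes beyond the current access inside the
   loop, plus a few prefetches of the first cache lines before the loop
   is entered. */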
3813
3814 static void
3815 emit_prefetch_instructions (struct loop *loop)
3816 {
3817 int num_prefetches = 0;
3818 int num_real_prefetches = 0;
3819 int num_real_write_prefetches = 0;
3820 int num_prefetches_before = 0;
3821 int num_write_prefetches_before = 0;
3822 int ahead = 0;
3823 int i;
3824 struct iv_class *bl;
3825 struct induction *iv;
3826 struct prefetch_info info[MAX_PREFETCHES];
3827 struct loop_ivs *ivs = LOOP_IVS (loop);
3828
3829 if (!HAVE_prefetch)
3830 return;
3831
3832 /* Consider only loops without calls. When a call is present, the loop is
3833 probably slow enough that there is time to read the memory anyway. */
3834 if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call)
3835 {
3836 if (loop_dump_stream)
3837 fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n");
3838
3839 return;
3840 }
3841
3842 /* Don't prefetch in loops known to have few iterations. */
3843 if (PREFETCH_NO_LOW_LOOPCNT
3844 && LOOP_INFO (loop)->n_iterations
3845 && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT)
3846 {
3847 if (loop_dump_stream)
3848 fprintf (loop_dump_stream,
3849 "Prefetch: ignoring loop: not enough iterations.\n");
3850 return;
3851 }
3852
3853 /* Search all induction variables and pick those interesting for the prefetch
3854 machinery. */
3855 for (bl = ivs->list; bl; bl = bl->next)
3856 {
3857 struct induction *biv = bl->biv, *biv1;
3858 int basestride = 0;
3859
3860 biv1 = biv;
3861
3862 /* Expect all BIVs to be executed in each iteration. This makes our
3863 analysis more conservative. */
3864 while (biv1)
3865 {
3866 /* Discard non-constant additions that we can't handle well yet, and
3867 BIVs that are executed multiple times; such BIVs ought to be
3868 handled in the nested loop. We accept not_every_iteration BIVs,
3869 since these only result in larger strides and make our
3870 heuristics more conservative. */
3871 if (GET_CODE (biv->add_val) != CONST_INT)
3872 {
3873 if (loop_dump_stream)
3874 {
3875 fprintf (loop_dump_stream,
3876 "Prefetch: ignoring biv %d: non-constant addition at insn %d:",
3877 REGNO (biv->src_reg), INSN_UID (biv->insn));
3878 print_rtl (loop_dump_stream, biv->add_val);
3879 fprintf (loop_dump_stream, "\n");
3880 }
3881 break;
3882 }
3883
3884 if (biv->maybe_multiple)
3885 {
3886 if (loop_dump_stream)
3887 {
3888 fprintf (loop_dump_stream,
3889 "Prefetch: ignoring biv %d: maybe_multiple at insn %i:",
3890 REGNO (biv->src_reg), INSN_UID (biv->insn));
3891 print_rtl (loop_dump_stream, biv->add_val);
3892 fprintf (loop_dump_stream, "\n");
3893 }
3894 break;
3895 }
3896
3897 basestride += INTVAL (biv1->add_val);
3898 biv1 = biv1->next_iv;
3899 }
3900
3901 if (biv1 || !basestride)
3902 continue;
3903
3904 for (iv = bl->giv; iv; iv = iv->next_iv)
3905 {
3906 rtx address;
3907 rtx temp;
3908 HOST_WIDE_INT index = 0;
3909 int add = 1;
3910 HOST_WIDE_INT stride = 0;
3911 int stride_sign = 1;
3912 struct check_store_data d;
3913 const char *ignore_reason = NULL;
3914 int size = GET_MODE_SIZE (GET_MODE (iv));
3915
3916 /* See whether an induction variable is interesting to us and if
3917 not, report the reason. */
3918 if (iv->giv_type != DEST_ADDR)
3919 ignore_reason = "giv is not a destination address";
3920
3921 /* We are interested only in constant stride memory references
3922 in order to be able to compute density easily. */
3923 else if (GET_CODE (iv->mult_val) != CONST_INT)
3924 ignore_reason = "stride is not constant";
3925
3926 else
3927 {
3928 stride = INTVAL (iv->mult_val) * basestride;
3929 if (stride < 0)
3930 {
3931 stride = -stride;
3932 stride_sign = -1;
3933 }
3934
3935 /* On some targets, reversed order prefetches are not
3936 worthwhile. */
3937 if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0)
3938 ignore_reason = "reversed order stride";
3939
3940 /* Prefetch of accesses with an extreme stride might not be
3941 worthwhile, either. */
3942 else if (PREFETCH_NO_EXTREME_STRIDE
3943 && stride > PREFETCH_EXTREME_STRIDE)
3944 ignore_reason = "extreme stride";
3945
3946 /* Ignore GIVs with varying add values; we can't predict the
3947 value for the next iteration. */
3948 else if (!loop_invariant_p (loop, iv->add_val))
3949 ignore_reason = "giv has varying add value";
3950
3951 /* Ignore GIVs in the nested loops; they ought to have been
3952 handled already. */
3953 else if (iv->maybe_multiple)
3954 ignore_reason = "giv is in nested loop";
3955 }
3956
3957 if (ignore_reason != NULL)
3958 {
3959 if (loop_dump_stream)
3960 fprintf (loop_dump_stream,
3961 "Prefetch: ignoring giv at %d: %s.\n",
3962 INSN_UID (iv->insn), ignore_reason);
3963 continue;
3964 }
3965
3966 /* Determine the pointer to the basic array we are examining. It is
3967 the sum of the BIV's initial value and the GIV's add_val. */
3968 address = copy_rtx (iv->add_val);
3969 temp = copy_rtx (bl->initial_value);
3970
3971 address = simplify_gen_binary (PLUS, Pmode, temp, address);
3972 index = remove_constant_addition (&address);
3973
3974 d.mem_write = 0;
3975 d.mem_address = *iv->location;
3976
3977 /* When the GIV is not always executed, we might be better off by
3978 not dirtying the cache pages. */
3979 if (PREFETCH_CONDITIONAL || iv->always_executed)
3980 note_stores (PATTERN (iv->insn), check_store, &d);
3981 else
3982 {
3983 if (loop_dump_stream)
3984 fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %d: %s\n",
3985 INSN_UID (iv->insn), "in conditional code.");
3986 continue;
3987 }
3988
3989 /* Attempt to find another prefetch to the same array and see if we
3990 can merge this one. */
3991 for (i = 0; i < num_prefetches; i++)
3992 if (rtx_equal_for_prefetch_p (address, info[i].base_address)
3993 && stride == info[i].stride)
3994 {
3995 /* If both access the same array (the same location,
3996 just with a small difference in constant indexes), merge
3997 the prefetches. Just issue the later one; the earlier
3998 address will have been prefetched by a previous iteration.
3999 The artificial threshold should not be too small,
4000 but also no bigger than the small portion of memory usually
4001 traversed by a single loop. */
4002 if (index >= info[i].index
4003 && index - info[i].index < PREFETCH_EXTREME_DIFFERENCE)
4004 {
4005 info[i].write |= d.mem_write;
4006 info[i].bytes_accessed += size;
4007 info[i].index = index;
4008 info[i].giv = iv;
4009 info[i].class = bl;
4010 info[num_prefetches].base_address = address;
4011 add = 0;
4012 break;
4013 }
4014
4015 if (index < info[i].index
4016 && info[i].index - index < PREFETCH_EXTREME_DIFFERENCE)
4017 {
4018 info[i].write |= d.mem_write;
4019 info[i].bytes_accessed += size;
4020 add = 0;
4021 break;
4022 }
4023 }
4024
4025 /* Merging failed. */
4026 if (add)
4027 {
4028 info[num_prefetches].giv = iv;
4029 info[num_prefetches].class = bl;
4030 info[num_prefetches].index = index;
4031 info[num_prefetches].stride = stride;
4032 info[num_prefetches].base_address = address;
4033 info[num_prefetches].write = d.mem_write;
4034 info[num_prefetches].bytes_accessed = size;
4035 num_prefetches++;
4036 if (num_prefetches >= MAX_PREFETCHES)
4037 {
4038 if (loop_dump_stream)
4039 fprintf (loop_dump_stream,
4040 "Maximal number of prefetches exceeded.\n");
4041 return;
4042 }
4043 }
4044 }
4045 }
4046
4047 for (i = 0; i < num_prefetches; i++)
4048 {
4049 int density;
4050
4051 /* Attempt to calculate the total number of bytes fetched by all
4052 iterations of the loop. Avoid overflow. */
4053 if (LOOP_INFO (loop)->n_iterations
4054 && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
4055 >= LOOP_INFO (loop)->n_iterations))
4056 info[i].total_bytes = info[i].stride * LOOP_INFO (loop)->n_iterations;
4057 else
4058 info[i].total_bytes = 0xffffffff;
4059
4060 density = info[i].bytes_accessed * 100 / info[i].stride;
4061
4062 /* Prefetch might be worthwhile only when the loads/stores are dense. */
4063 if (PREFETCH_ONLY_DENSE_MEM)
4064 if (density * 256 > PREFETCH_DENSE_MEM * 100
4065 && (info[i].total_bytes / PREFETCH_BLOCK
4066 >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
4067 {
4068 info[i].prefetch_before_loop = 1;
4069 info[i].prefetch_in_loop
4070 = (info[i].total_bytes / PREFETCH_BLOCK
4071 > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
4072 }
4073 else
4074 {
4075 info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0;
4076 if (loop_dump_stream)
4077 fprintf (loop_dump_stream,
4078 "Prefetch: ignoring giv at %d: %d%% density is too low.\n",
4079 INSN_UID (info[i].giv->insn), density);
4080 }
4081 else
4082 info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1;
4083
4084 /* Find how many prefetch instructions we'll use within the loop. */
4085 if (info[i].prefetch_in_loop != 0)
4086 {
4087 info[i].prefetch_in_loop = ((info[i].stride + PREFETCH_BLOCK - 1)
4088 / PREFETCH_BLOCK);
4089 num_real_prefetches += info[i].prefetch_in_loop;
4090 if (info[i].write)
4091 num_real_write_prefetches += info[i].prefetch_in_loop;
4092 }
4093 }
4094
4095 /* Determine how many iterations ahead to prefetch within the loop, based
4096 on how many prefetches we currently expect to do within the loop. */
4097 if (num_real_prefetches != 0)
4098 {
4099 if ((ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches) == 0)
4100 {
4101 if (loop_dump_stream)
4102 fprintf (loop_dump_stream,
4103 "Prefetch: ignoring prefetches within loop: ahead is zero; %d < %d\n",
4104 SIMULTANEOUS_PREFETCHES, num_real_prefetches);
4105 num_real_prefetches = 0, num_real_write_prefetches = 0;
4106 }
4107 }
4108 /* We'll also use AHEAD to determine how many prefetch instructions to
4109 emit before a loop, so don't leave it zero. */
4110 if (ahead == 0)
4111 ahead = PREFETCH_BLOCKS_BEFORE_LOOP_MAX;
4112
4113 for (i = 0; i < num_prefetches; i++)
4114 {
4115 /* Update if we've decided not to prefetch anything within the loop. */
4116 if (num_real_prefetches == 0)
4117 info[i].prefetch_in_loop = 0;
4118
4119 /* Find how many prefetch instructions we'll use before the loop. */
4120 if (info[i].prefetch_before_loop != 0)
4121 {
4122 int n = info[i].total_bytes / PREFETCH_BLOCK;
4123 if (n > ahead)
4124 n = ahead;
4125 info[i].prefetch_before_loop = n;
4126 num_prefetches_before += n;
4127 if (info[i].write)
4128 num_write_prefetches_before += n;
4129 }
4130
4131 if (loop_dump_stream)
4132 {
4133 if (info[i].prefetch_in_loop == 0
4134 && info[i].prefetch_before_loop == 0)
4135 continue;
4136 fprintf (loop_dump_stream, "Prefetch insn: %d",
4137 INSN_UID (info[i].giv->insn));
4138 fprintf (loop_dump_stream,
4139 "; in loop: %d; before: %d; %s\n",
4140 info[i].prefetch_in_loop,
4141 info[i].prefetch_before_loop,
4142 info[i].write ? "read/write" : "read only");
4143 fprintf (loop_dump_stream,
4144 " density: %d%%; bytes_accessed: %u; total_bytes: %u\n",
4145 (int) (info[i].bytes_accessed * 100 / info[i].stride),
4146 info[i].bytes_accessed, info[i].total_bytes);
4147 fprintf (loop_dump_stream, " index: " HOST_WIDE_INT_PRINT_DEC
4148 "; stride: " HOST_WIDE_INT_PRINT_DEC "; address: ",
4149 info[i].index, info[i].stride);
4150 print_rtl (loop_dump_stream, info[i].base_address);
4151 fprintf (loop_dump_stream, "\n");
4152 }
4153 }
4154
4155 if (num_real_prefetches + num_prefetches_before > 0)
4156 {
4157 /* Record that this loop uses prefetch instructions. */
4158 LOOP_INFO (loop)->has_prefetch = 1;
4159
4160 if (loop_dump_stream)
4161 {
4162 fprintf (loop_dump_stream, "Real prefetches needed within loop: %d (write: %d)\n",
4163 num_real_prefetches, num_real_write_prefetches);
4164 fprintf (loop_dump_stream, "Real prefetches needed before loop: %d (write: %d)\n",
4165 num_prefetches_before, num_write_prefetches_before);
4166 }
4167 }
4168
4169 for (i = 0; i < num_prefetches; i++)
4170 {
4171 int y;
4172
4173 for (y = 0; y < info[i].prefetch_in_loop; y++)
4174 {
4175 rtx loc = copy_rtx (*info[i].giv->location);
4176 rtx insn;
4177 int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
4178 rtx before_insn = info[i].giv->insn;
4179 rtx prev_insn = PREV_INSN (info[i].giv->insn);
4180 rtx seq;
4181
4182 /* We can save some effort by offsetting the address on
4183 architectures with offsettable memory references. */
4184 if (offsettable_address_p (0, VOIDmode, loc))
4185 loc = plus_constant (loc, bytes_ahead);
4186 else
4187 {
4188 rtx reg = gen_reg_rtx (Pmode);
4189 loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
4190 GEN_INT (bytes_ahead), reg,
4191 0, before_insn);
4192 loc = reg;
4193 }
4194
4195 start_sequence ();
4196 /* Make sure the address operand is valid for prefetch. */
4197 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
4198 (loc, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
4199 loc = force_reg (Pmode, loc);
4200 emit_insn (gen_prefetch (loc, GEN_INT (info[i].write),
4201 GEN_INT (3)));
4202 seq = get_insns ();
4203 end_sequence ();
4204 emit_insn_before (seq, before_insn);
4205
4206 /* Check all insns emitted and record the new GIV
4207 information. */
4208 insn = NEXT_INSN (prev_insn);
4209 while (insn != before_insn)
4210 {
4211 insn = check_insn_for_givs (loop, insn,
4212 info[i].giv->always_executed,
4213 info[i].giv->maybe_multiple);
4214 insn = NEXT_INSN (insn);
4215 }
4216 }
4217
4218 if (PREFETCH_BEFORE_LOOP)
4219 {
4220 /* Emit insns before the loop to fetch the first cache lines or,
4221 if we're not prefetching within the loop, everything we expect
4222 to need. */
4223 for (y = 0; y < info[i].prefetch_before_loop; y++)
4224 {
4225 rtx reg = gen_reg_rtx (Pmode);
4226 rtx loop_start = loop->start;
4227 rtx init_val = info[i].class->initial_value;
4228 rtx add_val = simplify_gen_binary (PLUS, Pmode,
4229 info[i].giv->add_val,
4230 GEN_INT (y * PREFETCH_BLOCK));
4231
4232 /* Functions called by LOOP_IV_ADD_EMIT_BEFORE expect a
4233 non-constant INIT_VAL to have the same mode as REG, which
4234 in this case we know to be Pmode. */
4235 if (GET_MODE (init_val) != Pmode && !CONSTANT_P (init_val))
4236 {
4237 rtx seq;
4238
4239 start_sequence ();
4240 init_val = convert_to_mode (Pmode, init_val, 0);
4241 seq = get_insns ();
4242 end_sequence ();
4243 loop_insn_emit_before (loop, 0, loop_start, seq);
4244 }
4245 loop_iv_add_mult_emit_before (loop, init_val,
4246 info[i].giv->mult_val,
4247 add_val, reg, 0, loop_start);
4248 emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write),
4249 GEN_INT (3)),
4250 loop_start);
4251 }
4252 }
4253 }
4254
4255 return;
4256 }
4257 \f
4258 /* Communication with routines called via `note_stores'. */
4259
4260 static rtx note_insn;
4261
4262 /* Dummy register to have nonzero DEST_REG for DEST_ADDR type givs. */
4263
4264 static rtx addr_placeholder;
4265
4266 /* ??? Unfinished optimizations, and possible future optimizations,
4267 for the strength reduction code. */
4268
4269 /* ??? The interaction of biv elimination, and recognition of 'constant'
4270 bivs, may cause problems. */
4271
4272 /* ??? Add heuristics so that DEST_ADDR strength reduction does not cause
4273 performance problems.
4274
4275 Perhaps don't eliminate things that can be combined with an addressing
4276 mode. Find all givs that have the same biv, mult_val, and add_val;
4277 then for each giv, check to see if its only use dies in a following
4278 memory address. If so, generate a new memory address and check to see
4279 if it is valid. If it is valid, then store the modified memory address,
4280 otherwise, mark the giv as not done so that it will get its own iv. */
4281
4282 /* ??? Could try to optimize branches when it is known that a biv is always
4283 positive. */
4284
4285 /* ??? When replacing a biv in a compare insn, we should replace it with the
4286 closest giv so that an optimized branch can still be recognized by the
4287 combiner, e.g. the VAX acb insn. */
4288
4289 /* ??? Many of the checks involving uid_luid could be simplified if regscan
4290 was rerun in loop_optimize whenever a register was added or moved.
4291 Also, some of the optimizations could be a little less conservative. */
4292 \f
4293 /* Scan the loop body and call FNCALL for each insn. In addition to the
4294 LOOP and INSN parameters, pass MAYBE_MULTIPLE and NOT_EVERY_ITERATION to the
4295 callback.
4296
4297 NOT_EVERY_ITERATION is 1 if the current insn is not known to be executed at
4298 least once for every loop iteration except for the last one.
4299
4300 MAYBE_MULTIPLE is 1 if the current insn may be executed more than once per
4301 loop iteration.
4302 */
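/* A purely illustrative (hypothetical) example of the two flags: in

       for (i = 0; i < n; i++)
         if (p[i])
           q[i] = 0;

   the store to q[i] sits after a conditional jump inside the loop, so it
   is scanned with NOT_EVERY_ITERATION set to 1, while an insn following a
   label that a later jump in the loop can reach would be scanned with
   MAYBE_MULTIPLE set to 1. */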
4303 void
4304 for_each_insn_in_loop (struct loop *loop, loop_insn_callback fncall)
4305 {
4306 int not_every_iteration = 0;
4307 int maybe_multiple = 0;
4308 int past_loop_latch = 0;
4309 int loop_depth = 0;
4310 rtx p;
4311
4312 /* If loop_scan_start points to the loop exit test, we have to be wary of
4313 subversive use of gotos inside expression statements. */
4314 if (prev_nonnote_insn (loop->scan_start) != prev_nonnote_insn (loop->start))
4315 maybe_multiple = back_branch_in_range_p (loop, loop->scan_start);
4316
4317 /* Scan through loop and update NOT_EVERY_ITERATION and MAYBE_MULTIPLE. */
4318 for (p = next_insn_in_loop (loop, loop->scan_start);
4319 p != NULL_RTX;
4320 p = next_insn_in_loop (loop, p))
4321 {
4322 p = fncall (loop, p, not_every_iteration, maybe_multiple);
4323
4324 /* Past CODE_LABEL, we get to insns that may be executed multiple
4325 times. The only way we can be sure that they can't is if every
4326 jump insn between here and the end of the loop either
4327 returns, exits the loop, is a jump to a location that is still
4328 behind the label, or is a jump to the loop start. */
4329
4330 if (GET_CODE (p) == CODE_LABEL)
4331 {
4332 rtx insn = p;
4333
4334 maybe_multiple = 0;
4335
4336 while (1)
4337 {
4338 insn = NEXT_INSN (insn);
4339 if (insn == loop->scan_start)
4340 break;
4341 if (insn == loop->end)
4342 {
4343 if (loop->top != 0)
4344 insn = loop->top;
4345 else
4346 break;
4347 if (insn == loop->scan_start)
4348 break;
4349 }
4350
4351 if (GET_CODE (insn) == JUMP_INSN
4352 && GET_CODE (PATTERN (insn)) != RETURN
4353 && (!any_condjump_p (insn)
4354 || (JUMP_LABEL (insn) != 0
4355 && JUMP_LABEL (insn) != loop->scan_start
4356 && !loop_insn_first_p (p, JUMP_LABEL (insn)))))
4357 {
4358 maybe_multiple = 1;
4359 break;
4360 }
4361 }
4362 }
4363
4364 /* Past a jump, we get to insns for which we can't count
4365 on whether they will be executed during each iteration. */
4366 /* This code appears twice in strength_reduce. There is also similar
4367 code in scan_loop. */
4368 if (GET_CODE (p) == JUMP_INSN
4369 /* If we enter the loop in the middle, and scan around to the
4370 beginning, don't set not_every_iteration for that.
4371 This can be any kind of jump, since we want to know if insns
4372 will be executed if the loop is executed. */
4373 && !(JUMP_LABEL (p) == loop->top
4374 && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
4375 && any_uncondjump_p (p))
4376 || (NEXT_INSN (p) == loop->end && any_condjump_p (p)))))
4377 {
4378 rtx label = 0;
4379
4380 /* If this is a jump outside the loop, then it also doesn't
4381 matter. Check to see if the target of this branch is on the
4382 loop->exit_labels list. */
4383
4384 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
4385 if (XEXP (label, 0) == JUMP_LABEL (p))
4386 break;
4387
4388 if (!label)
4389 not_every_iteration = 1;
4390 }
4391
4392 else if (GET_CODE (p) == NOTE)
4393 {
4394 /* At the virtual top of a converted loop, insns are again known to
4395 be executed each iteration: logically, the loop begins here
4396 even though the exit code has been duplicated.
4397
4398 Insns are also again known to be executed each iteration at
4399 the LOOP_CONT note. */
4400 if ((NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP
4401 || NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_CONT)
4402 && loop_depth == 0)
4403 not_every_iteration = 0;
4404 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
4405 loop_depth++;
4406 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
4407 loop_depth--;
4408 }
4409
4410 /* Note if we pass a loop latch. If we do, then we can not clear
4411 NOT_EVERY_ITERATION below when we pass the last CODE_LABEL in
4412 a loop since a jump before the last CODE_LABEL may have started
4413 a new loop iteration.
4414
4415 Note that LOOP_TOP is only set for rotated loops and we need
4416 this check for all loops, so compare against the CODE_LABEL
4417 which immediately follows LOOP_START. */
4418 if (GET_CODE (p) == JUMP_INSN
4419 && JUMP_LABEL (p) == NEXT_INSN (loop->start))
4420 past_loop_latch = 1;
4421
4422 /* Unlike in the code motion pass where MAYBE_NEVER indicates that
4423 an insn may never be executed, NOT_EVERY_ITERATION indicates whether
4424 or not an insn is known to be executed each iteration of the
4425 loop, whether or not any iterations are known to occur.
4426
4427 Therefore, if we have just passed a label and have no more labels
4428 between here and the test insn of the loop, and we have not passed
4429 a jump to the top of the loop, then we know these insns will be
4430 executed each iteration. */
4431
4432 if (not_every_iteration
4433 && !past_loop_latch
4434 && GET_CODE (p) == CODE_LABEL
4435 && no_labels_between_p (p, loop->end)
4436 && loop_insn_first_p (p, loop->cont))
4437 not_every_iteration = 0;
4438 }
4439 }
4440 \f
4441 static void
4442 loop_bivs_find (struct loop *loop)
4443 {
4444 struct loop_regs *regs = LOOP_REGS (loop);
4445 struct loop_ivs *ivs = LOOP_IVS (loop);
4446 /* Temporary list pointers for traversing ivs->list. */
4447 struct iv_class *bl, **backbl;
4448
4449 ivs->list = 0;
4450
4451 for_each_insn_in_loop (loop, check_insn_for_bivs);
4452
4453 /* Scan ivs->list to remove all regs that proved not to be bivs.
4454 Make a sanity check against regs->n_times_set. */
4455 for (backbl = &ivs->list, bl = *backbl; bl; bl = bl->next)
4456 {
4457 if (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4458 /* Above happens if register modified by subreg, etc. */
4459 /* Make sure it is not recognized as a basic induction var: */
4460 || regs->array[bl->regno].n_times_set != bl->biv_count
4461 /* If it was never incremented, it is an invariant that we decided not
4462 to move, so leave it alone. */
4463 || ! bl->incremented)
4464 {
4465 if (loop_dump_stream)
4466 fprintf (loop_dump_stream, "Biv %d: discarded, %s\n",
4467 bl->regno,
4468 (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4469 ? "not induction variable"
4470 : (! bl->incremented ? "never incremented"
4471 : "count error")));
4472
4473 REG_IV_TYPE (ivs, bl->regno) = NOT_BASIC_INDUCT;
4474 *backbl = bl->next;
4475 }
4476 else
4477 {
4478 backbl = &bl->next;
4479
4480 if (loop_dump_stream)
4481 fprintf (loop_dump_stream, "Biv %d: verified\n", bl->regno);
4482 }
4483 }
4484 }
4485
4486
4487 /* Determine how BIVS are initialized by looking through pre-header
4488 extended basic block. */
4489 static void
4490 loop_bivs_init_find (struct loop *loop)
4491 {
4492 struct loop_ivs *ivs = LOOP_IVS (loop);
4493 /* Temporary list pointers for traversing ivs->list. */
4494 struct iv_class *bl;
4495 int call_seen;
4496 rtx p;
4497
4498 /* Find initial value for each biv by searching backwards from loop_start,
4499 halting at first label. Also record any test condition. */
4500
4501 call_seen = 0;
4502 for (p = loop->start; p && GET_CODE (p) != CODE_LABEL; p = PREV_INSN (p))
4503 {
4504 rtx test;
4505
4506 note_insn = p;
4507
4508 if (GET_CODE (p) == CALL_INSN)
4509 call_seen = 1;
4510
4511 if (INSN_P (p))
4512 note_stores (PATTERN (p), record_initial, ivs);
4513
4514 /* Record any test of a biv that branches around the loop if no store
4515 between it and the start of loop. We only care about tests with
4516 constants and registers and only certain of those. */
4517 if (GET_CODE (p) == JUMP_INSN
4518 && JUMP_LABEL (p) != 0
4519 && next_real_insn (JUMP_LABEL (p)) == next_real_insn (loop->end)
4520 && (test = get_condition_for_loop (loop, p)) != 0
4521 && REG_P (XEXP (test, 0))
4522 && REGNO (XEXP (test, 0)) < max_reg_before_loop
4523 && (bl = REG_IV_CLASS (ivs, REGNO (XEXP (test, 0)))) != 0
4524 && valid_initial_value_p (XEXP (test, 1), p, call_seen, loop->start)
4525 && bl->init_insn == 0)
4526 {
4527 /* If an NE test, we have an initial value! */
4528 if (GET_CODE (test) == NE)
4529 {
4530 bl->init_insn = p;
4531 bl->init_set = gen_rtx_SET (VOIDmode,
4532 XEXP (test, 0), XEXP (test, 1));
4533 }
4534 else
4535 bl->initial_test = test;
4536 }
4537 }
4538 }
4539
4540
4541 /* Look at each biv and see if we can say anything better about its
4542 initial value from any initializing insns set up above. (This is done
4543 in two passes to avoid missing SETs in a PARALLEL.) */
4544 static void
4545 loop_bivs_check (struct loop *loop)
4546 {
4547 struct loop_ivs *ivs = LOOP_IVS (loop);
4548 /* Temporary list pointers for traversing ivs->list. */
4549 struct iv_class *bl;
4550 struct iv_class **backbl;
4551
4552 for (backbl = &ivs->list; (bl = *backbl); backbl = &bl->next)
4553 {
4554 rtx src;
4555 rtx note;
4556
4557 if (! bl->init_insn)
4558 continue;
4559
4560 /* If INIT_INSN has a REG_EQUAL or REG_EQUIV note and the value
4561 is a constant, use the value of that. */
4562 if (((note = find_reg_note (bl->init_insn, REG_EQUAL, 0)) != NULL
4563 && CONSTANT_P (XEXP (note, 0)))
4564 || ((note = find_reg_note (bl->init_insn, REG_EQUIV, 0)) != NULL
4565 && CONSTANT_P (XEXP (note, 0))))
4566 src = XEXP (note, 0);
4567 else
4568 src = SET_SRC (bl->init_set);
4569
4570 if (loop_dump_stream)
4571 fprintf (loop_dump_stream,
4572 "Biv %d: initialized at insn %d: initial value ",
4573 bl->regno, INSN_UID (bl->init_insn));
4574
4575 if ((GET_MODE (src) == GET_MODE (regno_reg_rtx[bl->regno])
4576 || GET_MODE (src) == VOIDmode)
4577 && valid_initial_value_p (src, bl->init_insn,
4578 LOOP_INFO (loop)->pre_header_has_call,
4579 loop->start))
4580 {
4581 bl->initial_value = src;
4582
4583 if (loop_dump_stream)
4584 {
4585 print_simple_rtl (loop_dump_stream, src);
4586 fputc ('\n', loop_dump_stream);
4587 }
4588 }
4589 /* If we can't make it a giv,
4590 let biv keep initial value of "itself". */
4591 else if (loop_dump_stream)
4592 fprintf (loop_dump_stream, "is complex\n");
4593 }
4594 }
4595
4596
4597 /* Search the loop for general induction variables. */
4598
4599 static void
4600 loop_givs_find (struct loop* loop)
4601 {
4602 for_each_insn_in_loop (loop, check_insn_for_givs);
4603 }
4604
4605
4606 /* For each giv for which we still don't know whether or not it is
4607 replaceable, check to see if it is replaceable because its final value
4608 can be calculated. */
4609
4610 static void
4611 loop_givs_check (struct loop *loop)
4612 {
4613 struct loop_ivs *ivs = LOOP_IVS (loop);
4614 struct iv_class *bl;
4615
4616 for (bl = ivs->list; bl; bl = bl->next)
4617 {
4618 struct induction *v;
4619
4620 for (v = bl->giv; v; v = v->next_iv)
4621 if (! v->replaceable && ! v->not_replaceable)
4622 check_final_value (loop, v);
4623 }
4624 }
4625
4626
4627 /* Return nonzero if it is possible to eliminate the biv BL provided
4628 all givs are reduced. This is possible if either the reg is not
4629 used outside the loop, or we can compute what its final value will
4630 be. */
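/* For instance (purely illustrative): in

       for (i = 0; i < n; i++)
         a[i] = 0;

   once every use of `i' has been reduced to a pointer giv, the biv `i'
   is eliminable if it is not used after the loop, or if its final value
   (here `n') can be computed and stored after the loop end. */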
4631
4632 static int
4633 loop_biv_eliminable_p (struct loop *loop, struct iv_class *bl,
4634 int threshold, int insn_count)
4635 {
4636 /* For architectures with a decrement_and_branch_until_zero insn,
4637 don't do this if we put a REG_NONNEG note on the endtest for this
4638 biv. */
4639
4640 #ifdef HAVE_decrement_and_branch_until_zero
4641 if (bl->nonneg)
4642 {
4643 if (loop_dump_stream)
4644 fprintf (loop_dump_stream,
4645 "Cannot eliminate nonneg biv %d.\n", bl->regno);
4646 return 0;
4647 }
4648 #endif
4649
4650 /* Check that biv is used outside loop or if it has a final value.
4651 Compare against bl->init_insn rather than loop->start. We aren't
4652 concerned with any uses of the biv between init_insn and
4653 loop->start since these won't be affected by the value of the biv
4654 elsewhere in the function, so long as init_insn doesn't use the
4655 biv itself. */
4656
4657 if ((REGNO_LAST_LUID (bl->regno) < INSN_LUID (loop->end)
4658 && bl->init_insn
4659 && INSN_UID (bl->init_insn) < max_uid_for_loop
4660 && REGNO_FIRST_LUID (bl->regno) >= INSN_LUID (bl->init_insn)
4661 && ! reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
4662 || (bl->final_value = final_biv_value (loop, bl)))
4663 return maybe_eliminate_biv (loop, bl, 0, threshold, insn_count);
4664
4665 if (loop_dump_stream)
4666 {
4667 fprintf (loop_dump_stream,
4668 "Cannot eliminate biv %d.\n",
4669 bl->regno);
4670 fprintf (loop_dump_stream,
4671 "First use: insn %d, last use: insn %d.\n",
4672 REGNO_FIRST_UID (bl->regno),
4673 REGNO_LAST_UID (bl->regno));
4674 }
4675 return 0;
4676 }
4677
4678
4679 /* Reduce each giv of BL that we have decided to reduce. */
4680
4681 static void
4682 loop_givs_reduce (struct loop *loop, struct iv_class *bl)
4683 {
4684 struct induction *v;
4685
4686 for (v = bl->giv; v; v = v->next_iv)
4687 {
4688 struct induction *tv;
4689 if (! v->ignore && v->same == 0)
4690 {
4691 int auto_inc_opt = 0;
4692
4693 /* If the code for derived givs immediately below has already
4694 allocated a new_reg, we must keep it. */
4695 if (! v->new_reg)
4696 v->new_reg = gen_reg_rtx (v->mode);
4697
4698 #ifdef AUTO_INC_DEC
4699 /* If the target has auto-increment addressing modes, and
4700 this is an address giv, then try to put the increment
4701 immediately after its use, so that flow can create an
4702 auto-increment addressing mode. */
4703 /* Don't do this for loops entered at the bottom, to avoid
4704 this invalid transformation:
4705 jmp L; -> jmp L;
4706 TOP: TOP:
4707 use giv use giv
4708 L: inc giv
4709 inc biv L:
4710 test biv test giv
4711 cbr TOP cbr TOP
4712 */
4713 if (v->giv_type == DEST_ADDR && bl->biv_count == 1
4714 && bl->biv->always_executed && ! bl->biv->maybe_multiple
4715 /* We don't handle reversed biv's because bl->biv->insn
4716 does not have a valid INSN_LUID. */
4717 && ! bl->reversed
4718 && v->always_executed && ! v->maybe_multiple
4719 && INSN_UID (v->insn) < max_uid_for_loop
4720 && !loop->top)
4721 {
4722 /* If other giv's have been combined with this one, then
4723 this will work only if all uses of the other giv's occur
4724 before this giv's insn. This is difficult to check.
4725
4726 We simplify this by looking for the common case where
4727 there is one DEST_REG giv, and this giv's insn is the
4728 last use of the dest_reg of that DEST_REG giv. If the
4729 increment occurs after the address giv, then we can
4730 perform the optimization. (Otherwise, the increment
4731 would have to go before other_giv, and we would not be
4732 able to combine it with the address giv to get an
4733 auto-inc address.) */
4734 if (v->combined_with)
4735 {
4736 struct induction *other_giv = 0;
4737
4738 for (tv = bl->giv; tv; tv = tv->next_iv)
4739 if (tv->same == v)
4740 {
4741 if (other_giv)
4742 break;
4743 else
4744 other_giv = tv;
4745 }
4746 if (! tv && other_giv
4747 && REGNO (other_giv->dest_reg) < max_reg_before_loop
4748 && (REGNO_LAST_UID (REGNO (other_giv->dest_reg))
4749 == INSN_UID (v->insn))
4750 && INSN_LUID (v->insn) < INSN_LUID (bl->biv->insn))
4751 auto_inc_opt = 1;
4752 }
4753 /* Check for case where increment is before the address
4754 giv. Do this test in "loop order". */
4755 else if ((INSN_LUID (v->insn) > INSN_LUID (bl->biv->insn)
4756 && (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4757 || (INSN_LUID (bl->biv->insn)
4758 > INSN_LUID (loop->scan_start))))
4759 || (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4760 && (INSN_LUID (loop->scan_start)
4761 < INSN_LUID (bl->biv->insn))))
4762 auto_inc_opt = -1;
4763 else
4764 auto_inc_opt = 1;
4765
4766 #ifdef HAVE_cc0
4767 {
4768 rtx prev;
4769
4770 /* We can't put an insn immediately after one setting
4771 cc0, or immediately before one using cc0. */
4772 if ((auto_inc_opt == 1 && sets_cc0_p (PATTERN (v->insn)))
4773 || (auto_inc_opt == -1
4774 && (prev = prev_nonnote_insn (v->insn)) != 0
4775 && INSN_P (prev)
4776 && sets_cc0_p (PATTERN (prev))))
4777 auto_inc_opt = 0;
4778 }
4779 #endif
4780
4781 if (auto_inc_opt)
4782 v->auto_inc_opt = 1;
4783 }
4784 #endif
4785
4786 /* For each place where the biv is incremented, add an insn
4787 to increment the new, reduced reg for the giv. */
4788 for (tv = bl->biv; tv; tv = tv->next_iv)
4789 {
4790 rtx insert_before;
4791
4792 /* Skip if location is the same as a previous one. */
4793 if (tv->same)
4794 continue;
4795 if (! auto_inc_opt)
4796 insert_before = NEXT_INSN (tv->insn);
4797 else if (auto_inc_opt == 1)
4798 insert_before = NEXT_INSN (v->insn);
4799 else
4800 insert_before = v->insn;
4801
4802 if (tv->mult_val == const1_rtx)
4803 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4804 v->new_reg, v->new_reg,
4805 0, insert_before);
4806 else /* tv->mult_val == const0_rtx */
4807 /* A multiply is acceptable here
4808 since this is presumed to be seldom executed. */
4809 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4810 v->add_val, v->new_reg,
4811 0, insert_before);
4812 }
4813
4814 /* Add code at loop start to initialize giv's reduced reg. */
4815
4816 loop_iv_add_mult_hoist (loop,
4817 extend_value_for_giv (v, bl->initial_value),
4818 v->mult_val, v->add_val, v->new_reg);
4819 }
4820 }
4821 }
4822
4823
4824 /* Check for givs whose first use is their definition and whose
4825 last use is the definition of another giv. If so, it is likely
4826 dead and should not be used to derive another giv nor to
4827 eliminate a biv. */
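/* Illustrative (hypothetical) case: a DEST_REG giv such as `t = i * 4'
   whose only use after its definition is in the insn defining another
   giv `u = t + base' is probably dead once `u' is computed directly
   from the biv, so it is marked maybe_dead. */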
4828
4829 static void
4830 loop_givs_dead_check (struct loop *loop ATTRIBUTE_UNUSED, struct iv_class *bl)
4831 {
4832 struct induction *v;
4833
4834 for (v = bl->giv; v; v = v->next_iv)
4835 {
4836 if (v->ignore
4837 || (v->same && v->same->ignore))
4838 continue;
4839
4840 if (v->giv_type == DEST_REG
4841 && REGNO_FIRST_UID (REGNO (v->dest_reg)) == INSN_UID (v->insn))
4842 {
4843 struct induction *v1;
4844
4845 for (v1 = bl->giv; v1; v1 = v1->next_iv)
4846 if (REGNO_LAST_UID (REGNO (v->dest_reg)) == INSN_UID (v1->insn))
4847 v->maybe_dead = 1;
4848 }
4849 }
4850 }
4851
4852
4853 static void
4854 loop_givs_rescan (struct loop *loop, struct iv_class *bl, rtx *reg_map)
4855 {
4856 struct induction *v;
4857
4858 for (v = bl->giv; v; v = v->next_iv)
4859 {
4860 if (v->same && v->same->ignore)
4861 v->ignore = 1;
4862
4863 if (v->ignore)
4864 continue;
4865
4866 /* Update expression if this was combined, in case other giv was
4867 replaced. */
4868 if (v->same)
4869 v->new_reg = replace_rtx (v->new_reg,
4870 v->same->dest_reg, v->same->new_reg);
4871
4872 /* See if this register is known to be a pointer to something. If
4873 so, see if we can find the alignment. First see if there is a
4874 destination register that is a pointer. If so, this shares the
4875 alignment too. Next see if we can deduce anything from the
4876 computational information. If not, and this is a DEST_ADDR
4877 giv, at least we know that it's a pointer, though we don't know
4878 the alignment. */
4879 if (REG_P (v->new_reg)
4880 && v->giv_type == DEST_REG
4881 && REG_POINTER (v->dest_reg))
4882 mark_reg_pointer (v->new_reg,
4883 REGNO_POINTER_ALIGN (REGNO (v->dest_reg)));
4884 else if (REG_P (v->new_reg)
4885 && REG_POINTER (v->src_reg))
4886 {
4887 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->src_reg));
4888
4889 if (align == 0
4890 || GET_CODE (v->add_val) != CONST_INT
4891 || INTVAL (v->add_val) % (align / BITS_PER_UNIT) != 0)
4892 align = 0;
4893
4894 mark_reg_pointer (v->new_reg, align);
4895 }
4896 else if (REG_P (v->new_reg)
4897 && REG_P (v->add_val)
4898 && REG_POINTER (v->add_val))
4899 {
4900 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->add_val));
4901
4902 if (align == 0 || GET_CODE (v->mult_val) != CONST_INT
4903 || INTVAL (v->mult_val) % (align / BITS_PER_UNIT) != 0)
4904 align = 0;
4905
4906 mark_reg_pointer (v->new_reg, align);
4907 }
4908 else if (REG_P (v->new_reg) && v->giv_type == DEST_ADDR)
4909 mark_reg_pointer (v->new_reg, 0);
4910
4911 if (v->giv_type == DEST_ADDR)
4912 /* Store reduced reg as the address in the memref where we found
4913 this giv. */
4914 validate_change (v->insn, v->location, v->new_reg, 0);
4915 else if (v->replaceable)
4916 {
4917 reg_map[REGNO (v->dest_reg)] = v->new_reg;
4918 }
4919 else
4920 {
4921 rtx original_insn = v->insn;
4922 rtx note;
4923
4924 /* Not replaceable; emit an insn to set the original giv reg from
4925 the reduced giv, same as above. */
4926 v->insn = loop_insn_emit_after (loop, 0, original_insn,
4927 gen_move_insn (v->dest_reg,
4928 v->new_reg));
4929
4930 /* The original insn may have a REG_EQUAL note. This note is
4931 now incorrect and may result in invalid substitutions later.
4932 The original insn is dead, but may be part of a libcall
4933 sequence, which doesn't seem worth the bother of handling. */
4934 note = find_reg_note (original_insn, REG_EQUAL, NULL_RTX);
4935 if (note)
4936 remove_note (original_insn, note);
4937 }
4938
4939 /* When a loop is reversed, givs which depend on the reversed
4940 biv, and which are live outside the loop, must be set to their
4941 correct final value. This insn is only needed if the giv is
4942 not replaceable. The correct final value is the same as the
4943 value that the giv starts the reversed loop with. */
4944 if (bl->reversed && ! v->replaceable)
4945 loop_iv_add_mult_sink (loop,
4946 extend_value_for_giv (v, bl->initial_value),
4947 v->mult_val, v->add_val, v->dest_reg);
4948 else if (v->final_value)
4949 loop_insn_sink_or_swim (loop,
4950 gen_load_of_final_value (v->dest_reg,
4951 v->final_value));
4952
4953 if (loop_dump_stream)
4954 {
4955 fprintf (loop_dump_stream, "giv at %d reduced to ",
4956 INSN_UID (v->insn));
4957 print_simple_rtl (loop_dump_stream, v->new_reg);
4958 fprintf (loop_dump_stream, "\n");
4959 }
4960 }
4961 }
4962
4963
4964 static int
4965 loop_giv_reduce_benefit (struct loop *loop ATTRIBUTE_UNUSED,
4966 struct iv_class *bl, struct induction *v,
4967 rtx test_reg)
4968 {
4969 int add_cost;
4970 int benefit;
4971
4972 benefit = v->benefit;
4973 PUT_MODE (test_reg, v->mode);
4974 add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val,
4975 test_reg, test_reg);
4976
4977 /* Reduce benefit if not replaceable, since we will insert a
4978 move-insn to replace the insn that calculates this giv. Don't do
4979 this unless the giv is a user variable, since it will often be
4980 marked non-replaceable because of the duplication of the exit
4981 code outside the loop. In such a case, the copies we insert are
4982 dead and will be deleted. So they don't have a cost. Similar
4983 situations exist. */
4984 /* ??? The new final_[bg]iv_value code does a much better job of
4985 finding replaceable giv's, and hence this code may no longer be
4986 necessary. */
4987 if (! v->replaceable && ! bl->eliminable
4988 && REG_USERVAR_P (v->dest_reg))
4989 benefit -= copy_cost;
4990
4991 /* Decrease the benefit to count the add-insns that we will insert
4992 to increment the reduced reg for the giv. ??? This can
4993 overestimate the run-time cost of the additional insns, e.g. if
4994 there are multiple basic blocks that increment the biv, but only
4995 one of these blocks is executed during each iteration. There is
4996 no good way to detect cases like this with the current structure
4997 of the loop optimizer. This code is more accurate for
4998 determining code size than run-time benefits. */
4999 benefit -= add_cost * bl->biv_count;
5000
5001 /* Decide whether to strength-reduce this giv or to leave the code
5002 unchanged (recompute it from the biv each time it is used). This
5003 decision can be made independently for each giv. */
5004
5005 #ifdef AUTO_INC_DEC
5006 /* Attempt to guess whether autoincrement will handle some of the
5007 new add insns; if so, increase BENEFIT (undo the subtraction of
5008 add_cost that was done above). */
5009 if (v->giv_type == DEST_ADDR
5010 /* Increasing the benefit is risky, since this is only a guess.
5011 Avoid increasing register pressure in cases where there would
5012 be no other benefit from reducing this giv. */
5013 && benefit > 0
5014 && GET_CODE (v->mult_val) == CONST_INT)
5015 {
5016 int size = GET_MODE_SIZE (GET_MODE (v->mem));
5017
5018 if (HAVE_POST_INCREMENT
5019 && INTVAL (v->mult_val) == size)
5020 benefit += add_cost * bl->biv_count;
5021 else if (HAVE_PRE_INCREMENT
5022 && INTVAL (v->mult_val) == size)
5023 benefit += add_cost * bl->biv_count;
5024 else if (HAVE_POST_DECREMENT
5025 && -INTVAL (v->mult_val) == size)
5026 benefit += add_cost * bl->biv_count;
5027 else if (HAVE_PRE_DECREMENT
5028 && -INTVAL (v->mult_val) == size)
5029 benefit += add_cost * bl->biv_count;
5030 }
5031 #endif
5032
5033 return benefit;
5034 }
5035
5036
5037 /* Free IV structures for LOOP. */
5038
5039 static void
5040 loop_ivs_free (struct loop *loop)
5041 {
5042 struct loop_ivs *ivs = LOOP_IVS (loop);
5043 struct iv_class *iv = ivs->list;
5044
5045 free (ivs->regs);
5046
5047 while (iv)
5048 {
5049 struct iv_class *next = iv->next;
5050 struct induction *induction;
5051 struct induction *next_induction;
5052
5053 for (induction = iv->biv; induction; induction = next_induction)
5054 {
5055 next_induction = induction->next_iv;
5056 free (induction);
5057 }
5058 for (induction = iv->giv; induction; induction = next_induction)
5059 {
5060 next_induction = induction->next_iv;
5061 free (induction);
5062 }
5063
5064 free (iv);
5065 iv = next;
5066 }
5067 }
5068
5069
5070 /* Perform strength reduction and induction variable elimination.
5071
5072 Pseudo registers created during this function will be beyond the
5073 last valid index in several tables including
5074 REGS->ARRAY[I].N_TIMES_SET and REGNO_LAST_UID. This does not cause a
5075 problem here, because the added registers cannot be givs outside of
5076 their loop, and hence will never be reconsidered. But scan_loop
5077 must check regnos to make sure they are in bounds. */
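/* As a loose illustration of the overall effect (hypothetical source):

       for (i = 0; i < n; i++)
         a[i] = 0;

   may effectively become

       for (p = &a[0]; p < &a[n]; p++)
         *p = 0;

   where the address giv `&a[i]' is reduced to the pointer `p' and the
   biv `i' is eliminated once the loop test can be rewritten in terms
   of the giv. */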
5078
5079 static void
5080 strength_reduce (struct loop *loop, int flags)
5081 {
5082 struct loop_info *loop_info = LOOP_INFO (loop);
5083 struct loop_regs *regs = LOOP_REGS (loop);
5084 struct loop_ivs *ivs = LOOP_IVS (loop);
5085 rtx p;
5086 /* Temporary list pointer for traversing ivs->list. */
5087 struct iv_class *bl;
5088 /* Ratio of extra register life span we can justify
5089 for saving an instruction. More if loop doesn't call subroutines
5090 since in that case saving an insn makes more difference
5091 and more registers are available. */
5092 /* ??? could set this to last value of threshold in move_movables */
5093 int threshold = (loop_info->has_call ? 1 : 2) * (3 + n_non_fixed_regs);
5094 /* Map of pseudo-register replacements. */
5095 rtx *reg_map = NULL;
5096 int reg_map_size;
5097 int unrolled_insn_copies = 0;
5098 rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
5099 int insn_count = count_insns_in_loop (loop);
5100
5101 addr_placeholder = gen_reg_rtx (Pmode);
5102
5103 ivs->n_regs = max_reg_before_loop;
5104 ivs->regs = xcalloc (ivs->n_regs, sizeof (struct iv));
5105
5106 /* Find all BIVs in loop. */
5107 loop_bivs_find (loop);
5108
5109 /* Exit if there are no bivs. */
5110 if (! ivs->list)
5111 {
5112 /* Can still unroll the loop anyway, but indicate that there is no
5113 strength reduction info available. */
5114 if (flags & LOOP_UNROLL)
5115 unroll_loop (loop, insn_count, 0);
5116
5117 loop_ivs_free (loop);
5118 return;
5119 }
5120
5121 /* Determine how BIVS are initialized by looking through pre-header
5122 extended basic block. */
5123 loop_bivs_init_find (loop);
5124
5125 /* Look at each biv and see if we can say anything better about its
5126 initial value from any initializing insns set up above. */
5127 loop_bivs_check (loop);
5128
5129 /* Search the loop for general induction variables. */
5130 loop_givs_find (loop);
5131
5132 /* Try to calculate and save the number of loop iterations. This is
5133 set to zero if the actual number can not be calculated. This must
5134 be called after all giv's have been identified, since otherwise it may
5135 fail if the iteration variable is a giv. */
5136 loop_iterations (loop);
5137
5138 #ifdef HAVE_prefetch
5139 if (flags & LOOP_PREFETCH)
5140 emit_prefetch_instructions (loop);
5141 #endif
5142
5143 /* Now for each giv for which we still don't know whether or not it is
5144 replaceable, check to see if it is replaceable because its final value
5145 can be calculated. This must be done after loop_iterations is called,
5146 so that final_giv_value will work correctly. */
5147 loop_givs_check (loop);
5148
5149 /* Try to prove that the loop counter variable (if any) is always
5150 nonnegative; if so, record that fact with a REG_NONNEG note
5151 so that "decrement and branch until zero" insn can be used. */
5152 check_dbra_loop (loop, insn_count);
5153
5154 /* Create reg_map to hold substitutions for replaceable giv regs.
5155 Some givs might have been made from biv increments, so look at
5156 ivs->reg_iv_type for a suitable size. */
5157 reg_map_size = ivs->n_regs;
5158 reg_map = xcalloc (reg_map_size, sizeof (rtx));
5159
5160 /* Examine each iv class for feasibility of strength reduction/induction
5161 variable elimination. */
5162
5163 for (bl = ivs->list; bl; bl = bl->next)
5164 {
5165 struct induction *v;
5166 int benefit;
5167
5168 /* Test whether it will be possible to eliminate this biv
5169 provided all givs are reduced. */
5170 bl->eliminable = loop_biv_eliminable_p (loop, bl, threshold, insn_count);
5171
5172 /* This will be true at the end, if all givs which depend on this
5173 biv have been strength reduced.
5174 We can't (currently) eliminate the biv unless this is so. */
5175 bl->all_reduced = 1;
5176
5177 /* Check each extension dependent giv in this class to see if its
5178 root biv is safe from wrapping in the interior mode. */
5179 check_ext_dependent_givs (loop, bl);
5180
5181 /* Combine all giv's for this iv_class. */
5182 combine_givs (regs, bl);
5183
5184 for (v = bl->giv; v; v = v->next_iv)
5185 {
5186 struct induction *tv;
5187
5188 if (v->ignore || v->same)
5189 continue;
5190
5191 benefit = loop_giv_reduce_benefit (loop, bl, v, test_reg);
5192
5193 /* If an insn is not to be strength reduced, then set its ignore
5194 flag, and clear bl->all_reduced. */
5195
5196 /* A giv that depends on a reversed biv must be reduced if it is
5197 used after the loop exit, otherwise, it would have the wrong
5198 used after the loop exit; otherwise, it would have the wrong
5199 value after the loop exit. To make it simple, just reduce all
5200 such givs whether or not we know they are used after the loop
5201
5202 if (! flag_reduce_all_givs
5203 && v->lifetime * threshold * benefit < insn_count
5204 && ! bl->reversed)
5205 {
5206 if (loop_dump_stream)
5207 fprintf (loop_dump_stream,
5208 "giv of insn %d not worth while, %d vs %d.\n",
5209 INSN_UID (v->insn),
5210 v->lifetime * threshold * benefit, insn_count);
5211 v->ignore = 1;
5212 bl->all_reduced = 0;
5213 }
5214 else
5215 {
5216 /* Check that we can increment the reduced giv without a
5217 multiply insn. If not, reject it. */
5218
5219 for (tv = bl->biv; tv; tv = tv->next_iv)
5220 if (tv->mult_val == const1_rtx
5221 && ! product_cheap_p (tv->add_val, v->mult_val))
5222 {
5223 if (loop_dump_stream)
5224 fprintf (loop_dump_stream,
5225 "giv of insn %d: would need a multiply.\n",
5226 INSN_UID (v->insn));
5227 v->ignore = 1;
5228 bl->all_reduced = 0;
5229 break;
5230 }
5231 }
5232 }
5233
5234 /* Check for givs whose first use is their definition and whose
5235 last use is the definition of another giv. If so, it is likely
5236 dead and should not be used to derive another giv nor to
5237 eliminate a biv. */
5238 loop_givs_dead_check (loop, bl);
5239
5240 /* Reduce each giv that we decided to reduce. */
5241 loop_givs_reduce (loop, bl);
5242
5243 /* Rescan all givs. If a giv is the same as a giv not reduced, mark it
5244 as not reduced.
5245
5246 For each giv register that can be reduced now: if replaceable,
5247 substitute reduced reg wherever the old giv occurs;
5248 else add new move insn "giv_reg = reduced_reg". */
5249 loop_givs_rescan (loop, bl, reg_map);
5250
5251 /* All the givs based on the biv bl have been reduced if they
5252 merit it. */
5253
5254 /* For each giv not marked as maybe dead that has been combined with a
5255 second giv, clear any "maybe dead" mark on that second giv.
5256 v->new_reg will either be or refer to the register of the giv it
5257 combined with.
5258
5259 Doing this clearing avoids problems in biv elimination where
5260 a giv's new_reg is a complex value that can't be put in the
5261 insn but the giv combined with (with a reg as new_reg) is
5262 marked maybe_dead. Since the register will be used in either
5263 case, we'd prefer it be used from the simpler giv. */
5264
5265 for (v = bl->giv; v; v = v->next_iv)
5266 if (! v->maybe_dead && v->same)
5267 v->same->maybe_dead = 0;
5268
5269 /* Try to eliminate the biv, if it is a candidate.
5270 This won't work if ! bl->all_reduced,
5271 since the givs we planned to use might not have been reduced.
5272
5273 We have to be careful that we didn't initially think we could
5274 eliminate this biv because of a giv that we now think may be
5275 dead and shouldn't be used as a biv replacement.
5276
5277 Also, there is the possibility that we may have a giv that looks
5278 like it can be used to eliminate a biv, but the resulting insn
5279 isn't valid. This can happen, for example, on the 88k, where a
5280 JUMP_INSN can compare a register only with zero. Attempts to
5281 replace it with a compare with a constant will fail.
5282
5283 Note that in cases where this call fails, we may have replaced some
5284 of the occurrences of the biv with a giv, but no harm was done in
5285 doing so in the rare cases where it can occur. */
5286
5287 if (bl->all_reduced == 1 && bl->eliminable
5288 && maybe_eliminate_biv (loop, bl, 1, threshold, insn_count))
5289 {
5290 /* ?? If we created a new test to bypass the loop entirely,
5291 or otherwise drop straight in, based on this test, then
5292 we might want to rewrite it also. This way some later
5293 pass has more hope of removing the initialization of this
5294 biv entirely. */
5295
5296 /* If final_value != 0, then the biv may be used after loop end
5297 and we must emit an insn to set it just in case.
5298
5299 Reversed bivs already have an insn after the loop setting their
5300 value, so we don't need another one. We can't calculate the
5301 proper final value for such a biv here anyways. */
5302 if (bl->final_value && ! bl->reversed)
5303 loop_insn_sink_or_swim (loop,
5304 gen_load_of_final_value (bl->biv->dest_reg,
5305 bl->final_value));
5306
5307 if (loop_dump_stream)
5308 fprintf (loop_dump_stream, "Reg %d: biv eliminated\n",
5309 bl->regno);
5310 }
5311 /* See above note wrt final_value. But since we couldn't eliminate
5312 the biv, we must set the value after the loop instead of before. */
5313 else if (bl->final_value && ! bl->reversed)
5314 loop_insn_sink (loop, gen_load_of_final_value (bl->biv->dest_reg,
5315 bl->final_value));
5316 }
5317
5318 /* Go through all the instructions in the loop, making all the
5319 register substitutions scheduled in REG_MAP. */
5320
5321 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
5322 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5323 || GET_CODE (p) == CALL_INSN)
5324 {
5325 replace_regs (PATTERN (p), reg_map, reg_map_size, 0);
5326 replace_regs (REG_NOTES (p), reg_map, reg_map_size, 0);
5327 INSN_CODE (p) = -1;
5328 }
5329
5330 if (loop_info->n_iterations > 0)
5331 {
5332 /* When we completely unroll a loop we will likely not need the increment
5333 of the loop BIV and we will not need the conditional branch at the
5334 end of the loop. */
5335 unrolled_insn_copies = insn_count - 2;
5336
5337 #ifdef HAVE_cc0
5338 /* When we completely unroll a loop on a HAVE_cc0 machine we will not
5339 need the comparison before the conditional branch at the end of the
5340 loop. */
5341 unrolled_insn_copies -= 1;
5342 #endif
5343
5344 /* We'll need one copy for each loop iteration. */
5345 unrolled_insn_copies *= loop_info->n_iterations;
5346
5347 /* A little slop to account for the ability to remove initialization
5348 code, better CSE, and other secondary benefits of completely
5349 unrolling some loops. */
5350 unrolled_insn_copies -= 1;
5351
5352 /* Clamp the value. */
5353 if (unrolled_insn_copies < 0)
5354 unrolled_insn_copies = 0;
5355 }
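/* Worked example with illustrative numbers: for a loop of 10 insns known
   to iterate 4 times, the estimate above is (10 - 2) * 4 - 1 = 31 copies,
   or (10 - 3) * 4 - 1 = 27 on a HAVE_cc0 target.  */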
5356
5357 /* Unroll loops from within strength reduction so that we can use the
5358 induction variable information that strength_reduce has already
5359 collected. Always unroll loops that would be as small or smaller
5360 unrolled than when rolled. */
5361 if ((flags & LOOP_UNROLL)
5362 || ((flags & LOOP_AUTO_UNROLL)
5363 && loop_info->n_iterations > 0
5364 && unrolled_insn_copies <= insn_count))
5365 unroll_loop (loop, insn_count, 1);
5366
5367 if (loop_dump_stream)
5368 fprintf (loop_dump_stream, "\n");
5369
5370 loop_ivs_free (loop);
5371 if (reg_map)
5372 free (reg_map);
5373 }
5374 \f
5375 /* Record all basic induction variables calculated in the insn. */
5376 static rtx
5377 check_insn_for_bivs (struct loop *loop, rtx p, int not_every_iteration,
5378 int maybe_multiple)
5379 {
5380 struct loop_ivs *ivs = LOOP_IVS (loop);
5381 rtx set;
5382 rtx dest_reg;
5383 rtx inc_val;
5384 rtx mult_val;
5385 rtx *location;
5386
5387 if (GET_CODE (p) == INSN
5388 && (set = single_set (p))
5389 && REG_P (SET_DEST (set)))
5390 {
5391 dest_reg = SET_DEST (set);
5392 if (REGNO (dest_reg) < max_reg_before_loop
5393 && REGNO (dest_reg) >= FIRST_PSEUDO_REGISTER
5394 && REG_IV_TYPE (ivs, REGNO (dest_reg)) != NOT_BASIC_INDUCT)
5395 {
5396 if (basic_induction_var (loop, SET_SRC (set),
5397 GET_MODE (SET_SRC (set)),
5398 dest_reg, p, &inc_val, &mult_val,
5399 &location))
5400 {
5401 /* It is a possible basic induction variable.
5402 Create and initialize an induction structure for it. */
5403
5404 struct induction *v = xmalloc (sizeof (struct induction));
5405
5406 record_biv (loop, v, p, dest_reg, inc_val, mult_val, location,
5407 not_every_iteration, maybe_multiple);
5408 REG_IV_TYPE (ivs, REGNO (dest_reg)) = BASIC_INDUCT;
5409 }
5410 else if (REGNO (dest_reg) < ivs->n_regs)
5411 REG_IV_TYPE (ivs, REGNO (dest_reg)) = NOT_BASIC_INDUCT;
5412 }
5413 }
5414 return p;
5415 }
5416 \f
5417 /* Record all givs calculated in the insn.
5418 A register is a giv if: it is only set once, it is a function of a
5419 biv and a constant (or invariant), and it is not a biv. */
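/* For example (illustrative; the analysis itself is on RTL insns): in

       for (i = 0; i < n; i++)
         {
           t = i * 4;
           ...
         }

   the register holding `t' is a giv of the biv `i', with mult_val == 4
   and add_val == 0, while `i' itself is a biv and is not recorded here.  */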
5420 static rtx
5421 check_insn_for_givs (struct loop *loop, rtx p, int not_every_iteration,
5422 int maybe_multiple)
5423 {
5424 struct loop_regs *regs = LOOP_REGS (loop);
5425
5426 rtx set;
5427 /* Look for a general induction variable in a register. */
5428 if (GET_CODE (p) == INSN
5429 && (set = single_set (p))
5430 && REG_P (SET_DEST (set))
5431 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
5432 {
5433 rtx src_reg;
5434 rtx dest_reg;
5435 rtx add_val;
5436 rtx mult_val;
5437 rtx ext_val;
5438 int benefit;
5439 rtx regnote = 0;
5440 rtx last_consec_insn;
5441
5442 dest_reg = SET_DEST (set);
5443 if (REGNO (dest_reg) < FIRST_PSEUDO_REGISTER)
5444 return p;
5445
5446 if (/* SET_SRC is a giv. */
5447 (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
5448 &mult_val, &ext_val, 0, &benefit, VOIDmode)
5449 /* Equivalent expression is a giv. */
5450 || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
5451 && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
5452 &add_val, &mult_val, &ext_val, 0,
5453 &benefit, VOIDmode)))
5454 /* Don't try to handle any regs made by loop optimization.
5455 We have nothing on them in regno_first_uid, etc. */
5456 && REGNO (dest_reg) < max_reg_before_loop
5457 /* Don't recognize a BASIC_INDUCT_VAR here. */
5458 && dest_reg != src_reg
5459 /* This must be the only place where the register is set. */
5460 && (regs->array[REGNO (dest_reg)].n_times_set == 1
5461 /* or all sets must be consecutive and make a giv. */
5462 || (benefit = consec_sets_giv (loop, benefit, p,
5463 src_reg, dest_reg,
5464 &add_val, &mult_val, &ext_val,
5465 &last_consec_insn))))
5466 {
5467 struct induction *v = xmalloc (sizeof (struct induction));
5468
5469 /* If this is a library call, increase benefit. */
5470 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
5471 benefit += libcall_benefit (p);
5472
5473 /* Skip the consecutive insns, if there are any. */
5474 if (regs->array[REGNO (dest_reg)].n_times_set != 1)
5475 p = last_consec_insn;
5476
5477 record_giv (loop, v, p, src_reg, dest_reg, mult_val, add_val,
5478 ext_val, benefit, DEST_REG, not_every_iteration,
5479 maybe_multiple, (rtx*) 0);
5480
5481 }
5482 }
5483
5484 /* Look for givs which are memory addresses. */
5485 if (GET_CODE (p) == INSN)
5486 find_mem_givs (loop, PATTERN (p), p, not_every_iteration,
5487 maybe_multiple);
5488
5489 /* Update the status of whether giv can derive other givs. This can
5490 change when we pass a label or an insn that updates a biv. */
5491 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5492 || GET_CODE (p) == CODE_LABEL)
5493 update_giv_derive (loop, p);
5494 return p;
5495 }
5496 \f
5497 /* Return 1 if X is a valid source for an initial value (or as value being
5498 compared against in an initial test).
5499
5500 X must be either a register or constant and must not be clobbered between
5501 the current insn and the start of the loop.
5502
5503 INSN is the insn containing X. */
5504
5505 static int
5506 valid_initial_value_p (rtx x, rtx insn, int call_seen, rtx loop_start)
5507 {
5508 if (CONSTANT_P (x))
5509 return 1;
5510
5511 /* Only consider pseudos we know about, initialized in insns whose luids
5512 we know. */
5513 if (!REG_P (x)
5514 || REGNO (x) >= max_reg_before_loop)
5515 return 0;
5516
5517 /* Don't use a call-clobbered register across a call which clobbers it. On
5518 some machines, don't use any hard registers at all. */
5519 if (REGNO (x) < FIRST_PSEUDO_REGISTER
5520 && (SMALL_REGISTER_CLASSES
5521 || (call_used_regs[REGNO (x)] && call_seen)))
5522 return 0;
5523
5524 /* Don't use registers that have been clobbered before the start of the
5525 loop. */
5526 if (reg_set_between_p (x, insn, loop_start))
5527 return 0;
5528
5529 return 1;
5530 }
5531 \f
5532 /* Scan X for memory refs and check each memory address
5533 as a possible giv. INSN is the insn whose pattern X comes from.
5534 NOT_EVERY_ITERATION is 1 if the insn might not be executed during
5535 every loop iteration. MAYBE_MULTIPLE is 1 if the insn might be executed
5536 more than once in each loop iteration. */
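/* For example (illustrative; the exact form depends on the target): a
   reference `a[i]' with 4-byte elements typically yields a memory address
   such as (plus (mult (reg i) (const_int 4)) (reg a)), which is recorded
   as a DEST_ADDR giv of the biv `i' so that it can later be strength
   reduced or combined with related address givs.  */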
5537
5538 static void
5539 find_mem_givs (const struct loop *loop, rtx x, rtx insn,
5540 int not_every_iteration, int maybe_multiple)
5541 {
5542 int i, j;
5543 enum rtx_code code;
5544 const char *fmt;
5545
5546 if (x == 0)
5547 return;
5548
5549 code = GET_CODE (x);
5550 switch (code)
5551 {
5552 case REG:
5553 case CONST_INT:
5554 case CONST:
5555 case CONST_DOUBLE:
5556 case SYMBOL_REF:
5557 case LABEL_REF:
5558 case PC:
5559 case CC0:
5560 case ADDR_VEC:
5561 case ADDR_DIFF_VEC:
5562 case USE:
5563 case CLOBBER:
5564 return;
5565
5566 case MEM:
5567 {
5568 rtx src_reg;
5569 rtx add_val;
5570 rtx mult_val;
5571 rtx ext_val;
5572 int benefit;
5573
5574 /* This code used to disable creating GIVs with mult_val == 1 and
5575 add_val == 0. However, this leads to lost optimizations when
5576 it comes time to combine a set of related DEST_ADDR GIVs, since
5577 this one would not be seen. */
5578
5579 if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
5580 &mult_val, &ext_val, 1, &benefit,
5581 GET_MODE (x)))
5582 {
5583 /* Found one; record it. */
5584 struct induction *v = xmalloc (sizeof (struct induction));
5585
5586 record_giv (loop, v, insn, src_reg, addr_placeholder, mult_val,
5587 add_val, ext_val, benefit, DEST_ADDR,
5588 not_every_iteration, maybe_multiple, &XEXP (x, 0));
5589
5590 v->mem = x;
5591 }
5592 }
5593 return;
5594
5595 default:
5596 break;
5597 }
5598
5599 /* Recursively scan the subexpressions for other mem refs. */
5600
5601 fmt = GET_RTX_FORMAT (code);
5602 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5603 if (fmt[i] == 'e')
5604 find_mem_givs (loop, XEXP (x, i), insn, not_every_iteration,
5605 maybe_multiple);
5606 else if (fmt[i] == 'E')
5607 for (j = 0; j < XVECLEN (x, i); j++)
5608 find_mem_givs (loop, XVECEXP (x, i, j), insn, not_every_iteration,
5609 maybe_multiple);
5610 }
5611 \f
5612 /* Fill in the data about one biv update.
5613 V is the `struct induction' in which we record the biv. (It is
5614 allocated by the caller.)
5615 INSN is the insn that sets it.
5616 DEST_REG is the biv's reg.
5617
5618 MULT_VAL is const1_rtx if the biv is being incremented here, in which case
5619 INC_VAL is the increment. Otherwise, MULT_VAL is const0_rtx and the biv is
5620 being set to INC_VAL.
5621
5622 NOT_EVERY_ITERATION is nonzero if this biv update is not known to be
5623 executed every iteration; MAYBE_MULTIPLE is nonzero if this biv update
5624 can be executed more than once per iteration. If MAYBE_MULTIPLE
5625 and NOT_EVERY_ITERATION are both zero, we know that the biv update is
5626 executed exactly once per iteration. */
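/* For example (illustrative): the update `i = i + 4' is recorded with
   MULT_VAL == const1_rtx and INC_VAL == (const_int 4); a set such as
   `i = 10' recognized as a biv initialization is recorded with
   MULT_VAL == const0_rtx and INC_VAL == (const_int 10).  */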
5627
5628 static void
5629 record_biv (struct loop *loop, struct induction *v, rtx insn, rtx dest_reg,
5630 rtx inc_val, rtx mult_val, rtx *location,
5631 int not_every_iteration, int maybe_multiple)
5632 {
5633 struct loop_ivs *ivs = LOOP_IVS (loop);
5634 struct iv_class *bl;
5635
5636 v->insn = insn;
5637 v->src_reg = dest_reg;
5638 v->dest_reg = dest_reg;
5639 v->mult_val = mult_val;
5640 v->add_val = inc_val;
5641 v->ext_dependent = NULL_RTX;
5642 v->location = location;
5643 v->mode = GET_MODE (dest_reg);
5644 v->always_computable = ! not_every_iteration;
5645 v->always_executed = ! not_every_iteration;
5646 v->maybe_multiple = maybe_multiple;
5647 v->same = 0;
5648
5649 /* Add this to the reg's iv_class, creating a class
5650 if this is the first incrementation of the reg. */
5651
5652 bl = REG_IV_CLASS (ivs, REGNO (dest_reg));
5653 if (bl == 0)
5654 {
5655 /* Create and initialize new iv_class. */
5656
5657 bl = xmalloc (sizeof (struct iv_class));
5658
5659 bl->regno = REGNO (dest_reg);
5660 bl->biv = 0;
5661 bl->giv = 0;
5662 bl->biv_count = 0;
5663 bl->giv_count = 0;
5664
5665 /* Set initial value to the reg itself. */
5666 bl->initial_value = dest_reg;
5667 bl->final_value = 0;
5668 /* We haven't seen the initializing insn yet. */
5669 bl->init_insn = 0;
5670 bl->init_set = 0;
5671 bl->initial_test = 0;
5672 bl->incremented = 0;
5673 bl->eliminable = 0;
5674 bl->nonneg = 0;
5675 bl->reversed = 0;
5676 bl->total_benefit = 0;
5677
5678 /* Add this class to ivs->list. */
5679 bl->next = ivs->list;
5680 ivs->list = bl;
5681
5682 /* Put it in the array of biv register classes. */
5683 REG_IV_CLASS (ivs, REGNO (dest_reg)) = bl;
5684 }
5685 else
5686 {
5687 /* Check if location is the same as a previous one. */
5688 struct induction *induction;
5689 for (induction = bl->biv; induction; induction = induction->next_iv)
5690 if (location == induction->location)
5691 {
5692 v->same = induction;
5693 break;
5694 }
5695 }
5696
5697 /* Update IV_CLASS entry for this biv. */
5698 v->next_iv = bl->biv;
5699 bl->biv = v;
5700 bl->biv_count++;
5701 if (mult_val == const1_rtx)
5702 bl->incremented = 1;
5703
5704 if (loop_dump_stream)
5705 loop_biv_dump (v, loop_dump_stream, 0);
5706 }
5707 \f
5708 /* Fill in the data about one giv.
5709 V is the `struct induction' in which we record the giv. (It is
5710 allocated by the caller.)
5711 INSN is the insn that sets it.
5712 BENEFIT estimates the savings from deleting this insn.
5713 TYPE is DEST_REG or DEST_ADDR; it says whether the giv is computed
5714 into a register or is used as a memory address.
5715
5716 SRC_REG is the biv reg which the giv is computed from.
5717 DEST_REG is the giv's reg (if the giv is stored in a reg).
5718 MULT_VAL and ADD_VAL are the coefficients used to compute the giv.
5719 LOCATION points to the place where this giv's value appears in INSN. */
5720
5721 static void
5722 record_giv (const struct loop *loop, struct induction *v, rtx insn,
5723 rtx src_reg, rtx dest_reg, rtx mult_val, rtx add_val,
5724 rtx ext_val, int benefit, enum g_types type,
5725 int not_every_iteration, int maybe_multiple, rtx *location)
5726 {
5727 struct loop_ivs *ivs = LOOP_IVS (loop);
5728 struct induction *b;
5729 struct iv_class *bl;
5730 rtx set = single_set (insn);
5731 rtx temp;
5732
5733 /* Attempt to prove constantness of the values. Don't let simplify_rtx
5734 undo the MULT canonicalization that we performed earlier. */
5735 temp = simplify_rtx (add_val);
5736 if (temp
5737 && ! (GET_CODE (add_val) == MULT
5738 && GET_CODE (temp) == ASHIFT))
5739 add_val = temp;
5740
5741 v->insn = insn;
5742 v->src_reg = src_reg;
5743 v->giv_type = type;
5744 v->dest_reg = dest_reg;
5745 v->mult_val = mult_val;
5746 v->add_val = add_val;
5747 v->ext_dependent = ext_val;
5748 v->benefit = benefit;
5749 v->location = location;
5750 v->cant_derive = 0;
5751 v->combined_with = 0;
5752 v->maybe_multiple = maybe_multiple;
5753 v->maybe_dead = 0;
5754 v->derive_adjustment = 0;
5755 v->same = 0;
5756 v->ignore = 0;
5757 v->new_reg = 0;
5758 v->final_value = 0;
5759 v->same_insn = 0;
5760 v->auto_inc_opt = 0;
5761 v->unrolled = 0;
5762 v->shared = 0;
5763
5764 /* The v->always_computable field is used in update_giv_derive, to
5765 determine whether a giv can be used to derive another giv. For a
5766 DEST_REG giv, INSN computes a new value for the giv, so its value
5767 isn't computable if INSN isn't executed every iteration.
5768 However, for a DEST_ADDR giv, INSN merely uses the value of the giv;
5769 it does not compute a new value. Hence the value is always computable
5770 regardless of whether INSN is executed each iteration. */
5771
5772 if (type == DEST_ADDR)
5773 v->always_computable = 1;
5774 else
5775 v->always_computable = ! not_every_iteration;
5776
5777 v->always_executed = ! not_every_iteration;
5778
5779 if (type == DEST_ADDR)
5780 {
5781 v->mode = GET_MODE (*location);
5782 v->lifetime = 1;
5783 }
5784 else /* type == DEST_REG */
5785 {
5786 v->mode = GET_MODE (SET_DEST (set));
5787
5788 v->lifetime = LOOP_REG_LIFETIME (loop, REGNO (dest_reg));
5789
5790 /* If the lifetime is zero, it means that this register is
5791 really a dead store. So mark this as a giv that can be
5792 ignored. This will not prevent the biv from being eliminated. */
5793 if (v->lifetime == 0)
5794 v->ignore = 1;
5795
5796 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
5797 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
5798 }
5799
5800 /* Add the giv to the class of givs computed from one biv. */
5801
5802 bl = REG_IV_CLASS (ivs, REGNO (src_reg));
5803 if (bl)
5804 {
5805 v->next_iv = bl->giv;
5806 bl->giv = v;
5807 /* Don't count DEST_ADDR. This is supposed to count the number of
5808 insns that calculate givs. */
5809 if (type == DEST_REG)
5810 bl->giv_count++;
5811 bl->total_benefit += benefit;
5812 }
5813 else
5814 /* Fatal error, biv missing for this giv? */
5815 abort ();
5816
5817 if (type == DEST_ADDR)
5818 {
5819 v->replaceable = 1;
5820 v->not_replaceable = 0;
5821 }
5822 else
5823 {
5824 /* The giv can be replaced outright by the reduced register only if all
5825 of the following conditions are true:
5826 - the insn that sets the giv is always executed on any iteration
5827 on which the giv is used at all
5828 (there are two ways to deduce this:
5829 either the insn is executed on every iteration,
5830 or all uses follow that insn in the same basic block),
5831 - the giv is not used outside the loop
5832 - no assignments to the biv occur during the giv's lifetime. */
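/* For example (illustrative): for a giv set by `t = i * 4', if `t' is
   still used after a later `i = i + 1' in the same iteration, the last
   condition fails and the giv cannot be replaced outright by the
   reduced register.  */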
5833
5834 if (REGNO_FIRST_UID (REGNO (dest_reg)) == INSN_UID (insn)
5835 /* Previous line always fails if INSN was moved by loop opt. */
5836 && REGNO_LAST_LUID (REGNO (dest_reg))
5837 < INSN_LUID (loop->end)
5838 && (! not_every_iteration
5839 || last_use_this_basic_block (dest_reg, insn)))
5840 {
5841 /* Now check that there are no assignments to the biv within the
5842 giv's lifetime. This requires two separate checks. */
5843
5844 /* Check each biv update, and fail if any are between the first
5845 and last use of the giv.
5846
5847 If this loop contains an inner loop that was unrolled, then
5848 the insn modifying the biv may have been emitted by the loop
5849 unrolling code, and hence does not have a valid luid. Just
5850 mark the biv as not replaceable in this case. It is not very
5851 useful as a biv, because it is used in two different loops.
5852 It is very unlikely that we would be able to optimize the giv
5853 using this biv anyways. */
5854
5855 v->replaceable = 1;
5856 v->not_replaceable = 0;
5857 for (b = bl->biv; b; b = b->next_iv)
5858 {
5859 if (INSN_UID (b->insn) >= max_uid_for_loop
5860 || ((INSN_LUID (b->insn)
5861 >= REGNO_FIRST_LUID (REGNO (dest_reg)))
5862 && (INSN_LUID (b->insn)
5863 <= REGNO_LAST_LUID (REGNO (dest_reg)))))
5864 {
5865 v->replaceable = 0;
5866 v->not_replaceable = 1;
5867 break;
5868 }
5869 }
5870
5871 /* If there are any backwards branches that go from after the
5872 biv update to before it, then this giv is not replaceable. */
5873 if (v->replaceable)
5874 for (b = bl->biv; b; b = b->next_iv)
5875 if (back_branch_in_range_p (loop, b->insn))
5876 {
5877 v->replaceable = 0;
5878 v->not_replaceable = 1;
5879 break;
5880 }
5881 }
5882 else
5883 {
5884 /* May still be replaceable, we don't have enough info here to
5885 decide. */
5886 v->replaceable = 0;
5887 v->not_replaceable = 0;
5888 }
5889 }
5890
5891 /* Record whether the add_val contains a const_int, for later use by
5892 combine_givs. */
5893 {
5894 rtx tem = add_val;
5895
5896 v->no_const_addval = 1;
5897 if (tem == const0_rtx)
5898 ;
5899 else if (CONSTANT_P (add_val))
5900 v->no_const_addval = 0;
5901 if (GET_CODE (tem) == PLUS)
5902 {
5903 while (1)
5904 {
5905 if (GET_CODE (XEXP (tem, 0)) == PLUS)
5906 tem = XEXP (tem, 0);
5907 else if (GET_CODE (XEXP (tem, 1)) == PLUS)
5908 tem = XEXP (tem, 1);
5909 else
5910 break;
5911 }
5912 if (CONSTANT_P (XEXP (tem, 1)))
5913 v->no_const_addval = 0;
5914 }
5915 }
5916
5917 if (loop_dump_stream)
5918 loop_giv_dump (v, loop_dump_stream, 0);
5919 }
5920
5921 /* All this does is determine whether a giv can be made replaceable because
5922 its final value can be calculated. This code cannot be part of record_giv
5923 above, because final_giv_value requires that the number of loop iterations
5924 be known, and that cannot be accurately calculated until after all givs
5925 have been identified. */
5926
5927 static void
5928 check_final_value (const struct loop *loop, struct induction *v)
5929 {
5930 rtx final_value = 0;
5931
5932 /* DEST_ADDR givs will never reach here, because they are always marked
5933 replaceable above in record_giv. */
5934
5935 /* The giv can be replaced outright by the reduced register only if all
5936 of the following conditions are true:
5937 - the insn that sets the giv is always executed on any iteration
5938 on which the giv is used at all
5939 (there are two ways to deduce this:
5940 either the insn is executed on every iteration,
5941 or all uses follow that insn in the same basic block),
5942 - its final value can be calculated (this condition is different
5943 from the one above in record_giv)
5944 - it's not used before it's set
5945 - no assignments to the biv occur during the giv's lifetime. */
5946
5947 #if 0
5948 /* This is only called now when replaceable is known to be false. */
5949 /* Clear replaceable, so that it won't confuse final_giv_value. */
5950 v->replaceable = 0;
5951 #endif
5952
5953 if ((final_value = final_giv_value (loop, v))
5954 && (v->always_executed
5955 || last_use_this_basic_block (v->dest_reg, v->insn)))
5956 {
5957 int biv_increment_seen = 0, before_giv_insn = 0;
5958 rtx p = v->insn;
5959 rtx last_giv_use;
5960
5961 v->replaceable = 1;
5962 v->not_replaceable = 0;
5963
5964 /* When trying to determine whether or not a biv increment occurs
5965 during the lifetime of the giv, we can ignore uses of the variable
5966 outside the loop because final_value is true. Hence we cannot
5967 use regno_last_uid and regno_first_uid as above in record_giv. */
5968
5969 /* Search the loop to determine whether any assignments to the
5970 biv occur during the giv's lifetime. Start with the insn
5971 that sets the giv, and search around the loop until we come
5972 back to that insn again.
5973
5974 Also fail if there is a jump within the giv's lifetime that jumps
5975 to somewhere outside the lifetime but still within the loop. This
5976 catches spaghetti code where the execution order is not linear, and
5977 hence the above test fails. Here we assume that the giv lifetime
5978 does not extend from one iteration of the loop to the next, so as
5979 to make the test easier. Since the lifetime isn't known yet,
5980 this requires two loops. See also record_giv above. */
5981
5982 last_giv_use = v->insn;
5983
5984 while (1)
5985 {
5986 p = NEXT_INSN (p);
5987 if (p == loop->end)
5988 {
5989 before_giv_insn = 1;
5990 p = NEXT_INSN (loop->start);
5991 }
5992 if (p == v->insn)
5993 break;
5994
5995 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5996 || GET_CODE (p) == CALL_INSN)
5997 {
5998 /* It is possible for the BIV increment to use the GIV if we
5999 have a cycle. Thus we must be sure to check each insn for
6000 both BIV and GIV uses, and we must check for BIV uses
6001 first. */
6002
6003 if (! biv_increment_seen
6004 && reg_set_p (v->src_reg, PATTERN (p)))
6005 biv_increment_seen = 1;
6006
6007 if (reg_mentioned_p (v->dest_reg, PATTERN (p)))
6008 {
6009 if (biv_increment_seen || before_giv_insn)
6010 {
6011 v->replaceable = 0;
6012 v->not_replaceable = 1;
6013 break;
6014 }
6015 last_giv_use = p;
6016 }
6017 }
6018 }
6019
6020 /* Now that the lifetime of the giv is known, check for branches
6021 from within the lifetime to outside the lifetime if it is still
6022 replaceable. */
6023
6024 if (v->replaceable)
6025 {
6026 p = v->insn;
6027 while (1)
6028 {
6029 p = NEXT_INSN (p);
6030 if (p == loop->end)
6031 p = NEXT_INSN (loop->start);
6032 if (p == last_giv_use)
6033 break;
6034
6035 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
6036 && LABEL_NAME (JUMP_LABEL (p))
6037 && ((loop_insn_first_p (JUMP_LABEL (p), v->insn)
6038 && loop_insn_first_p (loop->start, JUMP_LABEL (p)))
6039 || (loop_insn_first_p (last_giv_use, JUMP_LABEL (p))
6040 && loop_insn_first_p (JUMP_LABEL (p), loop->end))))
6041 {
6042 v->replaceable = 0;
6043 v->not_replaceable = 1;
6044
6045 if (loop_dump_stream)
6046 fprintf (loop_dump_stream,
6047 "Found branch outside giv lifetime.\n");
6048
6049 break;
6050 }
6051 }
6052 }
6053
6054 /* If it is replaceable, then save the final value. */
6055 if (v->replaceable)
6056 v->final_value = final_value;
6057 }
6058
6059 if (loop_dump_stream && v->replaceable)
6060 fprintf (loop_dump_stream, "Insn %d: giv reg %d final_value replaceable\n",
6061 INSN_UID (v->insn), REGNO (v->dest_reg));
6062 }
6063 \f
6064 /* Update the status of whether a giv can derive other givs.
6065
6066 We need to do something special if there is or may be an update to the biv
6067 between the time the giv is defined and the time it is used to derive
6068 another giv.
6069
6070 In addition, a giv that is only conditionally set is not allowed to
6071 derive another giv once a label has been passed.
6072
6073 The cases we look at are when a label or an update to a biv is passed. */
6074
6075 static void
6076 update_giv_derive (const struct loop *loop, rtx p)
6077 {
6078 struct loop_ivs *ivs = LOOP_IVS (loop);
6079 struct iv_class *bl;
6080 struct induction *biv, *giv;
6081 rtx tem;
6082 int dummy;
6083
6084 /* Search all IV classes, then all bivs, and finally all givs.
6085
6086 There are three cases we are concerned with. First we have the situation
6087 of a giv that is only updated conditionally. In that case, it may not
6088 derive any givs after a label is passed.
6089
6090 The second case is when a biv update occurs, or may occur, after the
6091 definition of a giv. For certain biv updates (see below) that are
6092 known to occur between the giv definition and use, we can adjust the
6093 giv definition. For others, or when the biv update is conditional,
6094 we must prevent the giv from deriving any other givs. There are two
6095 sub-cases within this case.
6096
6097 If this is a label, we are concerned with any biv update that is done
6098 conditionally, since it may be done after the giv is defined followed by
6099 a branch here (actually, we need to pass both a jump and a label, but
6100 this extra tracking doesn't seem worth it).
6101
6102 If this is a jump, we are concerned about any biv update that may be
6103 executed multiple times. We are actually only concerned about
6104 backward jumps, but it is probably not worth performing the test
6105 on the jump again here.
6106
6107 If this is a biv update, we must adjust the giv status to show that a
6108 subsequent biv update was performed. If this adjustment cannot be done,
6109 the giv cannot derive further givs. */
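/* For example (illustrative): if the giv `g = i * 4' is defined before a
   biv update `i = i + 1' but is used to derive another giv after that
   update, the derived value must be compensated by 1 * 4; such
   compensations are accumulated in derive_adjustment below, and when no
   compensation can be formed, cant_derive is set instead.  */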
6110
6111 for (bl = ivs->list; bl; bl = bl->next)
6112 for (biv = bl->biv; biv; biv = biv->next_iv)
6113 if (GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN
6114 || biv->insn == p)
6115 {
6116 /* Skip if location is the same as a previous one. */
6117 if (biv->same)
6118 continue;
6119
6120 for (giv = bl->giv; giv; giv = giv->next_iv)
6121 {
6122 /* If cant_derive is already true, there is no point in
6123 checking all of these conditions again. */
6124 if (giv->cant_derive)
6125 continue;
6126
6127 /* If this giv is conditionally set and we have passed a label,
6128 it cannot derive anything. */
6129 if (GET_CODE (p) == CODE_LABEL && ! giv->always_computable)
6130 giv->cant_derive = 1;
6131
6132 /* Skip givs that have mult_val == 0, since
6133 they are really invariants. Also skip those that are
6134 replaceable, since we know their lifetime doesn't contain
6135 any biv update. */
6136 else if (giv->mult_val == const0_rtx || giv->replaceable)
6137 continue;
6138
6139 /* The only way we can allow this giv to derive another
6140 is if this is a biv increment and we can form the product
6141 of biv->add_val and giv->mult_val. In this case, we will
6142 be able to compute a compensation. */
6143 else if (biv->insn == p)
6144 {
6145 rtx ext_val_dummy;
6146
6147 tem = 0;
6148 if (biv->mult_val == const1_rtx)
6149 tem = simplify_giv_expr (loop,
6150 gen_rtx_MULT (giv->mode,
6151 biv->add_val,
6152 giv->mult_val),
6153 &ext_val_dummy, &dummy);
6154
6155 if (tem && giv->derive_adjustment)
6156 tem = simplify_giv_expr
6157 (loop,
6158 gen_rtx_PLUS (giv->mode, tem, giv->derive_adjustment),
6159 &ext_val_dummy, &dummy);
6160
6161 if (tem)
6162 giv->derive_adjustment = tem;
6163 else
6164 giv->cant_derive = 1;
6165 }
6166 else if ((GET_CODE (p) == CODE_LABEL && ! biv->always_computable)
6167 || (GET_CODE (p) == JUMP_INSN && biv->maybe_multiple))
6168 giv->cant_derive = 1;
6169 }
6170 }
6171 }
6172 \f
6173 /* Check whether an insn is an increment legitimate for a basic induction var.
6174 X is the source of insn P, or a part of it.
6175 MODE is the mode in which X should be interpreted.
6176
6177 DEST_REG is the putative biv, also the destination of the insn.
6178 We accept patterns of these forms:
6179 REG = REG + INVARIANT (includes REG = REG - CONSTANT)
6180 REG = INVARIANT + REG
6181
6182 If X is suitable, we return 1, set *MULT_VAL to CONST1_RTX,
6183 store the additive term into *INC_VAL, and store the place where
6184 we found the additive term into *LOCATION.
6185
6186 If X is an assignment of an invariant into DEST_REG, we set
6187 *MULT_VAL to CONST0_RTX, and store the invariant into *INC_VAL.
6188
6189 We also want to detect a BIV when it corresponds to a variable
6190 whose mode was promoted. In that case, an increment
6191 of the variable may be a PLUS that adds a SUBREG of that variable to
6192 an invariant and then sign- or zero-extends the result of the PLUS
6193 into the variable.
6194
6195 Most GIVs in such cases will be in the promoted mode, since that is
6196 probably the natural computation mode (and almost certainly the mode
6197 used for addresses) on the machine. So we view the pseudo-reg containing
6198 the variable as the BIV, as if it were simply incremented.
6199
6200 Note that treating the entire pseudo as a BIV will result in making
6201 simple increments to any GIVs based on it. However, if the variable
6202 overflows in its declared mode but not its promoted mode, the result will
6203 be incorrect. This is acceptable if the variable is signed, since
6204 overflows in such cases are undefined, but not if it is unsigned, since
6205 those overflows are defined. So we only check for SIGN_EXTEND and
6206 not ZERO_EXTEND.
6207
6208 If we cannot find a biv, we return 0. */
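/* Illustrative examples (exact RTL is target dependent): for an insn
   whose SET_SRC is (plus (reg i) (const_int 4)) with DEST_REG == (reg i),
   we return 1 with *MULT_VAL == const1_rtx and *INC_VAL == (const_int 4);
   for (set (reg i) (const_int 0)) in the innermost loop we return 1 with
   *MULT_VAL == const0_rtx and *INC_VAL == (const_int 0).  */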
6209
6210 static int
6211 basic_induction_var (const struct loop *loop, rtx x, enum machine_mode mode,
6212 rtx dest_reg, rtx p, rtx *inc_val, rtx *mult_val,
6213 rtx **location)
6214 {
6215 enum rtx_code code;
6216 rtx *argp, arg;
6217 rtx insn, set = 0, last, inc;
6218
6219 code = GET_CODE (x);
6220 *location = NULL;
6221 switch (code)
6222 {
6223 case PLUS:
6224 if (rtx_equal_p (XEXP (x, 0), dest_reg)
6225 || (GET_CODE (XEXP (x, 0)) == SUBREG
6226 && SUBREG_PROMOTED_VAR_P (XEXP (x, 0))
6227 && SUBREG_REG (XEXP (x, 0)) == dest_reg))
6228 {
6229 argp = &XEXP (x, 1);
6230 }
6231 else if (rtx_equal_p (XEXP (x, 1), dest_reg)
6232 || (GET_CODE (XEXP (x, 1)) == SUBREG
6233 && SUBREG_PROMOTED_VAR_P (XEXP (x, 1))
6234 && SUBREG_REG (XEXP (x, 1)) == dest_reg))
6235 {
6236 argp = &XEXP (x, 0);
6237 }
6238 else
6239 return 0;
6240
6241 arg = *argp;
6242 if (loop_invariant_p (loop, arg) != 1)
6243 return 0;
6244
6245 /* convert_modes can emit new instructions, e.g. when arg is a loop
6246 invariant MEM and dest_reg has a different mode.
6247 These instructions would be emitted after the end of the function
6248 and then *inc_val would be an uninitialized pseudo.
6249 Detect this and bail in this case.
6250 Other ways to solve this would be to introduce a convert_modes
6251 variant which is allowed to fail but not allowed to emit new
6252 instructions; to emit these instructions before the loop start and
6253 let them be garbage collected if *inc_val is never used; or to save
6254 the *inc_val initialization sequence generated here and emit it at
6255 some suitable place when *inc_val is actually used. */
6256 last = get_last_insn ();
6257 inc = convert_modes (GET_MODE (dest_reg), GET_MODE (x), arg, 0);
6258 if (get_last_insn () != last)
6259 {
6260 delete_insns_since (last);
6261 return 0;
6262 }
6263
6264 *inc_val = inc;
6265 *mult_val = const1_rtx;
6266 *location = argp;
6267 return 1;
6268
6269 case SUBREG:
6270 /* If what's inside the SUBREG is a BIV, then so is the SUBREG. This will
6271 handle addition of promoted variables.
6272 ??? The comment at the start of this function is wrong: promoted
6273 variable increments don't look like it says they do. */
6274 return basic_induction_var (loop, SUBREG_REG (x),
6275 GET_MODE (SUBREG_REG (x)),
6276 dest_reg, p, inc_val, mult_val, location);
6277
6278 case REG:
6279 /* If this register is assigned in a previous insn, look at its
6280 source, but don't go outside the loop or past a label. */
6281
6282 /* If this sets a register to itself, we would repeat any previous
6283 biv increment if we applied this strategy blindly. */
6284 if (rtx_equal_p (dest_reg, x))
6285 return 0;
6286
6287 insn = p;
6288 while (1)
6289 {
6290 rtx dest;
6291 do
6292 {
6293 insn = PREV_INSN (insn);
6294 }
6295 while (insn && GET_CODE (insn) == NOTE
6296 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6297
6298 if (!insn)
6299 break;
6300 set = single_set (insn);
6301 if (set == 0)
6302 break;
6303 dest = SET_DEST (set);
6304 if (dest == x
6305 || (GET_CODE (dest) == SUBREG
6306 && (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
6307 && (GET_MODE_CLASS (GET_MODE (dest)) == MODE_INT)
6308 && SUBREG_REG (dest) == x))
6309 return basic_induction_var (loop, SET_SRC (set),
6310 (GET_MODE (SET_SRC (set)) == VOIDmode
6311 ? GET_MODE (x)
6312 : GET_MODE (SET_SRC (set))),
6313 dest_reg, insn,
6314 inc_val, mult_val, location);
6315
6316 while (GET_CODE (dest) == SIGN_EXTRACT
6317 || GET_CODE (dest) == ZERO_EXTRACT
6318 || GET_CODE (dest) == SUBREG
6319 || GET_CODE (dest) == STRICT_LOW_PART)
6320 dest = XEXP (dest, 0);
6321 if (dest == x)
6322 break;
6323 }
6324 /* Fall through. */
6325
6326 /* Can accept constant setting of biv only when inside the innermost loop.
6327 Otherwise, a biv of an inner loop may be incorrectly recognized
6328 as a biv of the outer loop,
6329 causing code to be moved INTO the inner loop. */
6330 case MEM:
6331 if (loop_invariant_p (loop, x) != 1)
6332 return 0;
6333 case CONST_INT:
6334 case SYMBOL_REF:
6335 case CONST:
6336 /* convert_modes aborts if we try to convert to or from CCmode, so just
6337 exclude that case. It is very unlikely that a condition code value
6338 would be a useful iterator anyways. convert_modes aborts if we try to
6339 convert a float mode to non-float or vice versa too. */
6340 if (loop->level == 1
6341 && GET_MODE_CLASS (mode) == GET_MODE_CLASS (GET_MODE (dest_reg))
6342 && GET_MODE_CLASS (mode) != MODE_CC)
6343 {
6344 /* Possible bug here? Perhaps we don't know the mode of X. */
6345 last = get_last_insn ();
6346 inc = convert_modes (GET_MODE (dest_reg), mode, x, 0);
6347 if (get_last_insn () != last)
6348 {
6349 delete_insns_since (last);
6350 return 0;
6351 }
6352
6353 *inc_val = inc;
6354 *mult_val = const0_rtx;
6355 return 1;
6356 }
6357 else
6358 return 0;
6359
6360 case SIGN_EXTEND:
6361 /* Ignore this BIV if signed arithmetic overflow is defined. */
6362 if (flag_wrapv)
6363 return 0;
6364 return basic_induction_var (loop, XEXP (x, 0), GET_MODE (XEXP (x, 0)),
6365 dest_reg, p, inc_val, mult_val, location);
6366
6367 case ASHIFTRT:
6368 /* Similar, since this can be a sign extension. */
6369 for (insn = PREV_INSN (p);
6370 (insn && GET_CODE (insn) == NOTE
6371 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6372 insn = PREV_INSN (insn))
6373 ;
6374
6375 if (insn)
6376 set = single_set (insn);
6377
6378 if (! rtx_equal_p (dest_reg, XEXP (x, 0))
6379 && set && SET_DEST (set) == XEXP (x, 0)
6380 && GET_CODE (XEXP (x, 1)) == CONST_INT
6381 && INTVAL (XEXP (x, 1)) >= 0
6382 && GET_CODE (SET_SRC (set)) == ASHIFT
6383 && XEXP (x, 1) == XEXP (SET_SRC (set), 1))
6384 return basic_induction_var (loop, XEXP (SET_SRC (set), 0),
6385 GET_MODE (XEXP (x, 0)),
6386 dest_reg, insn, inc_val, mult_val,
6387 location);
6388 return 0;
6389
6390 default:
6391 return 0;
6392 }
6393 }
6394 \f
6395 /* A general induction variable (giv) is any quantity that is a linear
6396 function of a basic induction variable,
6397 i.e. giv = biv * mult_val + add_val.
6398 The coefficients can be any loop invariant quantity.
6399 A giv need not be computed directly from the biv;
6400 it can be computed by way of other givs. */
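/* For example (illustrative): given a biv `i', the assignments

       j = i * 4;
       k = j + 16;

   make both `j' (mult_val == 4, add_val == 0) and `k' (mult_val == 4,
   add_val == 16) givs of `i'; `k' is computed by way of the giv `j'
   rather than directly from the biv.  */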
6401
6402 /* Determine whether X computes a giv.
6403 If it does, return a nonzero value
6404 which is the benefit from eliminating the computation of X;
6405 set *SRC_REG to the register of the biv that it is computed from;
6406 set *ADD_VAL and *MULT_VAL to the coefficients,
6407 such that the value of X is biv * mult + add; */
6408
6409 static int
6410 general_induction_var (const struct loop *loop, rtx x, rtx *src_reg,
6411 rtx *add_val, rtx *mult_val, rtx *ext_val,
6412 int is_addr, int *pbenefit,
6413 enum machine_mode addr_mode)
6414 {
6415 struct loop_ivs *ivs = LOOP_IVS (loop);
6416 rtx orig_x = x;
6417
6418 /* If this is an invariant, forget it, it isn't a giv. */
6419 if (loop_invariant_p (loop, x) == 1)
6420 return 0;
6421
6422 *pbenefit = 0;
6423 *ext_val = NULL_RTX;
6424 x = simplify_giv_expr (loop, x, ext_val, pbenefit);
6425 if (x == 0)
6426 return 0;
6427
6428 switch (GET_CODE (x))
6429 {
6430 case USE:
6431 case CONST_INT:
6432 /* Since this is now an invariant and wasn't before, it must be a giv
6433 with MULT_VAL == 0. It doesn't matter which BIV we associate this
6434 with. */
6435 *src_reg = ivs->list->biv->dest_reg;
6436 *mult_val = const0_rtx;
6437 *add_val = x;
6438 break;
6439
6440 case REG:
6441 /* This is equivalent to a BIV. */
6442 *src_reg = x;
6443 *mult_val = const1_rtx;
6444 *add_val = const0_rtx;
6445 break;
6446
6447 case PLUS:
6448 /* Either (plus (biv) (invar)) or
6449 (plus (mult (biv) (invar_1)) (invar_2)). */
6450 if (GET_CODE (XEXP (x, 0)) == MULT)
6451 {
6452 *src_reg = XEXP (XEXP (x, 0), 0);
6453 *mult_val = XEXP (XEXP (x, 0), 1);
6454 }
6455 else
6456 {
6457 *src_reg = XEXP (x, 0);
6458 *mult_val = const1_rtx;
6459 }
6460 *add_val = XEXP (x, 1);
6461 break;
6462
6463 case MULT:
6464 /* ADD_VAL is zero. */
6465 *src_reg = XEXP (x, 0);
6466 *mult_val = XEXP (x, 1);
6467 *add_val = const0_rtx;
6468 break;
6469
6470 default:
6471 abort ();
6472 }
6473
6474 /* Remove any enclosing USE from ADD_VAL and MULT_VAL (there will be one
6475 unless they are CONST_INTs). */
6476 if (GET_CODE (*add_val) == USE)
6477 *add_val = XEXP (*add_val, 0);
6478 if (GET_CODE (*mult_val) == USE)
6479 *mult_val = XEXP (*mult_val, 0);
6480
6481 if (is_addr)
6482 *pbenefit += address_cost (orig_x, addr_mode) - reg_address_cost;
6483 else
6484 *pbenefit += rtx_cost (orig_x, SET);
6485
6486 /* Always return true if this is a giv so it will be detected as such,
6487 even if the benefit is zero or negative. This allows elimination
6488 of bivs that might otherwise not be eliminated. */
6489 return 1;
6490 }
6491 \f
6492 /* Given an expression, X, try to form it as a linear function of a biv.
6493 We will canonicalize it to be of the form
6494 (plus (mult (BIV) (invar_1))
6495 (invar_2))
6496 with possible degeneracies.
6497
6498 The invariant expressions must each be of a form that can be used as a
6499 machine operand. We surround then with a USE rtx (a hack, but localized
6500 and certainly unambiguous!) if not a CONST_INT for simplicity in this
6501 routine; it is the caller's responsibility to strip them.
6502
6503 If no such canonicalization is possible (i.e., two biv's are used or an
6504 expression that is neither invariant nor a biv or giv), this routine
6505 returns 0.
6506
6507 For a nonzero return, the result will have a code of CONST_INT, USE,
6508 REG (for a BIV), PLUS, or MULT. No other codes will occur.
6509
6510 *BENEFIT will be incremented by the benefit of any sub-giv encountered. */
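/* A small illustrative canonicalization, assuming `i' is a biv and `n'
   is a loop-invariant register: `(i + n) * 4' simplifies to

       (plus (mult (reg i) (const_int 4))
             (use (mult (reg n) (const_int 4))))

   i.e. invar_1 is the constant 4 and invar_2 is n * 4, wrapped in a USE
   as described above because it is not a CONST_INT.  */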
6511
6512 static rtx sge_plus (enum machine_mode, rtx, rtx);
6513 static rtx sge_plus_constant (rtx, rtx);
6514
6515 static rtx
6516 simplify_giv_expr (const struct loop *loop, rtx x, rtx *ext_val, int *benefit)
6517 {
6518 struct loop_ivs *ivs = LOOP_IVS (loop);
6519 struct loop_regs *regs = LOOP_REGS (loop);
6520 enum machine_mode mode = GET_MODE (x);
6521 rtx arg0, arg1;
6522 rtx tem;
6523
6524 /* If this is not an integer mode, or if we cannot do arithmetic in this
6525 mode, this can't be a giv. */
6526 if (mode != VOIDmode
6527 && (GET_MODE_CLASS (mode) != MODE_INT
6528 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT))
6529 return NULL_RTX;
6530
6531 switch (GET_CODE (x))
6532 {
6533 case PLUS:
6534 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6535 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6536 if (arg0 == 0 || arg1 == 0)
6537 return NULL_RTX;
6538
6539 /* Put constant last, CONST_INT last if both constant. */
6540 if ((GET_CODE (arg0) == USE
6541 || GET_CODE (arg0) == CONST_INT)
6542 && ! ((GET_CODE (arg0) == USE
6543 && GET_CODE (arg1) == USE)
6544 || GET_CODE (arg1) == CONST_INT))
6545 tem = arg0, arg0 = arg1, arg1 = tem;
6546
6547 /* Handle addition of zero, then addition of an invariant. */
6548 if (arg1 == const0_rtx)
6549 return arg0;
6550 else if (GET_CODE (arg1) == CONST_INT || GET_CODE (arg1) == USE)
6551 switch (GET_CODE (arg0))
6552 {
6553 case CONST_INT:
6554 case USE:
6555 /* Adding two invariants must result in an invariant, so enclose
6556 the addition operation inside a USE and return it. */
6557 if (GET_CODE (arg0) == USE)
6558 arg0 = XEXP (arg0, 0);
6559 if (GET_CODE (arg1) == USE)
6560 arg1 = XEXP (arg1, 0);
6561
6562 if (GET_CODE (arg0) == CONST_INT)
6563 tem = arg0, arg0 = arg1, arg1 = tem;
6564 if (GET_CODE (arg1) == CONST_INT)
6565 tem = sge_plus_constant (arg0, arg1);
6566 else
6567 tem = sge_plus (mode, arg0, arg1);
6568
6569 if (GET_CODE (tem) != CONST_INT)
6570 tem = gen_rtx_USE (mode, tem);
6571 return tem;
6572
6573 case REG:
6574 case MULT:
6575 /* biv + invar or mult + invar. Return sum. */
6576 return gen_rtx_PLUS (mode, arg0, arg1);
6577
6578 case PLUS:
6579 /* (a + invar_1) + invar_2. Associate. */
6580 return
6581 simplify_giv_expr (loop,
6582 gen_rtx_PLUS (mode,
6583 XEXP (arg0, 0),
6584 gen_rtx_PLUS (mode,
6585 XEXP (arg0, 1),
6586 arg1)),
6587 ext_val, benefit);
6588
6589 default:
6590 abort ();
6591 }
6592
6593 /* Each argument must be either REG, PLUS, or MULT. Convert REG to
6594 MULT to reduce cases. */
6595 if (REG_P (arg0))
6596 arg0 = gen_rtx_MULT (mode, arg0, const1_rtx);
6597 if (REG_P (arg1))
6598 arg1 = gen_rtx_MULT (mode, arg1, const1_rtx);
6599
6600 /* Now have PLUS + PLUS, PLUS + MULT, MULT + PLUS, or MULT + MULT.
6601 Put a MULT first, leaving PLUS + PLUS, MULT + PLUS, or MULT + MULT.
6602 Recurse to associate the second PLUS. */
6603 if (GET_CODE (arg1) == MULT)
6604 tem = arg0, arg0 = arg1, arg1 = tem;
6605
6606 if (GET_CODE (arg1) == PLUS)
6607 return
6608 simplify_giv_expr (loop,
6609 gen_rtx_PLUS (mode,
6610 gen_rtx_PLUS (mode, arg0,
6611 XEXP (arg1, 0)),
6612 XEXP (arg1, 1)),
6613 ext_val, benefit);
6614
6615 /* Now must have MULT + MULT. Distribute if same biv, else not giv. */
6616 if (GET_CODE (arg0) != MULT || GET_CODE (arg1) != MULT)
6617 return NULL_RTX;
6618
6619 if (!rtx_equal_p (arg0, arg1))
6620 return NULL_RTX;
6621
6622 return simplify_giv_expr (loop,
6623 gen_rtx_MULT (mode,
6624 XEXP (arg0, 0),
6625 gen_rtx_PLUS (mode,
6626 XEXP (arg0, 1),
6627 XEXP (arg1, 1))),
6628 ext_val, benefit);
6629
6630 case MINUS:
6631 /* Handle "a - b" as "a + b * (-1)". */
6632 return simplify_giv_expr (loop,
6633 gen_rtx_PLUS (mode,
6634 XEXP (x, 0),
6635 gen_rtx_MULT (mode,
6636 XEXP (x, 1),
6637 constm1_rtx)),
6638 ext_val, benefit);
6639
6640 case MULT:
6641 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6642 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6643 if (arg0 == 0 || arg1 == 0)
6644 return NULL_RTX;
6645
6646 /* Put constant last, CONST_INT last if both constant. */
6647 if ((GET_CODE (arg0) == USE || GET_CODE (arg0) == CONST_INT)
6648 && GET_CODE (arg1) != CONST_INT)
6649 tem = arg0, arg0 = arg1, arg1 = tem;
6650
6651 /* If second argument is not now constant, not giv. */
6652 if (GET_CODE (arg1) != USE && GET_CODE (arg1) != CONST_INT)
6653 return NULL_RTX;
6654
6655 /* Handle multiply by 0 or 1. */
6656 if (arg1 == const0_rtx)
6657 return const0_rtx;
6658
6659 else if (arg1 == const1_rtx)
6660 return arg0;
6661
6662 switch (GET_CODE (arg0))
6663 {
6664 case REG:
6665 /* biv * invar. Done. */
6666 return gen_rtx_MULT (mode, arg0, arg1);
6667
6668 case CONST_INT:
6669 /* Product of two constants. */
6670 return GEN_INT (INTVAL (arg0) * INTVAL (arg1));
6671
6672 case USE:
6673 /* invar * invar is a giv, but attempt to simplify it somehow. */
6674 if (GET_CODE (arg1) != CONST_INT)
6675 return NULL_RTX;
6676
6677 arg0 = XEXP (arg0, 0);
6678 if (GET_CODE (arg0) == MULT)
6679 {
6680 /* (invar_0 * invar_1) * invar_2. Associate. */
6681 return simplify_giv_expr (loop,
6682 gen_rtx_MULT (mode,
6683 XEXP (arg0, 0),
6684 gen_rtx_MULT (mode,
6685 XEXP (arg0,
6686 1),
6687 arg1)),
6688 ext_val, benefit);
6689 }
6690 /* Propagate the MULT expressions to the innermost nodes. */
6691 else if (GET_CODE (arg0) == PLUS)
6692 {
6693 /* (invar_0 + invar_1) * invar_2. Distribute. */
6694 return simplify_giv_expr (loop,
6695 gen_rtx_PLUS (mode,
6696 gen_rtx_MULT (mode,
6697 XEXP (arg0,
6698 0),
6699 arg1),
6700 gen_rtx_MULT (mode,
6701 XEXP (arg0,
6702 1),
6703 arg1)),
6704 ext_val, benefit);
6705 }
6706 return gen_rtx_USE (mode, gen_rtx_MULT (mode, arg0, arg1));
6707
6708 case MULT:
6709 /* (a * invar_1) * invar_2. Associate. */
6710 return simplify_giv_expr (loop,
6711 gen_rtx_MULT (mode,
6712 XEXP (arg0, 0),
6713 gen_rtx_MULT (mode,
6714 XEXP (arg0, 1),
6715 arg1)),
6716 ext_val, benefit);
6717
6718 case PLUS:
6719 /* (a + invar_1) * invar_2. Distribute. */
6720 return simplify_giv_expr (loop,
6721 gen_rtx_PLUS (mode,
6722 gen_rtx_MULT (mode,
6723 XEXP (arg0, 0),
6724 arg1),
6725 gen_rtx_MULT (mode,
6726 XEXP (arg0, 1),
6727 arg1)),
6728 ext_val, benefit);
6729
6730 default:
6731 abort ();
6732 }
6733
6734 case ASHIFT:
6735 /* Shift by constant is multiply by power of two. */
6736 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6737 return 0;
6738
6739 return
6740 simplify_giv_expr (loop,
6741 gen_rtx_MULT (mode,
6742 XEXP (x, 0),
6743 GEN_INT ((HOST_WIDE_INT) 1
6744 << INTVAL (XEXP (x, 1)))),
6745 ext_val, benefit);
6746
6747 case NEG:
6748 /* "-a" is "a * (-1)" */
6749 return simplify_giv_expr (loop,
6750 gen_rtx_MULT (mode, XEXP (x, 0), constm1_rtx),
6751 ext_val, benefit);
6752
6753 case NOT:
6754 /* "~a" is "-a - 1". Silly, but easy. */
6755 return simplify_giv_expr (loop,
6756 gen_rtx_MINUS (mode,
6757 gen_rtx_NEG (mode, XEXP (x, 0)),
6758 const1_rtx),
6759 ext_val, benefit);
6760
6761 case USE:
6762 /* Already in proper form for invariant. */
6763 return x;
6764
6765 case SIGN_EXTEND:
6766 case ZERO_EXTEND:
6767 case TRUNCATE:
6768 /* Conditionally recognize extensions of simple IVs. After we've
6769 computed loop traversal counts and verified the range of the
6770 source IV, we'll reevaluate this as a GIV. */
6771 if (*ext_val == NULL_RTX)
6772 {
6773 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6774 if (arg0 && *ext_val == NULL_RTX && REG_P (arg0))
6775 {
6776 *ext_val = gen_rtx_fmt_e (GET_CODE (x), mode, arg0);
6777 return arg0;
6778 }
6779 }
6780 goto do_default;
6781
6782 case REG:
6783 /* If this is a new register, we can't deal with it. */
6784 if (REGNO (x) >= max_reg_before_loop)
6785 return 0;
6786
6787 /* Check for biv or giv. */
6788 switch (REG_IV_TYPE (ivs, REGNO (x)))
6789 {
6790 case BASIC_INDUCT:
6791 return x;
6792 case GENERAL_INDUCT:
6793 {
6794 struct induction *v = REG_IV_INFO (ivs, REGNO (x));
6795
6796 /* Form expression from giv and add benefit. Ensure this giv
6797 can derive another and subtract any needed adjustment if so. */
6798
6799 /* Increasing the benefit here is risky. The only case in which it
6800 is arguably correct is if this is the only use of V. In other
6801 cases, this will artificially inflate the benefit of the current
6802 giv, and lead to suboptimal code. Thus, it is disabled, since
6803 potentially not reducing an only marginally beneficial giv is
6804 less harmful than reducing many givs that are not really
6805 beneficial. */
6806 {
6807 rtx single_use = regs->array[REGNO (x)].single_usage;
6808 if (single_use && single_use != const0_rtx)
6809 *benefit += v->benefit;
6810 }
6811
6812 if (v->cant_derive)
6813 return 0;
6814
6815 tem = gen_rtx_PLUS (mode, gen_rtx_MULT (mode,
6816 v->src_reg, v->mult_val),
6817 v->add_val);
6818
6819 if (v->derive_adjustment)
6820 tem = gen_rtx_MINUS (mode, tem, v->derive_adjustment);
6821 arg0 = simplify_giv_expr (loop, tem, ext_val, benefit);
6822 if (*ext_val)
6823 {
6824 if (!v->ext_dependent)
6825 return arg0;
6826 }
6827 else
6828 {
6829 *ext_val = v->ext_dependent;
6830 return arg0;
6831 }
6832 return 0;
6833 }
6834
6835 default:
6836 do_default:
6837 /* If it isn't an induction variable, and it is invariant, we
6838 may be able to simplify things further by looking through
6839 the bits we just moved outside the loop. */
6840 if (loop_invariant_p (loop, x) == 1)
6841 {
6842 struct movable *m;
6843 struct loop_movables *movables = LOOP_MOVABLES (loop);
6844
6845 for (m = movables->head; m; m = m->next)
6846 if (rtx_equal_p (x, m->set_dest))
6847 {
6848 /* Ok, we found a match. Substitute and simplify. */
6849
6850 /* If we match another movable, we must use that, as
6851 this one is going away. */
6852 if (m->match)
6853 return simplify_giv_expr (loop, m->match->set_dest,
6854 ext_val, benefit);
6855
6856 /* If consec is nonzero, this is a member of a group of
6857 instructions that were moved together. We handle this
6858 case only to the point of seeking to the last insn and
6859 looking for a REG_EQUAL. Fail if we don't find one. */
6860 if (m->consec != 0)
6861 {
6862 int i = m->consec;
6863 tem = m->insn;
6864 do
6865 {
6866 tem = NEXT_INSN (tem);
6867 }
6868 while (--i > 0);
6869
6870 tem = find_reg_note (tem, REG_EQUAL, NULL_RTX);
6871 if (tem)
6872 tem = XEXP (tem, 0);
6873 }
6874 else
6875 {
6876 tem = single_set (m->insn);
6877 if (tem)
6878 tem = SET_SRC (tem);
6879 }
6880
6881 if (tem)
6882 {
6883 /* What we are most interested in is pointer
6884 arithmetic on invariants -- only take
6885 patterns we may be able to do something with. */
6886 if (GET_CODE (tem) == PLUS
6887 || GET_CODE (tem) == MULT
6888 || GET_CODE (tem) == ASHIFT
6889 || GET_CODE (tem) == CONST_INT
6890 || GET_CODE (tem) == SYMBOL_REF)
6891 {
6892 tem = simplify_giv_expr (loop, tem, ext_val,
6893 benefit);
6894 if (tem)
6895 return tem;
6896 }
6897 else if (GET_CODE (tem) == CONST
6898 && GET_CODE (XEXP (tem, 0)) == PLUS
6899 && GET_CODE (XEXP (XEXP (tem, 0), 0)) == SYMBOL_REF
6900 && GET_CODE (XEXP (XEXP (tem, 0), 1)) == CONST_INT)
6901 {
6902 tem = simplify_giv_expr (loop, XEXP (tem, 0),
6903 ext_val, benefit);
6904 if (tem)
6905 return tem;
6906 }
6907 }
6908 break;
6909 }
6910 }
6911 break;
6912 }
6913
6914 /* Fall through to general case. */
6915 default:
6916 /* If invariant, return as USE (unless CONST_INT).
6917 Otherwise, not giv. */
6918 if (GET_CODE (x) == USE)
6919 x = XEXP (x, 0);
6920
6921 if (loop_invariant_p (loop, x) == 1)
6922 {
6923 if (GET_CODE (x) == CONST_INT)
6924 return x;
6925 if (GET_CODE (x) == CONST
6926 && GET_CODE (XEXP (x, 0)) == PLUS
6927 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6928 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
6929 x = XEXP (x, 0);
6930 return gen_rtx_USE (mode, x);
6931 }
6932 else
6933 return 0;
6934 }
6935 }
6936
6937 /* This routine folds invariants such that there is only ever one
6938 CONST_INT in the summation. It is only used by simplify_giv_expr. */
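/* For example (illustrative): folding (const_int 3) into
   (plus (reg n) (const_int 5)) yields (plus (reg n) (const_int 8)), so
   the summation never carries more than one CONST_INT term.  */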
6939
6940 static rtx
6941 sge_plus_constant (rtx x, rtx c)
6942 {
6943 if (GET_CODE (x) == CONST_INT)
6944 return GEN_INT (INTVAL (x) + INTVAL (c));
6945 else if (GET_CODE (x) != PLUS)
6946 return gen_rtx_PLUS (GET_MODE (x), x, c);
6947 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6948 {
6949 return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
6950 GEN_INT (INTVAL (XEXP (x, 1)) + INTVAL (c)));
6951 }
6952 else if (GET_CODE (XEXP (x, 0)) == PLUS
6953 || GET_CODE (XEXP (x, 1)) != PLUS)
6954 {
6955 return gen_rtx_PLUS (GET_MODE (x),
6956 sge_plus_constant (XEXP (x, 0), c), XEXP (x, 1));
6957 }
6958 else
6959 {
6960 return gen_rtx_PLUS (GET_MODE (x),
6961 sge_plus_constant (XEXP (x, 1), c), XEXP (x, 0));
6962 }
6963 }
6964
6965 static rtx
6966 sge_plus (enum machine_mode mode, rtx x, rtx y)
6967 {
6968 while (GET_CODE (y) == PLUS)
6969 {
6970 rtx a = XEXP (y, 0);
6971 if (GET_CODE (a) == CONST_INT)
6972 x = sge_plus_constant (x, a);
6973 else
6974 x = gen_rtx_PLUS (mode, x, a);
6975 y = XEXP (y, 1);
6976 }
6977 if (GET_CODE (y) == CONST_INT)
6978 x = sge_plus_constant (x, y);
6979 else
6980 x = gen_rtx_PLUS (mode, x, y);
6981 return x;
6982 }
6983 \f
6984 /* Help detect a giv that is calculated by several consecutive insns;
6985 for example,
6986 giv = biv * M
6987 giv = giv + A
6988 The caller has already identified the first insn P as having a giv as dest;
6989 we check that all other insns that set the same register follow
6990 immediately after P, that they alter nothing else,
6991 and that the result of the last is still a giv.
6992
6993 The value is 0 if the reg set in P is not really a giv.
6994 Otherwise, the value is the amount gained by eliminating
6995 all the consecutive insns that compute the value.
6996
6997 FIRST_BENEFIT is the amount gained by eliminating the first insn, P.
6998 SRC_REG is the reg of the biv; DEST_REG is the reg of the giv.
6999
7000 The coefficients of the ultimate giv value are stored in
7001 *MULT_VAL and *ADD_VAL. */
7002
7003 static int
7004 consec_sets_giv (const struct loop *loop, int first_benefit, rtx p,
7005 rtx src_reg, rtx dest_reg, rtx *add_val, rtx *mult_val,
7006 rtx *ext_val, rtx *last_consec_insn)
7007 {
7008 struct loop_ivs *ivs = LOOP_IVS (loop);
7009 struct loop_regs *regs = LOOP_REGS (loop);
7010 int count;
7011 enum rtx_code code;
7012 int benefit;
7013 rtx temp;
7014 rtx set;
7015
7016 /* Indicate that this is a giv so that we can update the value produced in
7017 each insn of the multi-insn sequence.
7018
7019 This induction structure will be used only by the call to
7020 general_induction_var below, so we can allocate it on our stack.
7021 If this is a giv, our caller will replace the induct var entry with
7022 a new induction structure. */
7023 struct induction *v;
7024
7025 if (REG_IV_TYPE (ivs, REGNO (dest_reg)) != UNKNOWN_INDUCT)
7026 return 0;
7027
7028 v = alloca (sizeof (struct induction));
7029 v->src_reg = src_reg;
7030 v->mult_val = *mult_val;
7031 v->add_val = *add_val;
7032 v->benefit = first_benefit;
7033 v->cant_derive = 0;
7034 v->derive_adjustment = 0;
7035 v->ext_dependent = NULL_RTX;
7036
7037 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
7038 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
7039
7040 count = regs->array[REGNO (dest_reg)].n_times_set - 1;
7041
7042 while (count > 0)
7043 {
7044 p = NEXT_INSN (p);
7045 code = GET_CODE (p);
7046
7047 /* If libcall, skip to end of call sequence. */
7048 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
7049 p = XEXP (temp, 0);
7050
7051 if (code == INSN
7052 && (set = single_set (p))
7053 && REG_P (SET_DEST (set))
7054 && SET_DEST (set) == dest_reg
7055 && (general_induction_var (loop, SET_SRC (set), &src_reg,
7056 add_val, mult_val, ext_val, 0,
7057 &benefit, VOIDmode)
7058 /* Giv created by equivalent expression. */
7059 || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
7060 && general_induction_var (loop, XEXP (temp, 0), &src_reg,
7061 add_val, mult_val, ext_val, 0,
7062 &benefit, VOIDmode)))
7063 && src_reg == v->src_reg)
7064 {
7065 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
7066 benefit += libcall_benefit (p);
7067
7068 count--;
7069 v->mult_val = *mult_val;
7070 v->add_val = *add_val;
7071 v->benefit += benefit;
7072 }
7073 else if (code != NOTE)
7074 {
7075 /* Allow insns that set something other than this giv to a
7076 constant. Such insns are needed on machines which cannot
7077 include long constants and should not disqualify a giv. */
7078 if (code == INSN
7079 && (set = single_set (p))
7080 && SET_DEST (set) != dest_reg
7081 && CONSTANT_P (SET_SRC (set)))
7082 continue;
7083
7084 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7085 return 0;
7086 }
7087 }
7088
7089 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7090 *last_consec_insn = p;
7091 return v->benefit;
7092 }
7093 \f
7094 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7095 represented by G1. If no such expression can be found, or it is clear that
7096 it cannot possibly be a valid address, 0 is returned.
7097
7098 To perform the computation, we note that
7099 G1 = x * v + a and
7100 G2 = y * v + b
7101 where `v' is the biv.
7102
7103 So G2 = (y/x) * G1 + (b - a*y/x).
7104
7105 Note that MULT = y/x.
7106
7107 Update: A and B are now allowed to be additive expressions such that
7108 B contains all variables in A. That is, computing B-A will not require
7109 subtracting variables. */
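/* A worked example with illustrative numbers: if G1 = 4*v + 8 and
G2 = 8*v + 20, then MULT = y/x = 2 and B - A*MULT = 20 - 8*2 = 4,
so G2 = 2*G1 + 4; substituting G1 = 4*v + 8 indeed gives 8*v + 20. */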
7110
7111 static rtx
7112 express_from_1 (rtx a, rtx b, rtx mult)
7113 {
7114 /* If MULT is zero, then A*MULT is zero, and our expression is B. */
7115
7116 if (mult == const0_rtx)
7117 return b;
7118
7119 /* If MULT is not 1, we cannot handle A with non-constants, since we
7120 would then be required to subtract multiples of the registers in A.
7121 This is theoretically possible, and may even apply to some Fortran
7122 constructs, but it is a lot of work and we do not attempt it here. */
7123
7124 if (mult != const1_rtx && GET_CODE (a) != CONST_INT)
7125 return NULL_RTX;
7126
7127 /* In general these structures are sorted top to bottom (down the PLUS
7128 chain), but not left to right across the PLUS. If B is a higher
7129 order giv than A, we can strip one level and recurse. If A is higher
7130 order, we'll eventually bail out, but won't know that until the end.
7131 If they are the same, we'll strip one level around this loop. */
7132
7133 while (GET_CODE (a) == PLUS && GET_CODE (b) == PLUS)
7134 {
7135 rtx ra, rb, oa, ob, tmp;
7136
7137 ra = XEXP (a, 0), oa = XEXP (a, 1);
7138 if (GET_CODE (ra) == PLUS)
7139 tmp = ra, ra = oa, oa = tmp;
7140
7141 rb = XEXP (b, 0), ob = XEXP (b, 1);
7142 if (GET_CODE (rb) == PLUS)
7143 tmp = rb, rb = ob, ob = tmp;
7144
7145 if (rtx_equal_p (ra, rb))
7146 /* We matched: remove one reg completely. */
7147 a = oa, b = ob;
7148 else if (GET_CODE (ob) != PLUS && rtx_equal_p (ra, ob))
7149 /* An alternate match. */
7150 a = oa, b = rb;
7151 else if (GET_CODE (oa) != PLUS && rtx_equal_p (oa, rb))
7152 /* An alternate match. */
7153 a = ra, b = ob;
7154 else
7155 {
7156 /* Indicates an extra register in B. Strip one level from B and
7157 recurse, hoping B was the higher order expression. */
7158 ob = express_from_1 (a, ob, mult);
7159 if (ob == NULL_RTX)
7160 return NULL_RTX;
7161 return gen_rtx_PLUS (GET_MODE (b), rb, ob);
7162 }
7163 }
7164
7165 /* Here we are at the last level of A, go through the cases hoping to
7166 get rid of everything but a constant. */
7167
7168 if (GET_CODE (a) == PLUS)
7169 {
7170 rtx ra, oa;
7171
7172 ra = XEXP (a, 0), oa = XEXP (a, 1);
7173 if (rtx_equal_p (oa, b))
7174 oa = ra;
7175 else if (!rtx_equal_p (ra, b))
7176 return NULL_RTX;
7177
7178 if (GET_CODE (oa) != CONST_INT)
7179 return NULL_RTX;
7180
7181 return GEN_INT (-INTVAL (oa) * INTVAL (mult));
7182 }
7183 else if (GET_CODE (a) == CONST_INT)
7184 {
7185 return plus_constant (b, -INTVAL (a) * INTVAL (mult));
7186 }
7187 else if (CONSTANT_P (a))
7188 {
7189 enum machine_mode mode_a = GET_MODE (a);
7190 enum machine_mode mode_b = GET_MODE (b);
7191 enum machine_mode mode = mode_b == VOIDmode ? mode_a : mode_b;
7192 return simplify_gen_binary (MINUS, mode, b, a);
7193 }
7194 else if (GET_CODE (b) == PLUS)
7195 {
7196 if (rtx_equal_p (a, XEXP (b, 0)))
7197 return XEXP (b, 1);
7198 else if (rtx_equal_p (a, XEXP (b, 1)))
7199 return XEXP (b, 0);
7200 else
7201 return NULL_RTX;
7202 }
7203 else if (rtx_equal_p (a, b))
7204 return const0_rtx;
7205
7206 return NULL_RTX;
7207 }
7208
7209 rtx
7210 express_from (struct induction *g1, struct induction *g2)
7211 {
7212 rtx mult, add;
7213
7214 /* The value that G1 will be multiplied by must be a constant integer. Also,
7215 the only chance we have of getting a valid address is if b*c/a (see above
7216 for notation) is also an integer. */
7217 if (GET_CODE (g1->mult_val) == CONST_INT
7218 && GET_CODE (g2->mult_val) == CONST_INT)
7219 {
7220 if (g1->mult_val == const0_rtx
7221 || (g1->mult_val == constm1_rtx
7222 && INTVAL (g2->mult_val)
7223 == (HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1))
7224 || INTVAL (g2->mult_val) % INTVAL (g1->mult_val) != 0)
7225 return NULL_RTX;
7226 mult = GEN_INT (INTVAL (g2->mult_val) / INTVAL (g1->mult_val));
7227 }
7228 else if (rtx_equal_p (g1->mult_val, g2->mult_val))
7229 mult = const1_rtx;
7230 else
7231 {
7232 /* ??? Find out if one is a multiple of the other? */
7233 return NULL_RTX;
7234 }
7235
7236 add = express_from_1 (g1->add_val, g2->add_val, mult);
7237 if (add == NULL_RTX)
7238 {
7239 /* Failed. If we've got a multiplication factor between G1 and G2,
7240 scale G1's addend and try again. */
7241 if (INTVAL (mult) > 1)
7242 {
7243 rtx g1_add_val = g1->add_val;
7244 if (GET_CODE (g1_add_val) == MULT
7245 && GET_CODE (XEXP (g1_add_val, 1)) == CONST_INT)
7246 {
7247 HOST_WIDE_INT m;
7248 m = INTVAL (mult) * INTVAL (XEXP (g1_add_val, 1));
7249 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val),
7250 XEXP (g1_add_val, 0), GEN_INT (m));
7251 }
7252 else
7253 {
7254 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val), g1_add_val,
7255 mult);
7256 }
7257
7258 add = express_from_1 (g1_add_val, g2->add_val, const1_rtx);
7259 }
7260 }
7261 if (add == NULL_RTX)
7262 return NULL_RTX;
7263
7264 /* Form simplified final result. */
7265 if (mult == const0_rtx)
7266 return add;
7267 else if (mult == const1_rtx)
7268 mult = g1->dest_reg;
7269 else
7270 mult = gen_rtx_MULT (g2->mode, g1->dest_reg, mult);
7271
7272 if (add == const0_rtx)
7273 return mult;
7274 else
7275 {
7276 if (GET_CODE (add) == PLUS
7277 && CONSTANT_P (XEXP (add, 1)))
7278 {
7279 rtx tem = XEXP (add, 1);
7280 mult = gen_rtx_PLUS (g2->mode, mult, XEXP (add, 0));
7281 add = tem;
7282 }
7283
7284 return gen_rtx_PLUS (g2->mode, mult, add);
7285 }
7286 }
7287 \f
7288 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7289 represented by G1. This indicates that G2 should be combined with G1 and
7290 that G2 can use (either directly or via an address expression) a register
7291 used to represent G1. */
7292
7293 static rtx
7294 combine_givs_p (struct induction *g1, struct induction *g2)
7295 {
7296 rtx comb, ret;
7297
7298 /* With the introduction of ext dependent givs, we must care for modes.
7299 G2 must not use a wider mode than G1. */
7300 if (GET_MODE_SIZE (g1->mode) < GET_MODE_SIZE (g2->mode))
7301 return NULL_RTX;
7302
7303 ret = comb = express_from (g1, g2);
7304 if (comb == NULL_RTX)
7305 return NULL_RTX;
7306 if (g1->mode != g2->mode)
7307 ret = gen_lowpart (g2->mode, comb);
7308
7309 /* If these givs are identical, they can be combined. We use the results
7310 of express_from because the addends are not in a canonical form, so
7311 rtx_equal_p is a weaker test. */
7312 /* But don't combine a DEST_REG giv with a DEST_ADDR giv; we want the
7313 combination to be the other way round. */
7314 if (comb == g1->dest_reg
7315 && (g1->giv_type == DEST_REG || g2->giv_type == DEST_ADDR))
7316 {
7317 return ret;
7318 }
7319
7320 /* If G2 can be expressed as a function of G1 and that function is valid
7321 as an address and no more expensive than using a register for G2,
7322 the expression of G2 in terms of G1 can be used. */
7323 if (ret != NULL_RTX
7324 && g2->giv_type == DEST_ADDR
7325 && memory_address_p (GET_MODE (g2->mem), ret))
7326 return ret;
7327
7328 return NULL_RTX;
7329 }
7330 \f
7331 /* Check each extension dependent giv in this class to see if its
7332 root biv is safe from wrapping in the interior mode, which would
7333 make the giv illegal. */
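/* For instance (a hypothetical illustration): a QImode biv that starts
at 250 and is incremented by 1 for 10 iterations wraps past 255, so a
giv built from (zero_extend:SI biv) would not equal the value obtained
by doing the arithmetic in SImode; such givs must be invalidated. */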
7334
7335 static void
7336 check_ext_dependent_givs (const struct loop *loop, struct iv_class *bl)
7337 {
7338 struct loop_info *loop_info = LOOP_INFO (loop);
7339 int ze_ok = 0, se_ok = 0, info_ok = 0;
7340 enum machine_mode biv_mode = GET_MODE (bl->biv->src_reg);
7341 HOST_WIDE_INT start_val;
7342 unsigned HOST_WIDE_INT u_end_val = 0;
7343 unsigned HOST_WIDE_INT u_start_val = 0;
7344 rtx incr = pc_rtx;
7345 struct induction *v;
7346
7347 /* Make sure the iteration data is available. We must have
7348 constants in order to be certain of no overflow. */
7349 if (loop_info->n_iterations > 0
7350 && bl->initial_value
7351 && GET_CODE (bl->initial_value) == CONST_INT
7352 && (incr = biv_total_increment (bl))
7353 && GET_CODE (incr) == CONST_INT
7354 /* Make sure the host can represent the arithmetic. */
7355 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (biv_mode))
7356 {
7357 unsigned HOST_WIDE_INT abs_incr, total_incr;
7358 HOST_WIDE_INT s_end_val;
7359 int neg_incr;
7360
7361 info_ok = 1;
7362 start_val = INTVAL (bl->initial_value);
7363 u_start_val = start_val;
7364
7365 neg_incr = 0, abs_incr = INTVAL (incr);
7366 if (INTVAL (incr) < 0)
7367 neg_incr = 1, abs_incr = -abs_incr;
7368 total_incr = abs_incr * loop_info->n_iterations;
7369
7370 /* Check for host arithmetic overflow. */
7371 if (total_incr / loop_info->n_iterations == abs_incr)
7372 {
7373 unsigned HOST_WIDE_INT u_max;
7374 HOST_WIDE_INT s_max;
7375
7376 u_end_val = start_val + (neg_incr ? -total_incr : total_incr);
7377 s_end_val = u_end_val;
7378 u_max = GET_MODE_MASK (biv_mode);
7379 s_max = u_max >> 1;
7380
7381 /* Check zero extension of biv ok. */
7382 if (start_val >= 0
7383 /* Check for host arithmetic overflow. */
7384 && (neg_incr
7385 ? u_end_val < u_start_val
7386 : u_end_val > u_start_val)
7387 /* Check for target arithmetic overflow. */
7388 && (neg_incr
7389 ? 1 /* taken care of with host overflow */
7390 : u_end_val <= u_max))
7391 {
7392 ze_ok = 1;
7393 }
7394
7395 /* Check sign extension of biv ok. */
7396 /* ??? While it is true that overflow with signed and pointer
7397 arithmetic is undefined, I fear too many programmers don't
7398 keep this fact in mind -- myself included on occasion.
7399 So leave the signed overflow checks in place rather than
7399 assume that such overflow cannot happen. */
7400 if (start_val >= -s_max - 1
7401 /* Check for host arithmetic overflow. */
7402 && (neg_incr
7403 ? s_end_val < start_val
7404 : s_end_val > start_val)
7405 /* Check for target arithmetic overflow. */
7406 && (neg_incr
7407 ? s_end_val >= -s_max - 1
7408 : s_end_val <= s_max))
7409 {
7410 se_ok = 1;
7411 }
7412 }
7413 }
7414
7415 /* If we know the BIV is compared at run-time against an
7416 invariant value, and the increment is +/- 1, we may also
7417 be able to prove that the BIV cannot overflow. */
7418 else if (bl->biv->src_reg == loop_info->iteration_var
7419 && loop_info->comparison_value
7420 && loop_invariant_p (loop, loop_info->comparison_value)
7421 && (incr = biv_total_increment (bl))
7422 && GET_CODE (incr) == CONST_INT)
7423 {
7424 /* If the increment is +1, and the exit test is a <,
7425 the BIV cannot overflow. (For <=, we have the
7426 problematic case that the comparison value might
7427 be the maximum value of the range.) */
7428 if (INTVAL (incr) == 1)
7429 {
7430 if (loop_info->comparison_code == LT)
7431 se_ok = ze_ok = 1;
7432 else if (loop_info->comparison_code == LTU)
7433 ze_ok = 1;
7434 }
7435
7436 /* Likewise for increment -1 and exit test >. */
7437 if (INTVAL (incr) == -1)
7438 {
7439 if (loop_info->comparison_code == GT)
7440 se_ok = ze_ok = 1;
7441 else if (loop_info->comparison_code == GTU)
7442 ze_ok = 1;
7443 }
7444 }
7445
7446 /* Invalidate givs that fail the tests. */
7447 for (v = bl->giv; v; v = v->next_iv)
7448 if (v->ext_dependent)
7449 {
7450 enum rtx_code code = GET_CODE (v->ext_dependent);
7451 int ok = 0;
7452
7453 switch (code)
7454 {
7455 case SIGN_EXTEND:
7456 ok = se_ok;
7457 break;
7458 case ZERO_EXTEND:
7459 ok = ze_ok;
7460 break;
7461
7462 case TRUNCATE:
7463 /* We don't know whether this value is being used as either
7464 signed or unsigned, so to safely truncate we must satisfy
7465 both. The initial check here verifies the BIV itself;
7466 once that is successful we may check its range wrt the
7467 derived GIV. This works only if we were able to determine
7468 constant start and end values above. */
7469 if (se_ok && ze_ok && info_ok)
7470 {
7471 enum machine_mode outer_mode = GET_MODE (v->ext_dependent);
7472 unsigned HOST_WIDE_INT max = GET_MODE_MASK (outer_mode) >> 1;
7473
7474 /* We know from the above that both endpoints are nonnegative,
7475 and that there is no wrapping. Verify that both endpoints
7476 are within the (signed) range of the outer mode. */
7477 if (u_start_val <= max && u_end_val <= max)
7478 ok = 1;
7479 }
7480 break;
7481
7482 default:
7483 abort ();
7484 }
7485
7486 if (ok)
7487 {
7488 if (loop_dump_stream)
7489 {
7490 fprintf (loop_dump_stream,
7491 "Verified ext dependent giv at %d of reg %d\n",
7492 INSN_UID (v->insn), bl->regno);
7493 }
7494 }
7495 else
7496 {
7497 if (loop_dump_stream)
7498 {
7499 const char *why;
7500
7501 if (info_ok)
7502 why = "biv iteration values overflowed";
7503 else
7504 {
7505 if (incr == pc_rtx)
7506 incr = biv_total_increment (bl);
7507 if (incr == const1_rtx)
7508 why = "biv iteration info incomplete; incr by 1";
7509 else
7510 why = "biv iteration info incomplete";
7511 }
7512
7513 fprintf (loop_dump_stream,
7514 "Failed ext dependent giv at %d, %s\n",
7515 INSN_UID (v->insn), why);
7516 }
7517 v->ignore = 1;
7518 bl->all_reduced = 0;
7519 }
7520 }
7521 }
7522
7523 /* Generate a version of VALUE in a mode appropriate for initializing V. */
7524
7525 rtx
7526 extend_value_for_giv (struct induction *v, rtx value)
7527 {
7528 rtx ext_dep = v->ext_dependent;
7529
7530 if (! ext_dep)
7531 return value;
7532
7533 /* Recall that check_ext_dependent_givs verified that the known bounds
7534 of a biv did not overflow or wrap with respect to the extension for
7535 the giv. Therefore, constants need no additional adjustment. */
7536 if (CONSTANT_P (value) && GET_MODE (value) == VOIDmode)
7537 return value;
7538
7539 /* Otherwise, we must adjust the value to compensate for the
7540 differing modes of the biv and the giv. */
7541 return gen_rtx_fmt_e (GET_CODE (ext_dep), GET_MODE (ext_dep), value);
7542 }
7543 \f
7544 struct combine_givs_stats
7545 {
7546 int giv_number;
7547 int total_benefit;
7548 };
7549
7550 static int
7551 cmp_combine_givs_stats (const void *xp, const void *yp)
7552 {
7553 const struct combine_givs_stats * const x =
7554 (const struct combine_givs_stats *) xp;
7555 const struct combine_givs_stats * const y =
7556 (const struct combine_givs_stats *) yp;
7557 int d;
7558 d = y->total_benefit - x->total_benefit;
7559 /* Stabilize the sort. */
7560 if (!d)
7561 d = x->giv_number - y->giv_number;
7562 return d;
7563 }
7564
7565 /* Check all pairs of givs for iv_class BL and see if any can be combined with
7566 any other. If so, point SAME to the giv combined with and set NEW_REG to
7567 be an expression (in terms of the other giv's DEST_REG) equivalent to the
7568 giv. Also, update BENEFIT and related fields for cost/benefit analysis. */
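/* In outline (a description of the code below, not a change to it): we
record in CAN_COMBINE every pair (G1, G2) for which combine_givs_p
succeeds, total up a benefit for each candidate G1, sort the candidates
by that benefit, greedily combine the best one with all of its mates,
deduct the consumed benefit from the remaining candidates, and restart
until no further combination is possible. */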
7569
7570 static void
7571 combine_givs (struct loop_regs *regs, struct iv_class *bl)
7572 {
7573 /* Additional benefit to add for being combined multiple times. */
7574 const int extra_benefit = 3;
7575
7576 struct induction *g1, *g2, **giv_array;
7577 int i, j, k, giv_count;
7578 struct combine_givs_stats *stats;
7579 rtx *can_combine;
7580
7581 /* Count givs, because bl->giv_count is incorrect here. */
7582 giv_count = 0;
7583 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7584 if (!g1->ignore)
7585 giv_count++;
7586
7587 giv_array = alloca (giv_count * sizeof (struct induction *));
7588 i = 0;
7589 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7590 if (!g1->ignore)
7591 giv_array[i++] = g1;
7592
7593 stats = xcalloc (giv_count, sizeof (*stats));
7594 can_combine = xcalloc (giv_count, giv_count * sizeof (rtx));
7595
7596 for (i = 0; i < giv_count; i++)
7597 {
7598 int this_benefit;
7599 rtx single_use;
7600
7601 g1 = giv_array[i];
7602 stats[i].giv_number = i;
7603
7604 /* If a DEST_REG GIV is used only once, do not allow it to combine
7605 with anything, for in doing so we will gain nothing that cannot
7606 be had by simply letting the GIV with which we would have combined
7607 to be reduced on its own. The lossage shows up in particular with
7608 DEST_ADDR targets on hosts with reg+reg addressing, though it can
7609 be seen elsewhere as well. */
7610 if (g1->giv_type == DEST_REG
7611 && (single_use = regs->array[REGNO (g1->dest_reg)].single_usage)
7612 && single_use != const0_rtx)
7613 continue;
7614
7615 this_benefit = g1->benefit;
7616 /* Add an additional weight for zero addends. */
7617 if (g1->no_const_addval)
7618 this_benefit += 1;
7619
7620 for (j = 0; j < giv_count; j++)
7621 {
7622 rtx this_combine;
7623
7624 g2 = giv_array[j];
7625 if (g1 != g2
7626 && (this_combine = combine_givs_p (g1, g2)) != NULL_RTX)
7627 {
7628 can_combine[i * giv_count + j] = this_combine;
7629 this_benefit += g2->benefit + extra_benefit;
7630 }
7631 }
7632 stats[i].total_benefit = this_benefit;
7633 }
7634
7635 /* Iterate, combining until we can't. */
7636 restart:
7637 qsort (stats, giv_count, sizeof (*stats), cmp_combine_givs_stats);
7638
7639 if (loop_dump_stream)
7640 {
7641 fprintf (loop_dump_stream, "Sorted combine statistics:\n");
7642 for (k = 0; k < giv_count; k++)
7643 {
7644 g1 = giv_array[stats[k].giv_number];
7645 if (!g1->combined_with && !g1->same)
7646 fprintf (loop_dump_stream, " {%d, %d}",
7647 INSN_UID (giv_array[stats[k].giv_number]->insn),
7648 stats[k].total_benefit);
7649 }
7650 putc ('\n', loop_dump_stream);
7651 }
7652
7653 for (k = 0; k < giv_count; k++)
7654 {
7655 int g1_add_benefit = 0;
7656
7657 i = stats[k].giv_number;
7658 g1 = giv_array[i];
7659
7660 /* If it has already been combined, skip. */
7661 if (g1->combined_with || g1->same)
7662 continue;
7663
7664 for (j = 0; j < giv_count; j++)
7665 {
7666 g2 = giv_array[j];
7667 if (g1 != g2 && can_combine[i * giv_count + j]
7668 /* If it has already been combined, skip. */
7669 && ! g2->same && ! g2->combined_with)
7670 {
7671 int l;
7672
7673 g2->new_reg = can_combine[i * giv_count + j];
7674 g2->same = g1;
7675 /* For the destination, we may now substitute a mem expression for
7676 a register. This changes the costs considerably, so add the
7677 compensation. */
7678 if (g2->giv_type == DEST_ADDR)
7679 g2->benefit = (g2->benefit + reg_address_cost
7680 - address_cost (g2->new_reg,
7681 GET_MODE (g2->mem)));
7682 g1->combined_with++;
7683 g1->lifetime += g2->lifetime;
7684
7685 g1_add_benefit += g2->benefit;
7686
7687 /* ??? The new final_[bg]iv_value code does a much better job
7688 of finding replaceable giv's, and hence this code may no
7689 longer be necessary. */
7690 if (! g2->replaceable && REG_USERVAR_P (g2->dest_reg))
7691 g1_add_benefit -= copy_cost;
7692
7693 /* To help optimize the next set of combinations, remove
7694 this giv from the benefits of other potential mates. */
7695 for (l = 0; l < giv_count; ++l)
7696 {
7697 int m = stats[l].giv_number;
7698 if (can_combine[m * giv_count + j])
7699 stats[l].total_benefit -= g2->benefit + extra_benefit;
7700 }
7701
7702 if (loop_dump_stream)
7703 fprintf (loop_dump_stream,
7704 "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n",
7705 INSN_UID (g2->insn), INSN_UID (g1->insn),
7706 g1->benefit, g1_add_benefit, g1->lifetime);
7707 }
7708 }
7709
7710 /* To help optimize the next set of combinations, remove
7711 this giv from the benefits of other potential mates. */
7712 if (g1->combined_with)
7713 {
7714 for (j = 0; j < giv_count; ++j)
7715 {
7716 int m = stats[j].giv_number;
7717 if (can_combine[m * giv_count + i])
7718 stats[j].total_benefit -= g1->benefit + extra_benefit;
7719 }
7720
7721 g1->benefit += g1_add_benefit;
7722
7723 /* We've finished with this giv, and everything it touched.
7724 Restart the combination so that proper weights for the
7725 rest of the givs are properly taken into account. */
7726 /* ??? Ideally we would compact the arrays at this point, so
7727 as to not cover old ground. But sanely compacting
7728 can_combine is tricky. */
7729 goto restart;
7730 }
7731 }
7732
7733 /* Clean up. */
7734 free (stats);
7735 free (can_combine);
7736 }
7737 \f
7738 /* Generate sequence for REG = B * M + A. B is the initial value of
7739 the basic induction variable, M a multiplicative constant, A an
7740 additive constant and REG the destination register. */
7741
7742 static rtx
7743 gen_add_mult (rtx b, rtx m, rtx a, rtx reg)
7744 {
7745 rtx seq;
7746 rtx result;
7747
7748 start_sequence ();
7749 /* Use unsigned arithmetic. */
7750 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7751 if (reg != result)
7752 emit_move_insn (reg, result);
7753 seq = get_insns ();
7754 end_sequence ();
7755
7756 return seq;
7757 }
7758
7759
7760 /* Update registers created in insn sequence SEQ. */
7761
7762 static void
7763 loop_regs_update (const struct loop *loop ATTRIBUTE_UNUSED, rtx seq)
7764 {
7765 rtx insn;
7766
7767 /* Update register info for alias analysis. */
7768
7769 insn = seq;
7770 while (insn != NULL_RTX)
7771 {
7772 rtx set = single_set (insn);
7773
7774 if (set && REG_P (SET_DEST (set)))
7775 record_base_value (REGNO (SET_DEST (set)), SET_SRC (set), 0);
7776
7777 insn = NEXT_INSN (insn);
7778 }
7779 }
7780
7781
7782 /* EMIT code before BEFORE_BB/BEFORE_INSN to set REG = B * M + A. B
7783 is the initial value of the basic induction variable, M a
7784 multiplicative constant, A an additive constant and REG the
7785 destination register. */
7786
7787 void
7788 loop_iv_add_mult_emit_before (const struct loop *loop, rtx b, rtx m, rtx a,
7789 rtx reg, basic_block before_bb, rtx before_insn)
7790 {
7791 rtx seq;
7792
7793 if (! before_insn)
7794 {
7795 loop_iv_add_mult_hoist (loop, b, m, a, reg);
7796 return;
7797 }
7798
7799 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7800 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7801
7802 /* Increase the lifetime of any invariants moved further in code. */
7803 update_reg_last_use (a, before_insn);
7804 update_reg_last_use (b, before_insn);
7805 update_reg_last_use (m, before_insn);
7806
7807 /* It is possible that the expansion created lots of new registers.
7808 Iterate over the sequence we just created and record them all. We
7809 must do this before inserting the sequence. */
7810 loop_regs_update (loop, seq);
7811
7812 loop_insn_emit_before (loop, before_bb, before_insn, seq);
7813 }
7814
7815
7816 /* Emit insns after the loop's end (at the loop sink) to set
7817 REG = B * M + A. B is the initial value of the basic induction
7818 variable, M a multiplicative constant, A an additive constant and
7819 REG the destination register. */
7820
7821 void
7822 loop_iv_add_mult_sink (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
7823 {
7824 rtx seq;
7825
7826 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7827 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7828
7829 /* Increase the lifetime of any invariants moved further in code.
7830 ??? Is this really necessary? */
7831 update_reg_last_use (a, loop->sink);
7832 update_reg_last_use (b, loop->sink);
7833 update_reg_last_use (m, loop->sink);
7834
7835 /* It is possible that the expansion created lots of new registers.
7836 Iterate over the sequence we just created and record them all. We
7837 must do this before inserting the sequence. */
7838 loop_regs_update (loop, seq);
7839
7840 loop_insn_sink (loop, seq);
7841 }
7842
7843
7844 /* Emit insns in the loop pre-header to set REG = B * M + A. B is the
7845 initial value of the basic induction variable, M a multiplicative
7846 constant, A an additive constant and REG the destination register. */
7847
7848 void
7849 loop_iv_add_mult_hoist (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
7850 {
7851 rtx seq;
7852
7853 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7854 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7855
7856 /* It is possible that the expansion created lots of new registers.
7857 Iterate over the sequence we just created and record them all. We
7858 must do this before inserting the sequence. */
7859 loop_regs_update (loop, seq);
7860
7861 loop_insn_hoist (loop, seq);
7862 }
7863
7864
7865
7866 /* Similar to gen_add_mult, but compute cost rather than generating
7867 sequence. */
7868
7869 static int
7870 iv_add_mult_cost (rtx b, rtx m, rtx a, rtx reg)
7871 {
7872 int cost = 0;
7873 rtx last, result;
7874
7875 start_sequence ();
7876 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7877 if (reg != result)
7878 emit_move_insn (reg, result);
7879 last = get_last_insn ();
7880 while (last)
7881 {
7882 rtx t = single_set (last);
7883 if (t)
7884 cost += rtx_cost (SET_SRC (t), SET);
7885 last = PREV_INSN (last);
7886 }
7887 end_sequence ();
7888 return cost;
7889 }
7890 \f
7891 /* Test whether A * B can be computed without
7892 an actual multiply insn. Value is 1 if so.
7893
7894 ??? This function stinks because it generates a ton of wasted RTL
7895 ??? and as a result fragments GC memory to no end. There are other
7896 ??? places in the compiler which are invoked a lot and do the same
7897 ??? thing, generate wasted RTL just to see if something is possible. */
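/* For example (target-dependent, so only an illustration): a product by
the constant 5 usually expands as (x << 2) + x -- a shift and an add,
with no MULT insn in the sequence -- and is therefore considered cheap,
whereas a constant with no such short decomposition produces a real
multiply (or a libcall) and is rejected. */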
7898
7899 static int
7900 product_cheap_p (rtx a, rtx b)
7901 {
7902 rtx tmp;
7903 int win, n_insns;
7904
7905 /* If only one is constant, make it B. */
7906 if (GET_CODE (a) == CONST_INT)
7907 tmp = a, a = b, b = tmp;
7908
7909 /* If first constant, both constant, so don't need multiply. */
7910 if (GET_CODE (a) == CONST_INT)
7911 return 1;
7912
7913 /* If second not constant, neither is constant, so would need multiply. */
7914 if (GET_CODE (b) != CONST_INT)
7915 return 0;
7916
7917 /* One operand is constant, so might not need multiply insn. Generate the
7918 code for the multiply and see if a call or multiply, or long sequence
7919 of insns is generated. */
7920
7921 start_sequence ();
7922 expand_mult (GET_MODE (a), a, b, NULL_RTX, 1);
7923 tmp = get_insns ();
7924 end_sequence ();
7925
7926 win = 1;
7927 if (INSN_P (tmp))
7928 {
7929 n_insns = 0;
7930 while (tmp != NULL_RTX)
7931 {
7932 rtx next = NEXT_INSN (tmp);
7933
7934 if (++n_insns > 3
7935 || GET_CODE (tmp) != INSN
7936 || (GET_CODE (PATTERN (tmp)) == SET
7937 && GET_CODE (SET_SRC (PATTERN (tmp))) == MULT)
7938 || (GET_CODE (PATTERN (tmp)) == PARALLEL
7939 && GET_CODE (XVECEXP (PATTERN (tmp), 0, 0)) == SET
7940 && GET_CODE (SET_SRC (XVECEXP (PATTERN (tmp), 0, 0))) == MULT))
7941 {
7942 win = 0;
7943 break;
7944 }
7945
7946 tmp = next;
7947 }
7948 }
7949 else if (GET_CODE (tmp) == SET
7950 && GET_CODE (SET_SRC (tmp)) == MULT)
7951 win = 0;
7952 else if (GET_CODE (tmp) == PARALLEL
7953 && GET_CODE (XVECEXP (tmp, 0, 0)) == SET
7954 && GET_CODE (SET_SRC (XVECEXP (tmp, 0, 0))) == MULT)
7955 win = 0;
7956
7957 return win;
7958 }
7959 \f
7960 /* Check to see if loop can be terminated by a "decrement and branch until
7961 zero" instruction. If so, add a REG_NONNEG note to the branch insn if so.
7962 Also try reversing an increment loop to a decrement loop
7963 to see if the optimization can be performed.
7964 Value is nonzero if optimization was performed. */
7965
7966 /* This is useful even if the architecture doesn't have such an insn,
7967 because it might change a loop which increments from 0 to n to a loop
7968 which decrements from n to 0. A loop that decrements to zero is usually
7969 faster than one that increments from zero. */
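/* A source-level sketch of the transformation (illustrative only; the
pass works on RTL, and the counter must have no use except counting):
   for (i = 0; i < n; i++)  *p++ = 0;
becomes
   for (i = n; i > 0; i--)  *p++ = 0;
On machines with a decrement-and-branch-until-zero insn (such as the
m68k dbra) the reversed form needs no separate compare. */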
7970
7971 /* ??? This could be rewritten to use some of the loop unrolling procedures,
7972 such as approx_final_value, biv_total_increment, loop_iterations, and
7973 final_[bg]iv_value. */
7974
7975 static int
7976 check_dbra_loop (struct loop *loop, int insn_count)
7977 {
7978 struct loop_info *loop_info = LOOP_INFO (loop);
7979 struct loop_regs *regs = LOOP_REGS (loop);
7980 struct loop_ivs *ivs = LOOP_IVS (loop);
7981 struct iv_class *bl;
7982 rtx reg;
7983 enum machine_mode mode;
7984 rtx jump_label;
7985 rtx final_value;
7986 rtx start_value;
7987 rtx new_add_val;
7988 rtx comparison;
7989 rtx before_comparison;
7990 rtx p;
7991 rtx jump;
7992 rtx first_compare;
7993 int compare_and_branch;
7994 rtx loop_start = loop->start;
7995 rtx loop_end = loop->end;
7996
7997 /* If last insn is a conditional branch, and the insn before tests a
7998 register value, try to optimize it. Otherwise, we can't do anything. */
7999
8000 jump = PREV_INSN (loop_end);
8001 comparison = get_condition_for_loop (loop, jump);
8002 if (comparison == 0)
8003 return 0;
8004 if (!onlyjump_p (jump))
8005 return 0;
8006
8007 /* Try to compute whether the compare/branch at the loop end is one or
8008 two instructions. */
8009 get_condition (jump, &first_compare, false);
8010 if (first_compare == jump)
8011 compare_and_branch = 1;
8012 else if (first_compare == prev_nonnote_insn (jump))
8013 compare_and_branch = 2;
8014 else
8015 return 0;
8016
8017 {
8018 /* If more than one condition is present to control the loop, then
8019 do not proceed, as this function does not know how to rewrite
8020 loop tests with more than one condition.
8021
8022 Look backwards from the first insn in the last comparison
8023 sequence and see if we've got another comparison sequence. */
8024
8025 rtx jump1;
8026 if ((jump1 = prev_nonnote_insn (first_compare)) != loop->cont)
8027 if (GET_CODE (jump1) == JUMP_INSN)
8028 return 0;
8029 }
8030
8031 /* Check all of the bivs to see if the compare uses one of them.
8032 Skip biv's set more than once because we can't guarantee that
8033 it will be zero on the last iteration. Also skip if the biv is
8034 used between its update and the test insn. */
8035
8036 for (bl = ivs->list; bl; bl = bl->next)
8037 {
8038 if (bl->biv_count == 1
8039 && ! bl->biv->maybe_multiple
8040 && bl->biv->dest_reg == XEXP (comparison, 0)
8041 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
8042 first_compare))
8043 break;
8044 }
8045
8046 /* Try swapping the comparison to identify a suitable biv. */
8047 if (!bl)
8048 for (bl = ivs->list; bl; bl = bl->next)
8049 if (bl->biv_count == 1
8050 && ! bl->biv->maybe_multiple
8051 && bl->biv->dest_reg == XEXP (comparison, 1)
8052 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
8053 first_compare))
8054 {
8055 comparison = gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)),
8056 VOIDmode,
8057 XEXP (comparison, 1),
8058 XEXP (comparison, 0));
8059 break;
8060 }
8061
8062 if (! bl)
8063 return 0;
8064
8065 /* Look for the case where the basic induction variable is always
8066 nonnegative, and equals zero on the last iteration.
8067 In this case, add a reg_note REG_NONNEG, which allows the
8068 m68k DBRA instruction to be used. */
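/* For example (illustrative values): a biv initialized to 12 and
decremented by 4 satisfies 12 % 4 == 0, so it passes through 8 and 4
and reaches exactly 0 on the last iteration, never going negative. */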
8069
8070 if (((GET_CODE (comparison) == GT && XEXP (comparison, 1) == constm1_rtx)
8071 || (GET_CODE (comparison) == NE && XEXP (comparison, 1) == const0_rtx))
8072 && GET_CODE (bl->biv->add_val) == CONST_INT
8073 && INTVAL (bl->biv->add_val) < 0)
8074 {
8075 /* Initial value must be greater than 0, and
8076 init_val % -dec_value == 0 to ensure that it equals zero on
8077 the last iteration. */
8078
8079 if (GET_CODE (bl->initial_value) == CONST_INT
8080 && INTVAL (bl->initial_value) > 0
8081 && (INTVAL (bl->initial_value)
8082 % (-INTVAL (bl->biv->add_val))) == 0)
8083 {
8084 /* Register always nonnegative, add REG_NOTE to branch. */
8085 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8086 REG_NOTES (jump)
8087 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8088 REG_NOTES (jump));
8089 bl->nonneg = 1;
8090
8091 return 1;
8092 }
8093
8094 /* If the decrement is 1 and the value was tested as >= 0 before
8095 the loop, then we can safely optimize. */
8096 for (p = loop_start; p; p = PREV_INSN (p))
8097 {
8098 if (GET_CODE (p) == CODE_LABEL)
8099 break;
8100 if (GET_CODE (p) != JUMP_INSN)
8101 continue;
8102
8103 before_comparison = get_condition_for_loop (loop, p);
8104 if (before_comparison
8105 && XEXP (before_comparison, 0) == bl->biv->dest_reg
8106 && (GET_CODE (before_comparison) == LT
8107 || GET_CODE (before_comparison) == LTU)
8108 && XEXP (before_comparison, 1) == const0_rtx
8109 && ! reg_set_between_p (bl->biv->dest_reg, p, loop_start)
8110 && INTVAL (bl->biv->add_val) == -1)
8111 {
8112 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8113 REG_NOTES (jump)
8114 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8115 REG_NOTES (jump));
8116 bl->nonneg = 1;
8117
8118 return 1;
8119 }
8120 }
8121 }
8122 else if (GET_CODE (bl->biv->add_val) == CONST_INT
8123 && INTVAL (bl->biv->add_val) > 0)
8124 {
8125 /* Try to change inc to dec, so can apply above optimization. */
8126 /* Can do this if:
8127 all registers modified are induction variables or invariant,
8128 all memory references have non-overlapping addresses
8129 (obviously true if only one write);
8130 allow 2 insns for the compare/jump at the end of the loop. */
8131 /* Also, we must avoid any instructions which use both the reversed
8132 biv and another biv. Such instructions will fail if the loop is
8133 reversed. We meet this condition by requiring that either
8134 no_use_except_counting is true, or else that there is only
8135 one biv. */
8136 int num_nonfixed_reads = 0;
8137 /* 1 if the iteration var is used only to count iterations. */
8138 int no_use_except_counting = 0;
8139 /* 1 if the loop has no memory store, or it has a single memory store
8140 which is reversible. */
8141 int reversible_mem_store = 1;
8142
8143 if (bl->giv_count == 0
8144 && !loop->exit_count
8145 && !loop_info->has_multiple_exit_targets)
8146 {
8147 rtx bivreg = regno_reg_rtx[bl->regno];
8148 struct iv_class *blt;
8149
8150 /* If there are no givs for this biv, and the only exit is the
8151 fall through at the end of the loop, then
8152 see if perhaps there are no uses except to count. */
8153 no_use_except_counting = 1;
8154 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8155 if (INSN_P (p))
8156 {
8157 rtx set = single_set (p);
8158
8159 if (set && REG_P (SET_DEST (set))
8160 && REGNO (SET_DEST (set)) == bl->regno)
8161 /* An insn that sets the biv is okay. */
8162 ;
8163 else if (!reg_mentioned_p (bivreg, PATTERN (p)))
8164 /* An insn that doesn't mention the biv is okay. */
8165 ;
8166 else if (p == prev_nonnote_insn (prev_nonnote_insn (loop_end))
8167 || p == prev_nonnote_insn (loop_end))
8168 {
8169 /* If either of these insns uses the biv and sets a pseudo
8170 that has more than one usage, then the biv has uses
8171 other than counting since it's used to derive a value
8172 that is used more than one time. */
8173 note_stores (PATTERN (p), note_set_pseudo_multiple_uses,
8174 regs);
8175 if (regs->multiple_uses)
8176 {
8177 no_use_except_counting = 0;
8178 break;
8179 }
8180 }
8181 else
8182 {
8183 no_use_except_counting = 0;
8184 break;
8185 }
8186 }
8187
8188 /* A biv has uses besides counting if it is used to set
8189 another biv. */
8190 for (blt = ivs->list; blt; blt = blt->next)
8191 if (blt->init_set
8192 && reg_mentioned_p (bivreg, SET_SRC (blt->init_set)))
8193 {
8194 no_use_except_counting = 0;
8195 break;
8196 }
8197 }
8198
8199 if (no_use_except_counting)
8200 /* No need to worry about MEMs. */
8201 ;
8202 else if (loop_info->num_mem_sets <= 1)
8203 {
8204 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8205 if (INSN_P (p))
8206 num_nonfixed_reads += count_nonfixed_reads (loop, PATTERN (p));
8207
8208 /* If the loop has a single store, and the destination address is
8209 invariant, then we can't reverse the loop, because this address
8210 might then have the wrong value at loop exit.
8211 This would work if the source were invariant also; however, in that
8212 case, the insn should have been moved out of the loop. */
8213
8214 if (loop_info->num_mem_sets == 1)
8215 {
8216 struct induction *v;
8217
8218 /* If we could prove that each of the memory locations
8219 written to was different, then we could reverse the
8220 store -- but we don't presently have any way of
8221 knowing that. */
8222 reversible_mem_store = 0;
8223
8224 /* If the store depends on a register that is set after the
8225 store, it depends on the initial value, and is thus not
8226 reversible. */
8227 for (v = bl->giv; reversible_mem_store && v; v = v->next_iv)
8228 {
8229 if (v->giv_type == DEST_REG
8230 && reg_mentioned_p (v->dest_reg,
8231 PATTERN (loop_info->first_loop_store_insn))
8232 && loop_insn_first_p (loop_info->first_loop_store_insn,
8233 v->insn))
8234 reversible_mem_store = 0;
8235 }
8236 }
8237 }
8238 else
8239 return 0;
8240
8241 /* This code only acts for innermost loops. Also it simplifies
8242 the memory address check by only reversing loops with
8243 zero or one memory access.
8244 Two memory accesses could involve parts of the same array,
8245 and that can't be reversed.
8246 If the biv is used only for counting, then we don't need to worry
8247 about all these things. */
8248
8249 if ((num_nonfixed_reads <= 1
8250 && ! loop_info->has_nonconst_call
8251 && ! loop_info->has_prefetch
8252 && ! loop_info->has_volatile
8253 && reversible_mem_store
8254 && (bl->giv_count + bl->biv_count + loop_info->num_mem_sets
8255 + num_unmoved_movables (loop) + compare_and_branch == insn_count)
8256 && (bl == ivs->list && bl->next == 0))
8257 || (no_use_except_counting && ! loop_info->has_prefetch))
8258 {
8259 rtx tem;
8260
8261 /* Loop can be reversed. */
8262 if (loop_dump_stream)
8263 fprintf (loop_dump_stream, "Can reverse loop\n");
8264
8265 /* Now check other conditions:
8266
8267 The increment must be a constant, as must the initial value,
8268 and the comparison code must be LT.
8269
8270 This test can probably be improved since +/- 1 in the constant
8271 can be obtained by changing LT to LE and vice versa; this is
8272 confusing. */
8273
8274 if (comparison
8275 /* for constants, LE gets turned into LT */
8276 && (GET_CODE (comparison) == LT
8277 || (GET_CODE (comparison) == LE
8278 && no_use_except_counting)
8279 || GET_CODE (comparison) == LTU))
8280 {
8281 HOST_WIDE_INT add_val, add_adjust, comparison_val = 0;
8282 rtx initial_value, comparison_value;
8283 int nonneg = 0;
8284 enum rtx_code cmp_code;
8285 int comparison_const_width;
8286 unsigned HOST_WIDE_INT comparison_sign_mask;
8287
8288 add_val = INTVAL (bl->biv->add_val);
8289 comparison_value = XEXP (comparison, 1);
8290 if (GET_MODE (comparison_value) == VOIDmode)
8291 comparison_const_width
8292 = GET_MODE_BITSIZE (GET_MODE (XEXP (comparison, 0)));
8293 else
8294 comparison_const_width
8295 = GET_MODE_BITSIZE (GET_MODE (comparison_value));
8296 if (comparison_const_width > HOST_BITS_PER_WIDE_INT)
8297 comparison_const_width = HOST_BITS_PER_WIDE_INT;
8298 comparison_sign_mask
8299 = (unsigned HOST_WIDE_INT) 1 << (comparison_const_width - 1);
8300
8301 /* If the comparison value is not a loop invariant, then we
8302 can not reverse this loop.
8303
8304 ??? If the insns which initialize the comparison value as
8305 a whole compute an invariant result, then we could move
8306 them out of the loop and proceed with loop reversal. */
8307 if (! loop_invariant_p (loop, comparison_value))
8308 return 0;
8309
8310 if (GET_CODE (comparison_value) == CONST_INT)
8311 comparison_val = INTVAL (comparison_value);
8312 initial_value = bl->initial_value;
8313
8314 /* Normalize the initial value if it is an integer and
8315 has no other use except as a counter. This will allow
8316 a few more loops to be reversed. */
8317 if (no_use_except_counting
8318 && GET_CODE (comparison_value) == CONST_INT
8319 && GET_CODE (initial_value) == CONST_INT)
8320 {
8321 comparison_val = comparison_val - INTVAL (bl->initial_value);
8322 /* The code below requires comparison_val to be a multiple
8323 of add_val in order to do the loop reversal, so
8324 round up comparison_val to a multiple of add_val.
8325 Since comparison_value is constant, we know that the
8326 current comparison code is LT. */
8327 comparison_val = comparison_val + add_val - 1;
8328 comparison_val
8329 -= (unsigned HOST_WIDE_INT) comparison_val % add_val;
8330 /* We postpone overflow checks for COMPARISON_VAL here;
8331 even if there is an overflow, we might still be able to
8332 reverse the loop, if converting the loop exit test to
8333 NE is possible. */
8334 initial_value = const0_rtx;
8335 }
8336
8337 /* First check if we can do a vanilla loop reversal. */
8338 if (initial_value == const0_rtx
8339 /* If we have a decrement_and_branch_on_count,
8340 prefer the NE test, since this will allow that
8341 instruction to be generated. Note that we must
8342 use a vanilla loop reversal if the biv is used to
8343 calculate a giv or has a non-counting use. */
8344 #if ! defined (HAVE_decrement_and_branch_until_zero) \
8345 && defined (HAVE_decrement_and_branch_on_count)
8346 && (! (add_val == 1 && loop->vtop
8347 && (bl->biv_count == 0
8348 || no_use_except_counting)))
8349 #endif
8350 && GET_CODE (comparison_value) == CONST_INT
8351 /* Now do postponed overflow checks on COMPARISON_VAL. */
8352 && ! (((comparison_val - add_val) ^ INTVAL (comparison_value))
8353 & comparison_sign_mask))
8354 {
8355 /* Register will always be nonnegative, with value
8356 0 on last iteration */
8357 add_adjust = add_val;
8358 nonneg = 1;
8359 cmp_code = GE;
8360 }
8361 else if (add_val == 1 && loop->vtop
8362 && (bl->biv_count == 0
8363 || no_use_except_counting))
8364 {
8365 add_adjust = 0;
8366 cmp_code = NE;
8367 }
8368 else
8369 return 0;
8370
8371 if (GET_CODE (comparison) == LE)
8372 add_adjust -= add_val;
8373
8374 /* If the initial value is not zero, or if the comparison
8375 value is not an exact multiple of the increment, then we
8376 can not reverse this loop. */
8377 if (initial_value == const0_rtx
8378 && GET_CODE (comparison_value) == CONST_INT)
8379 {
8380 if (((unsigned HOST_WIDE_INT) comparison_val % add_val) != 0)
8381 return 0;
8382 }
8383 else
8384 {
8385 if (! no_use_except_counting || add_val != 1)
8386 return 0;
8387 }
8388
8389 final_value = comparison_value;
8390
8391 /* Reset these in case we normalized the initial value
8392 and comparison value above. */
8393 if (GET_CODE (comparison_value) == CONST_INT
8394 && GET_CODE (initial_value) == CONST_INT)
8395 {
8396 comparison_value = GEN_INT (comparison_val);
8397 final_value
8398 = GEN_INT (comparison_val + INTVAL (bl->initial_value));
8399 }
8400 bl->initial_value = initial_value;
8401
8402 /* Save some info needed to produce the new insns. */
8403 reg = bl->biv->dest_reg;
8404 mode = GET_MODE (reg);
8405 jump_label = condjump_label (PREV_INSN (loop_end));
8406 new_add_val = GEN_INT (-INTVAL (bl->biv->add_val));
8407
8408 /* Set start_value; if this is not a CONST_INT, we need
8409 to generate a SUB.
8410 Initialize biv to start_value before loop start.
8411 The old initializing insn will be deleted as a
8412 dead store by flow.c. */
8413 if (initial_value == const0_rtx
8414 && GET_CODE (comparison_value) == CONST_INT)
8415 {
8416 start_value
8417 = gen_int_mode (comparison_val - add_adjust, mode);
8418 loop_insn_hoist (loop, gen_move_insn (reg, start_value));
8419 }
8420 else if (GET_CODE (initial_value) == CONST_INT)
8421 {
8422 rtx offset = GEN_INT (-INTVAL (initial_value) - add_adjust);
8423 rtx add_insn = gen_add3_insn (reg, comparison_value, offset);
8424
8425 if (add_insn == 0)
8426 return 0;
8427
8428 start_value
8429 = gen_rtx_PLUS (mode, comparison_value, offset);
8430 loop_insn_hoist (loop, add_insn);
8431 if (GET_CODE (comparison) == LE)
8432 final_value = gen_rtx_PLUS (mode, comparison_value,
8433 GEN_INT (add_val));
8434 }
8435 else if (! add_adjust)
8436 {
8437 rtx sub_insn = gen_sub3_insn (reg, comparison_value,
8438 initial_value);
8439
8440 if (sub_insn == 0)
8441 return 0;
8442 start_value
8443 = gen_rtx_MINUS (mode, comparison_value, initial_value);
8444 loop_insn_hoist (loop, sub_insn);
8445 }
8446 else
8447 /* We could handle the other cases too, but it'll be
8448 better to have a testcase first. */
8449 return 0;
8450
8451 /* We may not have a single insn which can increment a reg, so
8452 create a sequence to hold all the insns from expand_inc. */
8453 start_sequence ();
8454 expand_inc (reg, new_add_val);
8455 tem = get_insns ();
8456 end_sequence ();
8457
8458 p = loop_insn_emit_before (loop, 0, bl->biv->insn, tem);
8459 delete_insn (bl->biv->insn);
8460
8461 /* Update biv info to reflect its new status. */
8462 bl->biv->insn = p;
8463 bl->initial_value = start_value;
8464 bl->biv->add_val = new_add_val;
8465
8466 /* Update loop info. */
8467 loop_info->initial_value = reg;
8468 loop_info->initial_equiv_value = reg;
8469 loop_info->final_value = const0_rtx;
8470 loop_info->final_equiv_value = const0_rtx;
8471 loop_info->comparison_value = const0_rtx;
8472 loop_info->comparison_code = cmp_code;
8473 loop_info->increment = new_add_val;
8474
8475 /* Inc LABEL_NUSES so that delete_insn will
8476 not delete the label. */
8477 LABEL_NUSES (XEXP (jump_label, 0))++;
8478
8479 /* Emit an insn after the end of the loop to set the biv's
8480 proper exit value if it is used anywhere outside the loop. */
8481 if ((REGNO_LAST_UID (bl->regno) != INSN_UID (first_compare))
8482 || ! bl->init_insn
8483 || REGNO_FIRST_UID (bl->regno) != INSN_UID (bl->init_insn))
8484 loop_insn_sink (loop, gen_load_of_final_value (reg, final_value));
8485
8486 /* Delete compare/branch at end of loop. */
8487 delete_related_insns (PREV_INSN (loop_end));
8488 if (compare_and_branch == 2)
8489 delete_related_insns (first_compare);
8490
8491 /* Add new compare/branch insn at end of loop. */
8492 start_sequence ();
8493 emit_cmp_and_jump_insns (reg, const0_rtx, cmp_code, NULL_RTX,
8494 mode, 0,
8495 XEXP (jump_label, 0));
8496 tem = get_insns ();
8497 end_sequence ();
8498 emit_jump_insn_before (tem, loop_end);
8499
8500 for (tem = PREV_INSN (loop_end);
8501 tem && GET_CODE (tem) != JUMP_INSN;
8502 tem = PREV_INSN (tem))
8503 ;
8504
8505 if (tem)
8506 JUMP_LABEL (tem) = XEXP (jump_label, 0);
8507
8508 if (nonneg)
8509 {
8510 if (tem)
8511 {
8512 /* Increment of LABEL_NUSES done above. */
8513 /* Register is now always nonnegative,
8514 so add REG_NONNEG note to the branch. */
8515 REG_NOTES (tem) = gen_rtx_EXPR_LIST (REG_NONNEG, reg,
8516 REG_NOTES (tem));
8517 }
8518 bl->nonneg = 1;
8519 }
8520
8521 /* No insn may reference both the reversed and another biv or it
8522 will fail (see comment near the top of the loop reversal
8523 code).
8524 Earlier on, we have verified that the biv has no use except
8525 counting, or it is the only biv in this loop.
8526 However, the code that computes no_use_except_counting does
8527 not verify reg notes. It's possible to have an insn that
8528 references another biv, and has a REG_EQUAL note with an
8529 expression based on the reversed biv. To avoid this case,
8530 remove all REG_EQUAL notes based on the reversed biv
8531 here. */
8532 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8533 if (INSN_P (p))
8534 {
8535 rtx *pnote;
8536 rtx set = single_set (p);
8537 /* If this is a set of a GIV based on the reversed biv, any
8538 REG_EQUAL notes should still be correct. */
8539 if (! set
8540 || !REG_P (SET_DEST (set))
8541 || (size_t) REGNO (SET_DEST (set)) >= ivs->n_regs
8542 || REG_IV_TYPE (ivs, REGNO (SET_DEST (set))) != GENERAL_INDUCT
8543 || REG_IV_INFO (ivs, REGNO (SET_DEST (set)))->src_reg != bl->biv->src_reg)
8544 for (pnote = &REG_NOTES (p); *pnote;)
8545 {
8546 if (REG_NOTE_KIND (*pnote) == REG_EQUAL
8547 && reg_mentioned_p (regno_reg_rtx[bl->regno],
8548 XEXP (*pnote, 0)))
8549 *pnote = XEXP (*pnote, 1);
8550 else
8551 pnote = &XEXP (*pnote, 1);
8552 }
8553 }
8554
8555 /* Mark that this biv has been reversed. Each giv which depends
8556 on this biv, and which is also live past the end of the loop
8557 will have to be fixed up. */
8558
8559 bl->reversed = 1;
8560
8561 if (loop_dump_stream)
8562 {
8563 fprintf (loop_dump_stream, "Reversed loop");
8564 if (bl->nonneg)
8565 fprintf (loop_dump_stream, " and added reg_nonneg\n");
8566 else
8567 fprintf (loop_dump_stream, "\n");
8568 }
8569
8570 return 1;
8571 }
8572 }
8573 }
8574
8575 return 0;
8576 }
8577 \f
8578 /* Verify whether the biv BL appears to be eliminable,
8579 based on the insns in the loop that refer to it.
8580
8581 If ELIMINATE_P is nonzero, actually do the elimination.
8582
8583 THRESHOLD and INSN_COUNT are from loop_optimize and are used to
8584 determine whether invariant insns should be placed inside or at the
8585 start of the loop. */
8586
8587 static int
8588 maybe_eliminate_biv (const struct loop *loop, struct iv_class *bl,
8589 int eliminate_p, int threshold, int insn_count)
8590 {
8591 struct loop_ivs *ivs = LOOP_IVS (loop);
8592 rtx reg = bl->biv->dest_reg;
8593 rtx p;
8594
8595 /* Scan all insns in the loop, stopping if we find one that uses the
8596 biv in a way that we cannot eliminate. */
8597
8598 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
8599 {
8600 enum rtx_code code = GET_CODE (p);
8601 basic_block where_bb = 0;
8602 rtx where_insn = threshold >= insn_count ? 0 : p;
8603 rtx note;
8604
8605 /* If this is a libcall that sets a giv, skip ahead to its end. */
8606 if (INSN_P (p))
8607 {
8608 note = find_reg_note (p, REG_LIBCALL, NULL_RTX);
8609
8610 if (note)
8611 {
8612 rtx last = XEXP (note, 0);
8613 rtx set = single_set (last);
8614
8615 if (set && REG_P (SET_DEST (set)))
8616 {
8617 unsigned int regno = REGNO (SET_DEST (set));
8618
8619 if (regno < ivs->n_regs
8620 && REG_IV_TYPE (ivs, regno) == GENERAL_INDUCT
8621 && REG_IV_INFO (ivs, regno)->src_reg == bl->biv->src_reg)
8622 p = last;
8623 }
8624 }
8625 }
8626
8627 /* Closely examine the insn if the biv is mentioned. */
8628 if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
8629 && reg_mentioned_p (reg, PATTERN (p))
8630 && ! maybe_eliminate_biv_1 (loop, PATTERN (p), p, bl,
8631 eliminate_p, where_bb, where_insn))
8632 {
8633 if (loop_dump_stream)
8634 fprintf (loop_dump_stream,
8635 "Cannot eliminate biv %d: biv used in insn %d.\n",
8636 bl->regno, INSN_UID (p));
8637 break;
8638 }
8639
8640 /* If we are eliminating, kill REG_EQUAL notes mentioning the biv. */
8641 if (eliminate_p
8642 && (note = find_reg_note (p, REG_EQUAL, NULL_RTX)) != NULL_RTX
8643 && reg_mentioned_p (reg, XEXP (note, 0)))
8644 remove_note (p, note);
8645 }
8646
8647 if (p == loop->end)
8648 {
8649 if (loop_dump_stream)
8650 fprintf (loop_dump_stream, "biv %d %s eliminated.\n",
8651 bl->regno, eliminate_p ? "was" : "can be");
8652 return 1;
8653 }
8654
8655 return 0;
8656 }
8657 \f
8658 /* INSN and REFERENCE are instructions in the same insn chain.
8659 Return nonzero if INSN is first. */
8660
8661 int
8662 loop_insn_first_p (rtx insn, rtx reference)
8663 {
8664 rtx p, q;
8665
8666 for (p = insn, q = reference;;)
8667 {
8668 /* Start with test for not first so that INSN == REFERENCE yields not
8669 first. */
8670 if (q == insn || ! p)
8671 return 0;
8672 if (p == reference || ! q)
8673 return 1;
8674
8675 /* Either of P or Q might be a NOTE. Notes have the same LUID as the
8676 previous insn, hence the <= comparison below does not work if
8677 P is a note. */
8678 if (INSN_UID (p) < max_uid_for_loop
8679 && INSN_UID (q) < max_uid_for_loop
8680 && GET_CODE (p) != NOTE)
8681 return INSN_LUID (p) <= INSN_LUID (q);
8682
8683 if (INSN_UID (p) >= max_uid_for_loop
8684 || GET_CODE (p) == NOTE)
8685 p = NEXT_INSN (p);
8686 if (INSN_UID (q) >= max_uid_for_loop)
8687 q = NEXT_INSN (q);
8688 }
8689 }
8690
8691 /* We are trying to eliminate BIV in INSN using GIV. Return nonzero if
8692 the offset that we have to take into account due to auto-increment /
8693 giv derivation is zero. */
8694 static int
8695 biv_elimination_giv_has_0_offset (struct induction *biv,
8696 struct induction *giv, rtx insn)
8697 {
8698 /* If the giv V had the auto-inc address optimization applied
8699 to it, and INSN occurs between the giv insn and the biv
8700 insn, then we'd have to adjust the value used here.
8701 This is rare, so we don't bother to make this possible. */
8702 if (giv->auto_inc_opt
8703 && ((loop_insn_first_p (giv->insn, insn)
8704 && loop_insn_first_p (insn, biv->insn))
8705 || (loop_insn_first_p (biv->insn, insn)
8706 && loop_insn_first_p (insn, giv->insn))))
8707 return 0;
8708
8709 return 1;
8710 }
8711
8712 /* If BL appears in X (part of the pattern of INSN), see if we can
8713 eliminate its use. If so, return 1. If not, return 0.
8714
8715 If BIV does not appear in X, return 1.
8716
8717 If ELIMINATE_P is nonzero, actually do the elimination.
8718 WHERE_INSN/WHERE_BB indicate where extra insns should be added.
8719 Depending on how many items have been moved out of the loop, it
8720 will either be before INSN (when WHERE_INSN is nonzero) or at the
8721 start of the loop (when WHERE_INSN is zero). */
8722
8723 static int
8724 maybe_eliminate_biv_1 (const struct loop *loop, rtx x, rtx insn,
8725 struct iv_class *bl, int eliminate_p,
8726 basic_block where_bb, rtx where_insn)
8727 {
8728 enum rtx_code code = GET_CODE (x);
8729 rtx reg = bl->biv->dest_reg;
8730 enum machine_mode mode = GET_MODE (reg);
8731 struct induction *v;
8732 rtx arg, tem;
8733 #ifdef HAVE_cc0
8734 rtx new;
8735 #endif
8736 int arg_operand;
8737 const char *fmt;
8738 int i, j;
8739
8740 switch (code)
8741 {
8742 case REG:
8743 /* If we haven't already been able to do something with this BIV,
8744 we can't eliminate it. */
8745 if (x == reg)
8746 return 0;
8747 return 1;
8748
8749 case SET:
8750 /* If this sets the BIV, it is not a problem. */
8751 if (SET_DEST (x) == reg)
8752 return 1;
8753
8754 /* If this is an insn that defines a giv, it is also ok because
8755 it will go away when the giv is reduced. */
8756 for (v = bl->giv; v; v = v->next_iv)
8757 if (v->giv_type == DEST_REG && SET_DEST (x) == v->dest_reg)
8758 return 1;
8759
8760 #ifdef HAVE_cc0
8761 if (SET_DEST (x) == cc0_rtx && SET_SRC (x) == reg)
8762 {
8763 /* Can replace with any giv that was reduced and
8764 that has (MULT_VAL != 0) and (ADD_VAL == 0).
8765 Require a constant for MULT_VAL, so we know it's nonzero.
8766 ??? We disable this optimization to avoid potential
8767 overflows. */
8768
8769 for (v = bl->giv; v; v = v->next_iv)
8770 if (GET_CODE (v->mult_val) == CONST_INT && v->mult_val != const0_rtx
8771 && v->add_val == const0_rtx
8772 && ! v->ignore && ! v->maybe_dead && v->always_computable
8773 && v->mode == mode
8774 && 0)
8775 {
8776 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8777 continue;
8778
8779 if (! eliminate_p)
8780 return 1;
8781
8782 /* If the giv has the opposite direction of change,
8783 then reverse the comparison. */
8784 if (INTVAL (v->mult_val) < 0)
8785 new = gen_rtx_COMPARE (GET_MODE (v->new_reg),
8786 const0_rtx, v->new_reg);
8787 else
8788 new = v->new_reg;
8789
8790 /* We can probably test that giv's reduced reg. */
8791 if (validate_change (insn, &SET_SRC (x), new, 0))
8792 return 1;
8793 }
8794
8795 /* Look for a giv with (MULT_VAL != 0) and (ADD_VAL != 0);
8796 replace test insn with a compare insn (cmp REDUCED_GIV ADD_VAL).
8797 Require a constant for MULT_VAL, so we know it's nonzero.
8798 ??? Do this only if ADD_VAL is a pointer to avoid a potential
8799 overflow problem. */
8800
8801 for (v = bl->giv; v; v = v->next_iv)
8802 if (GET_CODE (v->mult_val) == CONST_INT
8803 && v->mult_val != const0_rtx
8804 && ! v->ignore && ! v->maybe_dead && v->always_computable
8805 && v->mode == mode
8806 && (GET_CODE (v->add_val) == SYMBOL_REF
8807 || GET_CODE (v->add_val) == LABEL_REF
8808 || GET_CODE (v->add_val) == CONST
8809 || (REG_P (v->add_val)
8810 && REG_POINTER (v->add_val))))
8811 {
8812 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8813 continue;
8814
8815 if (! eliminate_p)
8816 return 1;
8817
8818 /* If the giv has the opposite direction of change,
8819 then reverse the comparison. */
8820 if (INTVAL (v->mult_val) < 0)
8821 new = gen_rtx_COMPARE (VOIDmode, copy_rtx (v->add_val),
8822 v->new_reg);
8823 else
8824 new = gen_rtx_COMPARE (VOIDmode, v->new_reg,
8825 copy_rtx (v->add_val));
8826
8827 /* Replace biv with the giv's reduced register. */
8828 update_reg_last_use (v->add_val, insn);
8829 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8830 return 1;
8831
8832 /* Insn doesn't support that constant or invariant. Copy it
8833 into a register (it will be a loop invariant). */
8834 tem = gen_reg_rtx (GET_MODE (v->new_reg));
8835
8836 loop_insn_emit_before (loop, 0, where_insn,
8837 gen_move_insn (tem,
8838 copy_rtx (v->add_val)));
8839
8840 /* Substitute the new register for its invariant value in
8841 the compare expression. */
8842 XEXP (new, (INTVAL (v->mult_val) < 0) ? 0 : 1) = tem;
8843 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8844 return 1;
8845 }
8846 }
8847 #endif
8848 break;
8849
8850 case COMPARE:
8851 case EQ: case NE:
8852 case GT: case GE: case GTU: case GEU:
8853 case LT: case LE: case LTU: case LEU:
8854 /* See if either argument is the biv. */
8855 if (XEXP (x, 0) == reg)
8856 arg = XEXP (x, 1), arg_operand = 1;
8857 else if (XEXP (x, 1) == reg)
8858 arg = XEXP (x, 0), arg_operand = 0;
8859 else
8860 break;
8861
8862 if (CONSTANT_P (arg))
8863 {
8864 /* First try to replace with any giv that has constant positive
8865 mult_val and constant add_val. We might be able to support
8866 negative mult_val, but it seems complex to do it in general. */
8867
8868 for (v = bl->giv; v; v = v->next_iv)
8869 if (GET_CODE (v->mult_val) == CONST_INT
8870 && INTVAL (v->mult_val) > 0
8871 && (GET_CODE (v->add_val) == SYMBOL_REF
8872 || GET_CODE (v->add_val) == LABEL_REF
8873 || GET_CODE (v->add_val) == CONST
8874 || (REG_P (v->add_val)
8875 && REG_POINTER (v->add_val)))
8876 && ! v->ignore && ! v->maybe_dead && v->always_computable
8877 && v->mode == mode)
8878 {
8879 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8880 continue;
8881
8882 /* Don't eliminate if the linear combination that makes up
8883 the giv overflows when it is applied to ARG. */
8884 if (GET_CODE (arg) == CONST_INT)
8885 {
8886 rtx add_val;
8887
8888 if (GET_CODE (v->add_val) == CONST_INT)
8889 add_val = v->add_val;
8890 else
8891 add_val = const0_rtx;
8892
8893 if (const_mult_add_overflow_p (arg, v->mult_val,
8894 add_val, mode, 1))
8895 continue;
8896 }
8897
8898 if (! eliminate_p)
8899 return 1;
8900
8901 /* Replace biv with the giv's reduced reg. */
8902 validate_change (insn, &XEXP (x, 1 - arg_operand), v->new_reg, 1);
8903
8904 /* If all constants are actually constant integers and
8905 the derived constant can be directly placed in the COMPARE,
8906 do so. */
8907 if (GET_CODE (arg) == CONST_INT
8908 && GET_CODE (v->add_val) == CONST_INT)
8909 {
8910 tem = expand_mult_add (arg, NULL_RTX, v->mult_val,
8911 v->add_val, mode, 1);
8912 }
8913 else
8914 {
8915 /* Otherwise, load it into a register. */
8916 tem = gen_reg_rtx (mode);
8917 loop_iv_add_mult_emit_before (loop, arg,
8918 v->mult_val, v->add_val,
8919 tem, where_bb, where_insn);
8920 }
8921
8922 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8923
8924 if (apply_change_group ())
8925 return 1;
8926 }
8927
8928 /* Look for giv with positive constant mult_val and nonconst add_val.
8929 Insert insns to calculate new compare value.
8930 ??? Turn this off due to possible overflow. */
8931
8932 for (v = bl->giv; v; v = v->next_iv)
8933 if (GET_CODE (v->mult_val) == CONST_INT
8934 && INTVAL (v->mult_val) > 0
8935 && ! v->ignore && ! v->maybe_dead && v->always_computable
8936 && v->mode == mode
8937 && 0)
8938 {
8939 rtx tem;
8940
8941 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8942 continue;
8943
8944 if (! eliminate_p)
8945 return 1;
8946
8947 tem = gen_reg_rtx (mode);
8948
8949 /* Replace biv with giv's reduced register. */
8950 validate_change (insn, &XEXP (x, 1 - arg_operand),
8951 v->new_reg, 1);
8952
8953 /* Compute value to compare against. */
8954 loop_iv_add_mult_emit_before (loop, arg,
8955 v->mult_val, v->add_val,
8956 tem, where_bb, where_insn);
8957 /* Use it in this insn. */
8958 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8959 if (apply_change_group ())
8960 return 1;
8961 }
8962 }
8963 else if (REG_P (arg) || MEM_P (arg))
8964 {
8965 if (loop_invariant_p (loop, arg) == 1)
8966 {
8967 /* Look for giv with constant positive mult_val and nonconst
8968 add_val. Insert insns to compute new compare value.
8969 ??? Turn this off due to possible overflow. */
8970
8971 for (v = bl->giv; v; v = v->next_iv)
8972 if (GET_CODE (v->mult_val) == CONST_INT && INTVAL (v->mult_val) > 0
8973 && ! v->ignore && ! v->maybe_dead && v->always_computable
8974 && v->mode == mode
8975 && 0)
8976 {
8977 rtx tem;
8978
8979 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8980 continue;
8981
8982 if (! eliminate_p)
8983 return 1;
8984
8985 tem = gen_reg_rtx (mode);
8986
8987 /* Replace biv with giv's reduced register. */
8988 validate_change (insn, &XEXP (x, 1 - arg_operand),
8989 v->new_reg, 1);
8990
8991 /* Compute value to compare against. */
8992 loop_iv_add_mult_emit_before (loop, arg,
8993 v->mult_val, v->add_val,
8994 tem, where_bb, where_insn);
8995 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8996 if (apply_change_group ())
8997 return 1;
8998 }
8999 }
9000
9001 /* This code has problems.  Basically, when deciding whether we will
9002 eliminate BL, we can't know whether a particular giv
9003 of ARG will be reduced.  If it isn't going to be reduced,
9004 we can't eliminate BL.  We can try forcing it to be reduced,
9005 but that can generate poor code.
9006
9007 The problem is that the benefit of reducing TV, below, should
9008 be increased if BL can actually be eliminated, but this means
9009 we might have to do a topological sort of the order in which
9010 we try to process bivs.  It doesn't seem worthwhile to do
9011 this sort of thing now. */
9012
9013 #if 0
9014 /* Otherwise the reg compared with had better be a biv. */
9015 if (!REG_P (arg)
9016 || REG_IV_TYPE (ivs, REGNO (arg)) != BASIC_INDUCT)
9017 return 0;
9018
9019 /* Look for a pair of givs, one for each biv,
9020 with identical coefficients. */
9021 for (v = bl->giv; v; v = v->next_iv)
9022 {
9023 struct induction *tv;
9024
9025 if (v->ignore || v->maybe_dead || v->mode != mode)
9026 continue;
9027
9028 for (tv = REG_IV_CLASS (ivs, REGNO (arg))->giv; tv;
9029 tv = tv->next_iv)
9030 if (! tv->ignore && ! tv->maybe_dead
9031 && rtx_equal_p (tv->mult_val, v->mult_val)
9032 && rtx_equal_p (tv->add_val, v->add_val)
9033 && tv->mode == mode)
9034 {
9035 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
9036 continue;
9037
9038 if (! eliminate_p)
9039 return 1;
9040
9041 /* Replace biv with its giv's reduced reg. */
9042 XEXP (x, 1 - arg_operand) = v->new_reg;
9043 /* Replace other operand with the other giv's
9044 reduced reg. */
9045 XEXP (x, arg_operand) = tv->new_reg;
9046 return 1;
9047 }
9048 }
9049 #endif
9050 }
9051
9052 /* If we get here, the biv can't be eliminated. */
9053 return 0;
9054
9055 case MEM:
9056 /* If this address is a DEST_ADDR giv, it doesn't matter if the
9057 biv is used in it, since it will be replaced. */
9058 for (v = bl->giv; v; v = v->next_iv)
9059 if (v->giv_type == DEST_ADDR && v->location == &XEXP (x, 0))
9060 return 1;
9061 break;
9062
9063 default:
9064 break;
9065 }
9066
9067 /* See if any subexpression fails elimination. */
9068 fmt = GET_RTX_FORMAT (code);
9069 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
9070 {
9071 switch (fmt[i])
9072 {
9073 case 'e':
9074 if (! maybe_eliminate_biv_1 (loop, XEXP (x, i), insn, bl,
9075 eliminate_p, where_bb, where_insn))
9076 return 0;
9077 break;
9078
9079 case 'E':
9080 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9081 if (! maybe_eliminate_biv_1 (loop, XVECEXP (x, i, j), insn, bl,
9082 eliminate_p, where_bb, where_insn))
9083 return 0;
9084 break;
9085 }
9086 }
9087
9088 return 1;
9089 }
9090 \f
9091 /* Return nonzero if the last use of REG
9092 is in an insn following INSN in the same basic block. */
9093
9094 static int
9095 last_use_this_basic_block (rtx reg, rtx insn)
9096 {
9097 rtx n;
9098 for (n = insn;
9099 n && GET_CODE (n) != CODE_LABEL && GET_CODE (n) != JUMP_INSN;
9100 n = NEXT_INSN (n))
9101 {
9102 if (REGNO_LAST_UID (REGNO (reg)) == INSN_UID (n))
9103 return 1;
9104 }
9105 return 0;
9106 }
9107 \f
9108 /* Called via `note_stores' to record the initial value of a biv. Here we
9109 just record the location of the set and process it later. */
9110
9111 static void
9112 record_initial (rtx dest, rtx set, void *data ATTRIBUTE_UNUSED)
9113 {
9114 struct loop_ivs *ivs = (struct loop_ivs *) data;
9115 struct iv_class *bl;
9116
9117 if (!REG_P (dest)
9118 || REGNO (dest) >= ivs->n_regs
9119 || REG_IV_TYPE (ivs, REGNO (dest)) != BASIC_INDUCT)
9120 return;
9121
9122 bl = REG_IV_CLASS (ivs, REGNO (dest));
9123
9124 /* If this is the first set found, record it. */
9125 if (bl->init_insn == 0)
9126 {
9127 bl->init_insn = note_insn;
9128 bl->init_set = set;
9129 }
9130 }
9131 \f
9132 /* If any of the registers in X are "old" and currently have a last use earlier
9133 than INSN, update them to have a last use of INSN. Their actual last use
9134 will be the previous insn but it will not have a valid uid_luid so we can't
9135 use it. X must be a source expression only. */
9136
9137 static void
9138 update_reg_last_use (rtx x, rtx insn)
9139 {
9140 /* Check for the case where INSN does not have a valid luid. In this case,
9141 there is no need to modify the regno_last_uid, as this can only happen
9142 when code is inserted after the loop_end to set a pseudo's final value,
9143 and hence this insn will never be the last use of x.
9144 ???? This comment is not correct. See for example loop_givs_reduce.
9145 This may insert an insn before another new insn. */
9146 if (REG_P (x) && REGNO (x) < max_reg_before_loop
9147 && INSN_UID (insn) < max_uid_for_loop
9148 && REGNO_LAST_LUID (REGNO (x)) < INSN_LUID (insn))
9149 {
9150 REGNO_LAST_UID (REGNO (x)) = INSN_UID (insn);
9151 }
9152 else
9153 {
9154 int i, j;
9155 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
9156 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9157 {
9158 if (fmt[i] == 'e')
9159 update_reg_last_use (XEXP (x, i), insn);
9160 else if (fmt[i] == 'E')
9161 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9162 update_reg_last_use (XVECEXP (x, i, j), insn);
9163 }
9164 }
9165 }
9166 \f
9167 /* Given an insn INSN and condition COND, return the condition in a
9168 canonical form to simplify testing by callers. Specifically:
9169
9170 (1) The code will always be a comparison operation (EQ, NE, GT, etc.).
9171 (2) Both operands will be machine operands; (cc0) will have been replaced.
9172 (3) If an operand is a constant, it will be the second operand.
9173 (4) (LE x const) will be replaced with (LT x <const+1>) and similarly
9174 for GE, GEU, and LEU.
9175
9176 If the condition cannot be understood, or is an inequality floating-point
9177 comparison which needs to be reversed, 0 will be returned.
9178
9179 If REVERSE is nonzero, then reverse the condition prior to canonicalizing it.
9180
9181 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9182 insn used in locating the condition was found. If a replacement test
9183 of the condition is desired, it should be placed in front of that
9184 insn and we will be sure that the inputs are still valid.
9185
9186 If WANT_REG is nonzero, we wish the condition to be relative to that
9187 register, if possible. Therefore, do not canonicalize the condition
9188 further. If ALLOW_CC_MODE is nonzero, allow the condition returned
9189 to be a compare to a CC mode register. */
9190
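/* Editorial illustration (hypothetical RTL, not from the original sources):
   given the SImode condition

       (le (reg:SI 100) (const_int 4))

   rule (4) above yields (lt (reg:SI 100) (const_int 5)), and a condition
   with the constant first, such as (gt (const_int 0) (reg:SI 100)), is
   swapped to (lt (reg:SI 100) (const_int 0)) so that the constant always
   ends up as the second operand.  */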
9191 rtx
9192 canonicalize_condition (rtx insn, rtx cond, int reverse, rtx *earliest,
9193 rtx want_reg, int allow_cc_mode)
9194 {
9195 enum rtx_code code;
9196 rtx prev = insn;
9197 rtx set;
9198 rtx tem;
9199 rtx op0, op1;
9200 int reverse_code = 0;
9201 enum machine_mode mode;
9202
9203 code = GET_CODE (cond);
9204 mode = GET_MODE (cond);
9205 op0 = XEXP (cond, 0);
9206 op1 = XEXP (cond, 1);
9207
9208 if (reverse)
9209 code = reversed_comparison_code (cond, insn);
9210 if (code == UNKNOWN)
9211 return 0;
9212
9213 if (earliest)
9214 *earliest = insn;
9215
9216 /* If we are comparing a register with zero, see if the register is set
9217 in the previous insn to a COMPARE or a comparison operation. Perform
9218 the same tests as a function of STORE_FLAG_VALUE as find_comparison_args
9219 in cse.c */
9220
9221 while ((GET_RTX_CLASS (code) == RTX_COMPARE
9222 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
9223 && op1 == CONST0_RTX (GET_MODE (op0))
9224 && op0 != want_reg)
9225 {
9226 /* Set nonzero when we find something of interest. */
9227 rtx x = 0;
9228
9229 #ifdef HAVE_cc0
9230 /* If comparison with cc0, import actual comparison from compare
9231 insn. */
9232 if (op0 == cc0_rtx)
9233 {
9234 if ((prev = prev_nonnote_insn (prev)) == 0
9235 || GET_CODE (prev) != INSN
9236 || (set = single_set (prev)) == 0
9237 || SET_DEST (set) != cc0_rtx)
9238 return 0;
9239
9240 op0 = SET_SRC (set);
9241 op1 = CONST0_RTX (GET_MODE (op0));
9242 if (earliest)
9243 *earliest = prev;
9244 }
9245 #endif
9246
9247 /* If this is a COMPARE, pick up the two things being compared. */
9248 if (GET_CODE (op0) == COMPARE)
9249 {
9250 op1 = XEXP (op0, 1);
9251 op0 = XEXP (op0, 0);
9252 continue;
9253 }
9254 else if (!REG_P (op0))
9255 break;
9256
9257 /* Go back to the previous insn. Stop if it is not an INSN. We also
9258 stop if it isn't a single set or if it has a REG_INC note because
9259 we don't want to bother dealing with it. */
9260
9261 if ((prev = prev_nonnote_insn (prev)) == 0
9262 || GET_CODE (prev) != INSN
9263 || FIND_REG_INC_NOTE (prev, NULL_RTX))
9264 break;
9265
9266 set = set_of (op0, prev);
9267
9268 if (set
9269 && (GET_CODE (set) != SET
9270 || !rtx_equal_p (SET_DEST (set), op0)))
9271 break;
9272
9273 /* If this is setting OP0, get what it sets it to if it looks
9274 relevant. */
9275 if (set)
9276 {
9277 enum machine_mode inner_mode = GET_MODE (SET_DEST (set));
9278 #ifdef FLOAT_STORE_FLAG_VALUE
9279 REAL_VALUE_TYPE fsfv;
9280 #endif
9281
9282 /* ??? We may not combine comparisons done in a CCmode with
9283 comparisons not done in a CCmode. This is to aid targets
9284 like Alpha that have an IEEE compliant EQ instruction, and
9285 a non-IEEE compliant BEQ instruction. The use of CCmode is
9286 actually artificial, simply to prevent the combination, but
9287 should not affect other platforms.
9288
9289 However, we must allow VOIDmode comparisons to match either
9290 CCmode or non-CCmode comparison, because some ports have
9291 modeless comparisons inside branch patterns.
9292
9293 ??? This mode check should perhaps look more like the mode check
9294 in simplify_comparison in combine. */
9295
9296 if ((GET_CODE (SET_SRC (set)) == COMPARE
9297 || (((code == NE
9298 || (code == LT
9299 && GET_MODE_CLASS (inner_mode) == MODE_INT
9300 && (GET_MODE_BITSIZE (inner_mode)
9301 <= HOST_BITS_PER_WIDE_INT)
9302 && (STORE_FLAG_VALUE
9303 & ((HOST_WIDE_INT) 1
9304 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9305 #ifdef FLOAT_STORE_FLAG_VALUE
9306 || (code == LT
9307 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9308 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9309 REAL_VALUE_NEGATIVE (fsfv)))
9310 #endif
9311 ))
9312 && COMPARISON_P (SET_SRC (set))))
9313 && (((GET_MODE_CLASS (mode) == MODE_CC)
9314 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9315 || mode == VOIDmode || inner_mode == VOIDmode))
9316 x = SET_SRC (set);
9317 else if (((code == EQ
9318 || (code == GE
9319 && (GET_MODE_BITSIZE (inner_mode)
9320 <= HOST_BITS_PER_WIDE_INT)
9321 && GET_MODE_CLASS (inner_mode) == MODE_INT
9322 && (STORE_FLAG_VALUE
9323 & ((HOST_WIDE_INT) 1
9324 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9325 #ifdef FLOAT_STORE_FLAG_VALUE
9326 || (code == GE
9327 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9328 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9329 REAL_VALUE_NEGATIVE (fsfv)))
9330 #endif
9331 ))
9332 && COMPARISON_P (SET_SRC (set))
9333 && (((GET_MODE_CLASS (mode) == MODE_CC)
9334 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9335 || mode == VOIDmode || inner_mode == VOIDmode))
9336
9337 {
9338 reverse_code = 1;
9339 x = SET_SRC (set);
9340 }
9341 else
9342 break;
9343 }
9344
9345 else if (reg_set_p (op0, prev))
9346 /* If this sets OP0, but not directly, we have to give up. */
9347 break;
9348
9349 if (x)
9350 {
9351 if (COMPARISON_P (x))
9352 code = GET_CODE (x);
9353 if (reverse_code)
9354 {
9355 code = reversed_comparison_code (x, prev);
9356 if (code == UNKNOWN)
9357 return 0;
9358 reverse_code = 0;
9359 }
9360
9361 op0 = XEXP (x, 0), op1 = XEXP (x, 1);
9362 if (earliest)
9363 *earliest = prev;
9364 }
9365 }
9366
9367 /* If constant is first, put it last. */
9368 if (CONSTANT_P (op0))
9369 code = swap_condition (code), tem = op0, op0 = op1, op1 = tem;
9370
9371 /* If OP0 is the result of a comparison, we weren't able to find what
9372 was really being compared, so fail. */
9373 if (!allow_cc_mode
9374 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
9375 return 0;
9376
9377 /* Canonicalize any ordered comparison with integers involving equality
9378 if we can do computations in the relevant mode and we do not
9379 overflow. */
9380
9381 if (GET_MODE_CLASS (GET_MODE (op0)) != MODE_CC
9382 && GET_CODE (op1) == CONST_INT
9383 && GET_MODE (op0) != VOIDmode
9384 && GET_MODE_BITSIZE (GET_MODE (op0)) <= HOST_BITS_PER_WIDE_INT)
9385 {
9386 HOST_WIDE_INT const_val = INTVAL (op1);
9387 unsigned HOST_WIDE_INT uconst_val = const_val;
9388 unsigned HOST_WIDE_INT max_val
9389 = (unsigned HOST_WIDE_INT) GET_MODE_MASK (GET_MODE (op0));
9390
9391 switch (code)
9392 {
9393 case LE:
9394 if ((unsigned HOST_WIDE_INT) const_val != max_val >> 1)
9395 code = LT, op1 = gen_int_mode (const_val + 1, GET_MODE (op0));
9396 break;
9397
9398 /* When cross-compiling, const_val might be sign-extended from
9399 BITS_PER_WORD to HOST_BITS_PER_WIDE_INT */
9400 case GE:
9401 if ((HOST_WIDE_INT) (const_val & max_val)
9402 != (((HOST_WIDE_INT) 1
9403 << (GET_MODE_BITSIZE (GET_MODE (op0)) - 1))))
9404 code = GT, op1 = gen_int_mode (const_val - 1, GET_MODE (op0));
9405 break;
9406
9407 case LEU:
9408 if (uconst_val < max_val)
9409 code = LTU, op1 = gen_int_mode (uconst_val + 1, GET_MODE (op0));
9410 break;
9411
9412 case GEU:
9413 if (uconst_val != 0)
9414 code = GTU, op1 = gen_int_mode (uconst_val - 1, GET_MODE (op0));
9415 break;
9416
9417 default:
9418 break;
9419 }
9420 }
9421
9422 /* Never return CC0; return zero instead. */
9423 if (CC0_P (op0))
9424 return 0;
9425
9426 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
9427 }
9428
9429 /* Given a jump insn JUMP, return the condition that will cause it to branch
9430 to its JUMP_LABEL. If the condition cannot be understood, or is an
9431 inequality floating-point comparison which needs to be reversed, 0 will
9432 be returned.
9433
9434 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9435 insn used in locating the condition was found. If a replacement test
9436 of the condition is desired, it should be placed in front of that
9437 insn and we will be sure that the inputs are still valid.
9438
9439 If ALLOW_CC_MODE is nonzero, allow the condition returned to be a
9440 compare to a CC mode register. */
9441
9442 rtx
9443 get_condition (rtx jump, rtx *earliest, int allow_cc_mode)
9444 {
9445 rtx cond;
9446 int reverse;
9447 rtx set;
9448
9449 /* If this is not a standard conditional jump, we can't parse it. */
9450 if (GET_CODE (jump) != JUMP_INSN
9451 || ! any_condjump_p (jump))
9452 return 0;
9453 set = pc_set (jump);
9454
9455 cond = XEXP (SET_SRC (set), 0);
9456
9457 /* If this branches to JUMP_LABEL when the condition is false, reverse
9458 the condition. */
9459 reverse
9460 = GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF
9461 && XEXP (XEXP (SET_SRC (set), 2), 0) == JUMP_LABEL (jump);
9462
9463 return canonicalize_condition (jump, cond, reverse, earliest, NULL_RTX,
9464 allow_cc_mode);
9465 }
9466
9467 /* Similar to above routine, except that we also put an invariant last
9468 unless both operands are invariants. */
9469
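/* Editorial illustration (hypothetical RTL): if the loop condition is
   (gt (reg INVARIANT) (reg BIV)) and only the first operand is loop
   invariant, the comparison is swapped to (lt (reg BIV) (reg INVARIANT))
   so that the invariant operand comes last.  */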
9470 rtx
9471 get_condition_for_loop (const struct loop *loop, rtx x)
9472 {
9473 rtx comparison = get_condition (x, (rtx*) 0, false);
9474
9475 if (comparison == 0
9476 || ! loop_invariant_p (loop, XEXP (comparison, 0))
9477 || loop_invariant_p (loop, XEXP (comparison, 1)))
9478 return comparison;
9479
9480 return gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)), VOIDmode,
9481 XEXP (comparison, 1), XEXP (comparison, 0));
9482 }
9483
9484 /* Scan the function and determine whether it has indirect (computed) jumps.
9485
9486 This is taken mostly from flow.c; similar code exists elsewhere
9487 in the compiler. It may be useful to put this into rtlanal.c. */
9488 static int
9489 indirect_jump_in_function_p (rtx start)
9490 {
9491 rtx insn;
9492
9493 for (insn = start; insn; insn = NEXT_INSN (insn))
9494 if (computed_jump_p (insn))
9495 return 1;
9496
9497 return 0;
9498 }
9499
9500 /* Add MEM to the LOOP_MEMS array, if appropriate. See the
9501 documentation for LOOP_MEMS for the definition of `appropriate'.
9502 This function is called from prescan_loop via for_each_rtx. */
9503
9504 static int
9505 insert_loop_mem (rtx *mem, void *data ATTRIBUTE_UNUSED)
9506 {
9507 struct loop_info *loop_info = data;
9508 int i;
9509 rtx m = *mem;
9510
9511 if (m == NULL_RTX)
9512 return 0;
9513
9514 switch (GET_CODE (m))
9515 {
9516 case MEM:
9517 break;
9518
9519 case CLOBBER:
9520 /* We're not interested in MEMs that are only clobbered. */
9521 return -1;
9522
9523 case CONST_DOUBLE:
9524 /* We're not interested in the MEM associated with a
9525 CONST_DOUBLE, so there's no need to traverse into this. */
9526 return -1;
9527
9528 case EXPR_LIST:
9529 /* We're not interested in any MEMs that only appear in notes. */
9530 return -1;
9531
9532 default:
9533 /* This is not a MEM. */
9534 return 0;
9535 }
9536
9537 /* See if we've already seen this MEM. */
9538 for (i = 0; i < loop_info->mems_idx; ++i)
9539 if (rtx_equal_p (m, loop_info->mems[i].mem))
9540 {
9541 if (MEM_VOLATILE_P (m) && !MEM_VOLATILE_P (loop_info->mems[i].mem))
9542 loop_info->mems[i].mem = m;
9543 if (GET_MODE (m) != GET_MODE (loop_info->mems[i].mem))
9544 /* The modes of the two memory accesses are different. If
9545 this happens, something tricky is going on, and we just
9546 don't optimize accesses to this MEM. */
9547 loop_info->mems[i].optimize = 0;
9548
9549 return 0;
9550 }
9551
9552 /* Resize the array, if necessary. */
9553 if (loop_info->mems_idx == loop_info->mems_allocated)
9554 {
9555 if (loop_info->mems_allocated != 0)
9556 loop_info->mems_allocated *= 2;
9557 else
9558 loop_info->mems_allocated = 32;
9559
9560 loop_info->mems = xrealloc (loop_info->mems,
9561 loop_info->mems_allocated * sizeof (loop_mem_info));
9562 }
9563
9564 /* Actually insert the MEM. */
9565 loop_info->mems[loop_info->mems_idx].mem = m;
9566 /* We can't hoist this MEM out of the loop if it's a BLKmode MEM
9567 because we can't put it in a register. We still store it in the
9568 table, though, so that if we see the same address later, but in a
9569 non-BLK mode, we'll not think we can optimize it at that point. */
9570 loop_info->mems[loop_info->mems_idx].optimize = (GET_MODE (m) != BLKmode);
9571 loop_info->mems[loop_info->mems_idx].reg = NULL_RTX;
9572 ++loop_info->mems_idx;
9573
9574 return 0;
9575 }
9576
9577
9578 /* Allocate REGS->ARRAY or reallocate it if it is too small.
9579
9580 Increment REGS->ARRAY[I].SET_IN_LOOP at the index I of each
9581 register that is modified by an insn between FROM and TO. If the
9582 value of an element of REGS->array[I].SET_IN_LOOP becomes 127 or
9583 more, stop incrementing it, to avoid overflow.
9584
9585 Store in REGS->ARRAY[I].SINGLE_USAGE the single insn in which
9586 register I is used, if it is only used once. Otherwise, it is set
9587 to 0 (for no uses) or const0_rtx for more than one use. This
9588 parameter may be zero, in which case this processing is not done.
9589
9590 Set REGS->ARRAY[I].MAY_NOT_OPTIMIZE nonzero if we should not
9591 optimize register I. */
9592
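/* Editorial note (hypothetical example): if pseudo 105 is set by two insns
   inside the loop, REGS->ARRAY[105].SET_IN_LOOP ends up as 2 (saturating
   at 127); if pseudo 106 is used by exactly one insn, SINGLE_USAGE records
   that insn, while a register used more than once gets const0_rtx and an
   unused one stays 0.  */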
9593 static void
9594 loop_regs_scan (const struct loop *loop, int extra_size)
9595 {
9596 struct loop_regs *regs = LOOP_REGS (loop);
9597 int old_nregs;
9598 /* last_set[n] is nonzero iff reg n has been set in the current
9599 basic block. In that case, it is the insn that last set reg n. */
9600 rtx *last_set;
9601 rtx insn;
9602 int i;
9603
9604 old_nregs = regs->num;
9605 regs->num = max_reg_num ();
9606
9607 /* Grow the regs array if not allocated or too small. */
9608 if (regs->num >= regs->size)
9609 {
9610 regs->size = regs->num + extra_size;
9611
9612 regs->array = xrealloc (regs->array, regs->size * sizeof (*regs->array));
9613
9614 /* Zero the new elements. */
9615 memset (regs->array + old_nregs, 0,
9616 (regs->size - old_nregs) * sizeof (*regs->array));
9617 }
9618
9619 /* Clear previously scanned fields but do not clear n_times_set. */
9620 for (i = 0; i < old_nregs; i++)
9621 {
9622 regs->array[i].set_in_loop = 0;
9623 regs->array[i].may_not_optimize = 0;
9624 regs->array[i].single_usage = NULL_RTX;
9625 }
9626
9627 last_set = xcalloc (regs->num, sizeof (rtx));
9628
9629 /* Scan the loop, recording register usage. */
9630 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9631 insn = NEXT_INSN (insn))
9632 {
9633 if (INSN_P (insn))
9634 {
9635 /* Record registers that have exactly one use. */
9636 find_single_use_in_loop (regs, insn, PATTERN (insn));
9637
9638 /* Include uses in REG_EQUAL notes. */
9639 if (REG_NOTES (insn))
9640 find_single_use_in_loop (regs, insn, REG_NOTES (insn));
9641
9642 if (GET_CODE (PATTERN (insn)) == SET
9643 || GET_CODE (PATTERN (insn)) == CLOBBER)
9644 count_one_set (regs, insn, PATTERN (insn), last_set);
9645 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
9646 {
9647 int i;
9648 for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
9649 count_one_set (regs, insn, XVECEXP (PATTERN (insn), 0, i),
9650 last_set);
9651 }
9652 }
9653
9654 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9655 memset (last_set, 0, regs->num * sizeof (rtx));
9656
9657 /* Invalidate all registers used for function argument passing.
9658 We check rtx_varies_p for the same reason as below, to allow
9659 optimizing PIC calculations. */
9660 if (GET_CODE (insn) == CALL_INSN)
9661 {
9662 rtx link;
9663 for (link = CALL_INSN_FUNCTION_USAGE (insn);
9664 link;
9665 link = XEXP (link, 1))
9666 {
9667 rtx op, reg;
9668
9669 if (GET_CODE (op = XEXP (link, 0)) == USE
9670 && REG_P (reg = XEXP (op, 0))
9671 && rtx_varies_p (reg, 1))
9672 regs->array[REGNO (reg)].may_not_optimize = 1;
9673 }
9674 }
9675 }
9676
9677 /* Invalidate all hard registers clobbered by calls. With one exception:
9678 a call-clobbered PIC register is still function-invariant for our
9679 purposes, since we can hoist any PIC calculations out of the loop.
9680 Thus the call to rtx_varies_p. */
9681 if (LOOP_INFO (loop)->has_call)
9682 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
9683 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
9684 && rtx_varies_p (regno_reg_rtx[i], 1))
9685 {
9686 regs->array[i].may_not_optimize = 1;
9687 regs->array[i].set_in_loop = 1;
9688 }
9689
9690 #ifdef AVOID_CCMODE_COPIES
9691 /* Don't try to move insns which set CC registers if we should not
9692 create CCmode register copies. */
9693 for (i = regs->num - 1; i >= FIRST_PSEUDO_REGISTER; i--)
9694 if (GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
9695 regs->array[i].may_not_optimize = 1;
9696 #endif
9697
9698 /* Set regs->array[I].n_times_set for the new registers. */
9699 for (i = old_nregs; i < regs->num; i++)
9700 regs->array[i].n_times_set = regs->array[i].set_in_loop;
9701
9702 free (last_set);
9703 }
9704
9705 /* Returns the number of real INSNs in the LOOP. */
9706
9707 static int
9708 count_insns_in_loop (const struct loop *loop)
9709 {
9710 int count = 0;
9711 rtx insn;
9712
9713 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9714 insn = NEXT_INSN (insn))
9715 if (INSN_P (insn))
9716 ++count;
9717
9718 return count;
9719 }
9720
9721 /* Move MEMs into registers for the duration of the loop. */
9722
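/* Editorial sketch of the transformation (hypothetical RTL, not from the
   original sources): for a MEM with a loop-invariant address,

       loop:  ... (set (reg 101) (mem:SI (reg 60))) ...
              ... (set (mem:SI (reg 60)) (reg 102)) ...

   the MEM is shadowed by a fresh pseudo that is loaded once in the loop
   pre-header and, if the MEM was written inside the loop, stored back
   once after the loop:

       (set (reg 150) (mem:SI (reg 60)))      ;; hoisted before the loop
       loop:  ... uses and sets of (reg 150) ...
       (set (mem:SI (reg 60)) (reg 150))      ;; emitted after the loop  */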
9723 static void
9724 load_mems (const struct loop *loop)
9725 {
9726 struct loop_info *loop_info = LOOP_INFO (loop);
9727 struct loop_regs *regs = LOOP_REGS (loop);
9728 int maybe_never = 0;
9729 int i;
9730 rtx p, prev_ebb_head;
9731 rtx label = NULL_RTX;
9732 rtx end_label;
9733 /* Nonzero if the next instruction may never be executed. */
9734 int next_maybe_never = 0;
9735 unsigned int last_max_reg = max_reg_num ();
9736
9737 if (loop_info->mems_idx == 0)
9738 return;
9739
9740 /* We cannot use next_label here because it skips over normal insns. */
9741 end_label = next_nonnote_insn (loop->end);
9742 if (end_label && GET_CODE (end_label) != CODE_LABEL)
9743 end_label = NULL_RTX;
9744
9745 /* Check to see if it's possible that some instructions in the loop are
9746 never executed. Also check if there is a goto out of the loop other
9747 than right after the end of the loop. */
9748 for (p = next_insn_in_loop (loop, loop->scan_start);
9749 p != NULL_RTX;
9750 p = next_insn_in_loop (loop, p))
9751 {
9752 if (GET_CODE (p) == CODE_LABEL)
9753 maybe_never = 1;
9754 else if (GET_CODE (p) == JUMP_INSN
9755 /* If we enter the loop in the middle, and scan
9756 around to the beginning, don't set maybe_never
9757 for that. This must be an unconditional jump,
9758 otherwise the code at the top of the loop might
9759 never be executed. Unconditional jumps are
9760 followed by a barrier and then the loop end. */
9761 && ! (GET_CODE (p) == JUMP_INSN
9762 && JUMP_LABEL (p) == loop->top
9763 && NEXT_INSN (NEXT_INSN (p)) == loop->end
9764 && any_uncondjump_p (p)))
9765 {
9766 /* If this is a jump outside of the loop but not right
9767 after the end of the loop, we would have to emit new fixup
9768 sequences for each such label. */
9769 if (/* If we can't tell where control might go when this
9770 JUMP_INSN is executed, we must be conservative. */
9771 !JUMP_LABEL (p)
9772 || (JUMP_LABEL (p) != end_label
9773 && (INSN_UID (JUMP_LABEL (p)) >= max_uid_for_loop
9774 || INSN_LUID (JUMP_LABEL (p)) < INSN_LUID (loop->start)
9775 || INSN_LUID (JUMP_LABEL (p)) > INSN_LUID (loop->end))))
9776 return;
9777
9778 if (!any_condjump_p (p))
9779 /* Something complicated. */
9780 maybe_never = 1;
9781 else
9782 /* If there are any more instructions in the loop, they
9783 might not be reached. */
9784 next_maybe_never = 1;
9785 }
9786 else if (next_maybe_never)
9787 maybe_never = 1;
9788 }
9789
9790 /* Find start of the extended basic block that enters the loop. */
9791 for (p = loop->start;
9792 PREV_INSN (p) && GET_CODE (p) != CODE_LABEL;
9793 p = PREV_INSN (p))
9794 ;
9795 prev_ebb_head = p;
9796
9797 cselib_init (true);
9798
9799 /* Build table of mems that get set to constant values before the
9800 loop. */
9801 for (; p != loop->start; p = NEXT_INSN (p))
9802 cselib_process_insn (p);
9803
9804 /* Actually move the MEMs. */
9805 for (i = 0; i < loop_info->mems_idx; ++i)
9806 {
9807 regset_head load_copies;
9808 regset_head store_copies;
9809 int written = 0;
9810 rtx reg;
9811 rtx mem = loop_info->mems[i].mem;
9812 rtx mem_list_entry;
9813
9814 if (MEM_VOLATILE_P (mem)
9815 || loop_invariant_p (loop, XEXP (mem, 0)) != 1)
9816 /* There's no telling whether or not MEM is modified. */
9817 loop_info->mems[i].optimize = 0;
9818
9819 /* Go through the MEMs written to in the loop to see if this
9820 one is aliased by one of them. */
9821 mem_list_entry = loop_info->store_mems;
9822 while (mem_list_entry)
9823 {
9824 if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
9825 written = 1;
9826 else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
9827 mem, rtx_varies_p))
9828 {
9829 /* MEM is indeed aliased by this store. */
9830 loop_info->mems[i].optimize = 0;
9831 break;
9832 }
9833 mem_list_entry = XEXP (mem_list_entry, 1);
9834 }
9835
9836 if (flag_float_store && written
9837 && GET_MODE_CLASS (GET_MODE (mem)) == MODE_FLOAT)
9838 loop_info->mems[i].optimize = 0;
9839
9840 /* If this MEM is written to, we must be sure that there
9841 are no reads from another MEM that aliases this one. */
9842 if (loop_info->mems[i].optimize && written)
9843 {
9844 int j;
9845
9846 for (j = 0; j < loop_info->mems_idx; ++j)
9847 {
9848 if (j == i)
9849 continue;
9850 else if (true_dependence (mem,
9851 VOIDmode,
9852 loop_info->mems[j].mem,
9853 rtx_varies_p))
9854 {
9855 /* It's not safe to hoist loop_info->mems[i] out of
9856 the loop because writes to it might not be
9857 seen by reads from loop_info->mems[j]. */
9858 loop_info->mems[i].optimize = 0;
9859 break;
9860 }
9861 }
9862 }
9863
9864 if (maybe_never && may_trap_p (mem))
9865 /* We can't access the MEM outside the loop; it might
9866 cause a trap that wouldn't have happened otherwise. */
9867 loop_info->mems[i].optimize = 0;
9868
9869 if (!loop_info->mems[i].optimize)
9870 /* We thought we were going to lift this MEM out of the
9871 loop, but later discovered that we could not. */
9872 continue;
9873
9874 INIT_REG_SET (&load_copies);
9875 INIT_REG_SET (&store_copies);
9876
9877 /* Allocate a pseudo for this MEM. We set REG_USERVAR_P in
9878 order to keep scan_loop from moving stores to this MEM
9879 out of the loop just because this REG is neither a
9880 user-variable nor used in the loop test. */
9881 reg = gen_reg_rtx (GET_MODE (mem));
9882 REG_USERVAR_P (reg) = 1;
9883 loop_info->mems[i].reg = reg;
9884
9885 /* Now, replace all references to the MEM with the
9886 corresponding pseudos. */
9887 maybe_never = 0;
9888 for (p = next_insn_in_loop (loop, loop->scan_start);
9889 p != NULL_RTX;
9890 p = next_insn_in_loop (loop, p))
9891 {
9892 if (INSN_P (p))
9893 {
9894 rtx set;
9895
9896 set = single_set (p);
9897
9898 /* See if this copies the mem into a register that isn't
9899 modified afterwards. We'll try to do copy propagation
9900 a little further on. */
9901 if (set
9902 /* @@@ This test is _way_ too conservative. */
9903 && ! maybe_never
9904 && REG_P (SET_DEST (set))
9905 && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
9906 && REGNO (SET_DEST (set)) < last_max_reg
9907 && regs->array[REGNO (SET_DEST (set))].n_times_set == 1
9908 && rtx_equal_p (SET_SRC (set), mem))
9909 SET_REGNO_REG_SET (&load_copies, REGNO (SET_DEST (set)));
9910
9911 /* See if this copies the mem from a register that isn't
9912 modified afterwards. We'll try to remove the
9913 redundant copy later on by doing a little register
9914 renaming and copy propagation. This will help
9915 to untangle things for the BIV detection code. */
9916 if (set
9917 && ! maybe_never
9918 && REG_P (SET_SRC (set))
9919 && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER
9920 && REGNO (SET_SRC (set)) < last_max_reg
9921 && regs->array[REGNO (SET_SRC (set))].n_times_set == 1
9922 && rtx_equal_p (SET_DEST (set), mem))
9923 SET_REGNO_REG_SET (&store_copies, REGNO (SET_SRC (set)));
9924
9925 /* If this is a call which uses / clobbers this memory
9926 location, we must not change the interface here. */
9927 if (GET_CODE (p) == CALL_INSN
9928 && reg_mentioned_p (loop_info->mems[i].mem,
9929 CALL_INSN_FUNCTION_USAGE (p)))
9930 {
9931 cancel_changes (0);
9932 loop_info->mems[i].optimize = 0;
9933 break;
9934 }
9935 else
9936 /* Replace the memory reference with the shadow register. */
9937 replace_loop_mems (p, loop_info->mems[i].mem,
9938 loop_info->mems[i].reg, written);
9939 }
9940
9941 if (GET_CODE (p) == CODE_LABEL
9942 || GET_CODE (p) == JUMP_INSN)
9943 maybe_never = 1;
9944 }
9945
9946 if (! loop_info->mems[i].optimize)
9947 ; /* We found we couldn't do the replacement, so do nothing. */
9948 else if (! apply_change_group ())
9949 /* We couldn't replace all occurrences of the MEM. */
9950 loop_info->mems[i].optimize = 0;
9951 else
9952 {
9953 /* Load the memory immediately before LOOP->START, which is
9954 the NOTE_LOOP_BEG. */
9955 cselib_val *e = cselib_lookup (mem, VOIDmode, 0);
9956 rtx set;
9957 rtx best = mem;
9958 int j;
9959 struct elt_loc_list *const_equiv = 0;
9960
9961 if (e)
9962 {
9963 struct elt_loc_list *equiv;
9964 struct elt_loc_list *best_equiv = 0;
9965 for (equiv = e->locs; equiv; equiv = equiv->next)
9966 {
9967 if (CONSTANT_P (equiv->loc))
9968 const_equiv = equiv;
9969 else if (REG_P (equiv->loc)
9970 /* Extending hard register lifetimes causes crashes
9971 on SRC targets. Doing so on non-SRC targets is
9972 probably not a good idea either, since we most
9973 probably have a pseudoregister equivalence as
9974 well. */
9975 && REGNO (equiv->loc) >= FIRST_PSEUDO_REGISTER)
9976 best_equiv = equiv;
9977 }
9978 /* Use the constant equivalence if that is cheap enough. */
9979 if (! best_equiv)
9980 best_equiv = const_equiv;
9981 else if (const_equiv
9982 && (rtx_cost (const_equiv->loc, SET)
9983 <= rtx_cost (best_equiv->loc, SET)))
9984 {
9985 best_equiv = const_equiv;
9986 const_equiv = 0;
9987 }
9988
9989 /* If best_equiv is nonzero, we know that MEM is set to a
9990 constant or register before the loop. We will use this
9991 knowledge to initialize the shadow register with that
9992 constant or reg rather than by loading from MEM. */
9993 if (best_equiv)
9994 best = copy_rtx (best_equiv->loc);
9995 }
9996
9997 set = gen_move_insn (reg, best);
9998 set = loop_insn_hoist (loop, set);
9999 if (REG_P (best))
10000 {
10001 for (p = prev_ebb_head; p != loop->start; p = NEXT_INSN (p))
10002 if (REGNO_LAST_UID (REGNO (best)) == INSN_UID (p))
10003 {
10004 REGNO_LAST_UID (REGNO (best)) = INSN_UID (set);
10005 break;
10006 }
10007 }
10008
10009 if (const_equiv)
10010 set_unique_reg_note (set, REG_EQUAL, copy_rtx (const_equiv->loc));
10011
10012 if (written)
10013 {
10014 if (label == NULL_RTX)
10015 {
10016 label = gen_label_rtx ();
10017 emit_label_after (label, loop->end);
10018 }
10019
10020 /* Store the memory immediately after END, which is
10021 the NOTE_LOOP_END. */
10022 set = gen_move_insn (copy_rtx (mem), reg);
10023 loop_insn_emit_after (loop, 0, label, set);
10024 }
10025
10026 if (loop_dump_stream)
10027 {
10028 fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
10029 REGNO (reg), (written ? "r/w" : "r/o"));
10030 print_rtl (loop_dump_stream, mem);
10031 fputc ('\n', loop_dump_stream);
10032 }
10033
10034 /* Attempt a bit of copy propagation. This helps untangle the
10035 data flow, and enables {basic,general}_induction_var to find
10036 more bivs/givs. */
10037 EXECUTE_IF_SET_IN_REG_SET
10038 (&load_copies, FIRST_PSEUDO_REGISTER, j,
10039 {
10040 try_copy_prop (loop, reg, j);
10041 });
10042 CLEAR_REG_SET (&load_copies);
10043
10044 EXECUTE_IF_SET_IN_REG_SET
10045 (&store_copies, FIRST_PSEUDO_REGISTER, j,
10046 {
10047 try_swap_copy_prop (loop, reg, j);
10048 });
10049 CLEAR_REG_SET (&store_copies);
10050 }
10051 }
10052
10053 /* Now, we need to replace all references to the previous exit
10054 label with the new one. */
10055 if (label != NULL_RTX && end_label != NULL_RTX)
10056 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
10057 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == end_label)
10058 redirect_jump (p, label, false);
10059
10060 cselib_finish ();
10061 }
10062
10063 /* For communication between note_reg_stored and its caller. */
10064 struct note_reg_stored_arg
10065 {
10066 int set_seen;
10067 rtx reg;
10068 };
10069
10070 /* Called via note_stores, record in SET_SEEN whether X, which is written,
10071 is equal to ARG. */
10072 static void
10073 note_reg_stored (rtx x, rtx setter ATTRIBUTE_UNUSED, void *arg)
10074 {
10075 struct note_reg_stored_arg *t = (struct note_reg_stored_arg *) arg;
10076 if (t->reg == x)
10077 t->set_seen = 1;
10078 }
10079
10080 /* Try to replace every occurrence of pseudo REGNO with REPLACEMENT.
10081 There must be exactly one insn that sets this pseudo; it will be
10082 deleted if all replacements succeed and we can prove that the register
10083 is not used after the loop. */
10084
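/* Editorial example (hypothetical): after load_mems shadows a MEM with
   pseudo 150, an insn such as (set (reg 105) (reg 150)) inside the loop
   lets try_copy_prop replace the later uses of reg 105 with reg 150; if
   every use is replaced and reg 105 is not live after the loop, the
   initializing insn is deleted.  */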
10085 static void
10086 try_copy_prop (const struct loop *loop, rtx replacement, unsigned int regno)
10087 {
10088 /* This is the reg that we are copying from. */
10089 rtx reg_rtx = regno_reg_rtx[regno];
10090 rtx init_insn = 0;
10091 rtx insn;
10092 /* These help keep track of whether we replaced all uses of the reg. */
10093 int replaced_last = 0;
10094 int store_is_first = 0;
10095
10096 for (insn = next_insn_in_loop (loop, loop->scan_start);
10097 insn != NULL_RTX;
10098 insn = next_insn_in_loop (loop, insn))
10099 {
10100 rtx set;
10101
10102 /* Only substitute within one extended basic block from the initializing
10103 insn. */
10104 if (GET_CODE (insn) == CODE_LABEL && init_insn)
10105 break;
10106
10107 if (! INSN_P (insn))
10108 continue;
10109
10110 /* Is this the initializing insn? */
10111 set = single_set (insn);
10112 if (set
10113 && REG_P (SET_DEST (set))
10114 && REGNO (SET_DEST (set)) == regno)
10115 {
10116 if (init_insn)
10117 abort ();
10118
10119 init_insn = insn;
10120 if (REGNO_FIRST_UID (regno) == INSN_UID (insn))
10121 store_is_first = 1;
10122 }
10123
10124 /* Only substitute after seeing the initializing insn. */
10125 if (init_insn && insn != init_insn)
10126 {
10127 struct note_reg_stored_arg arg;
10128
10129 replace_loop_regs (insn, reg_rtx, replacement);
10130 if (REGNO_LAST_UID (regno) == INSN_UID (insn))
10131 replaced_last = 1;
10132
10133 /* Stop replacing when REPLACEMENT is modified. */
10134 arg.reg = replacement;
10135 arg.set_seen = 0;
10136 note_stores (PATTERN (insn), note_reg_stored, &arg);
10137 if (arg.set_seen)
10138 {
10139 rtx note = find_reg_note (insn, REG_EQUAL, NULL);
10140
10141 /* It is possible that we've turned a previously valid REG_EQUAL note
10142 into an invalid one: we changed REGNO to REPLACEMENT, and since
10143 REPLACEMENT (unlike REGNO) is modified, the note may now have a different meaning. */
10144 if (note && reg_mentioned_p (replacement, XEXP (note, 0)))
10145 remove_note (insn, note);
10146 break;
10147 }
10148 }
10149 }
10150 if (! init_insn)
10151 abort ();
10152 if (apply_change_group ())
10153 {
10154 if (loop_dump_stream)
10155 fprintf (loop_dump_stream, " Replaced reg %d", regno);
10156 if (store_is_first && replaced_last)
10157 {
10158 rtx first;
10159 rtx retval_note;
10160
10161 /* Assume we're just deleting INIT_INSN. */
10162 first = init_insn;
10163 /* Look for REG_RETVAL note. If we're deleting the end of
10164 the libcall sequence, the whole sequence can go. */
10165 retval_note = find_reg_note (init_insn, REG_RETVAL, NULL_RTX);
10166 /* If we found a REG_RETVAL note, find the first instruction
10167 in the sequence. */
10168 if (retval_note)
10169 first = XEXP (retval_note, 0);
10170
10171 /* Delete the instructions. */
10172 loop_delete_insns (first, init_insn);
10173 }
10174 if (loop_dump_stream)
10175 fprintf (loop_dump_stream, ".\n");
10176 }
10177 }
10178
10179 /* Replace all the instructions from FIRST up to and including LAST
10180 with NOTE_INSN_DELETED notes. */
10181
10182 static void
10183 loop_delete_insns (rtx first, rtx last)
10184 {
10185 while (1)
10186 {
10187 if (loop_dump_stream)
10188 fprintf (loop_dump_stream, ", deleting init_insn (%d)",
10189 INSN_UID (first));
10190 delete_insn (first);
10191
10192 /* If this was the LAST instruction we're supposed to delete,
10193 we're done. */
10194 if (first == last)
10195 break;
10196
10197 first = NEXT_INSN (first);
10198 }
10199 }
10200
10201 /* Try to replace occurrences of pseudo REGNO with REPLACEMENT within
10202 loop LOOP if the order of the sets of these registers can be
10203 swapped. There must be exactly one insn within the loop that sets
10204 this pseudo followed immediately by a move insn that sets
10205 REPLACEMENT with REGNO. */
10206 static void
10207 try_swap_copy_prop (const struct loop *loop, rtx replacement,
10208 unsigned int regno)
10209 {
10210 rtx insn;
10211 rtx set = NULL_RTX;
10212 unsigned int new_regno;
10213
10214 new_regno = REGNO (replacement);
10215
10216 for (insn = next_insn_in_loop (loop, loop->scan_start);
10217 insn != NULL_RTX;
10218 insn = next_insn_in_loop (loop, insn))
10219 {
10220 /* Search for the insn that copies REGNO to NEW_REGNO. */
10221 if (INSN_P (insn)
10222 && (set = single_set (insn))
10223 && REG_P (SET_DEST (set))
10224 && REGNO (SET_DEST (set)) == new_regno
10225 && REG_P (SET_SRC (set))
10226 && REGNO (SET_SRC (set)) == regno)
10227 break;
10228 }
10229
10230 if (insn != NULL_RTX)
10231 {
10232 rtx prev_insn;
10233 rtx prev_set;
10234
10235 /* Some DEF-USE info would come in handy here to make this
10236 function more general. For now, just check the previous insn
10237 which is the most likely candidate for setting REGNO. */
10238
10239 prev_insn = PREV_INSN (insn);
10240
10241 if (INSN_P (insn)
10242 && (prev_set = single_set (prev_insn))
10243 && REG_P (SET_DEST (prev_set))
10244 && REGNO (SET_DEST (prev_set)) == regno)
10245 {
10246 /* We have:
10247 (set (reg regno) (expr))
10248 (set (reg new_regno) (reg regno))
10249
10250 so try converting this to:
10251 (set (reg new_regno) (expr))
10252 (set (reg regno) (reg new_regno))
10253
10254 The former construct is often generated when a global
10255 variable used for an induction variable is shadowed by a
10256 register (NEW_REGNO). The latter construct improves the
10257 chances of GIV replacement and BIV elimination. */
10258
10259 validate_change (prev_insn, &SET_DEST (prev_set),
10260 replacement, 1);
10261 validate_change (insn, &SET_DEST (set),
10262 SET_SRC (set), 1);
10263 validate_change (insn, &SET_SRC (set),
10264 replacement, 1);
10265
10266 if (apply_change_group ())
10267 {
10268 if (loop_dump_stream)
10269 fprintf (loop_dump_stream,
10270 " Swapped set of reg %d at %d with reg %d at %d.\n",
10271 regno, INSN_UID (insn),
10272 new_regno, INSN_UID (prev_insn));
10273
10274 /* Update first use of REGNO. */
10275 if (REGNO_FIRST_UID (regno) == INSN_UID (prev_insn))
10276 REGNO_FIRST_UID (regno) = INSN_UID (insn);
10277
10278 /* Now perform copy propagation to hopefully
10279 remove all uses of REGNO within the loop. */
10280 try_copy_prop (loop, replacement, regno);
10281 }
10282 }
10283 }
10284 }
10285
10286 /* Worker function for find_mem_in_note, called via for_each_rtx. */
10287
10288 static int
10289 find_mem_in_note_1 (rtx *x, void *data)
10290 {
10291 if (*x != NULL_RTX && MEM_P (*x))
10292 {
10293 rtx *res = (rtx *) data;
10294 *res = *x;
10295 return 1;
10296 }
10297 return 0;
10298 }
10299
10300 /* Returns the first MEM found in NOTE by depth-first search. */
10301
10302 static rtx
10303 find_mem_in_note (rtx note)
10304 {
10305 if (note && for_each_rtx (&note, find_mem_in_note_1, &note))
10306 return note;
10307 return NULL_RTX;
10308 }
10309
10310 /* Replace MEM with its associated pseudo register. This function is
10311 called from load_mems via for_each_rtx. DATA is actually a pointer
10312 to a structure describing the instruction currently being scanned
10313 and the MEM we are currently replacing. */
10314
10315 static int
10316 replace_loop_mem (rtx *mem, void *data)
10317 {
10318 loop_replace_args *args = (loop_replace_args *) data;
10319 rtx m = *mem;
10320
10321 if (m == NULL_RTX)
10322 return 0;
10323
10324 switch (GET_CODE (m))
10325 {
10326 case MEM:
10327 break;
10328
10329 case CONST_DOUBLE:
10330 /* We're not interested in the MEM associated with a
10331 CONST_DOUBLE, so there's no need to traverse into one. */
10332 return -1;
10333
10334 default:
10335 /* This is not a MEM. */
10336 return 0;
10337 }
10338
10339 if (!rtx_equal_p (args->match, m))
10340 /* This is not the MEM we are currently replacing. */
10341 return 0;
10342
10343 /* Actually replace the MEM. */
10344 validate_change (args->insn, mem, args->replacement, 1);
10345
10346 return 0;
10347 }
10348
10349 static void
10350 replace_loop_mems (rtx insn, rtx mem, rtx reg, int written)
10351 {
10352 loop_replace_args args;
10353
10354 args.insn = insn;
10355 args.match = mem;
10356 args.replacement = reg;
10357
10358 for_each_rtx (&insn, replace_loop_mem, &args);
10359
10360 /* If we hoist a mem write out of the loop, then REG_EQUAL
10361 notes referring to the mem are no longer valid. */
10362 if (written)
10363 {
10364 rtx note, sub;
10365 rtx *link;
10366
10367 for (link = &REG_NOTES (insn); (note = *link); link = &XEXP (note, 1))
10368 {
10369 if (REG_NOTE_KIND (note) == REG_EQUAL
10370 && (sub = find_mem_in_note (note))
10371 && true_dependence (mem, VOIDmode, sub, rtx_varies_p))
10372 {
10373 /* Remove the note. */
10374 validate_change (NULL_RTX, link, XEXP (note, 1), 1);
10375 break;
10376 }
10377 }
10378 }
10379 }
10380
10381 /* Replace one register with another. Called through for_each_rtx; PX points
10382 to the rtx being scanned. DATA is actually a pointer to
10383 a structure of arguments. */
10384
10385 static int
10386 replace_loop_reg (rtx *px, void *data)
10387 {
10388 rtx x = *px;
10389 loop_replace_args *args = (loop_replace_args *) data;
10390
10391 if (x == NULL_RTX)
10392 return 0;
10393
10394 if (x == args->match)
10395 validate_change (args->insn, px, args->replacement, 1);
10396
10397 return 0;
10398 }
10399
10400 static void
10401 replace_loop_regs (rtx insn, rtx reg, rtx replacement)
10402 {
10403 loop_replace_args args;
10404
10405 args.insn = insn;
10406 args.match = reg;
10407 args.replacement = replacement;
10408
10409 for_each_rtx (&insn, replace_loop_reg, &args);
10410 }
10411 \f
10412 /* Emit insn for PATTERN after WHERE_INSN in basic block WHERE_BB
10413 (ignored in the interim). */
10414
10415 static rtx
10416 loop_insn_emit_after (const struct loop *loop ATTRIBUTE_UNUSED,
10417 basic_block where_bb ATTRIBUTE_UNUSED, rtx where_insn,
10418 rtx pattern)
10419 {
10420 return emit_insn_after (pattern, where_insn);
10421 }
10422
10423
10424 /* If WHERE_INSN is nonzero emit insn for PATTERN before WHERE_INSN
10425 in basic block WHERE_BB (ignored in the interim) within the loop
10426 otherwise hoist PATTERN into the loop pre-header. */
10427
10428 rtx
10429 loop_insn_emit_before (const struct loop *loop,
10430 basic_block where_bb ATTRIBUTE_UNUSED,
10431 rtx where_insn, rtx pattern)
10432 {
10433 if (! where_insn)
10434 return loop_insn_hoist (loop, pattern);
10435 return emit_insn_before (pattern, where_insn);
10436 }
10437
10438
10439 /* Emit call insn for PATTERN before WHERE_INSN in basic block
10440 WHERE_BB (ignored in the interim) within the loop. */
10441
10442 static rtx
10443 loop_call_insn_emit_before (const struct loop *loop ATTRIBUTE_UNUSED,
10444 basic_block where_bb ATTRIBUTE_UNUSED,
10445 rtx where_insn, rtx pattern)
10446 {
10447 return emit_call_insn_before (pattern, where_insn);
10448 }
10449
10450
10451 /* Hoist insn for PATTERN into the loop pre-header. */
10452
10453 rtx
10454 loop_insn_hoist (const struct loop *loop, rtx pattern)
10455 {
10456 return loop_insn_emit_before (loop, 0, loop->start, pattern);
10457 }
10458
10459
10460 /* Hoist call insn for PATTERN into the loop pre-header. */
10461
10462 static rtx
10463 loop_call_insn_hoist (const struct loop *loop, rtx pattern)
10464 {
10465 return loop_call_insn_emit_before (loop, 0, loop->start, pattern);
10466 }
10467
10468
10469 /* Sink insn for PATTERN after the loop end. */
10470
10471 rtx
10472 loop_insn_sink (const struct loop *loop, rtx pattern)
10473 {
10474 return loop_insn_emit_before (loop, 0, loop->sink, pattern);
10475 }
10476
10477 /* bl->final_value can be either a general_operand or a PLUS of a general_operand
10478 and a constant. Emit a sequence of instructions to load it into REG. */
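/* Editorial example (hypothetical): for FINAL_VALUE
   (plus:SI (reg:SI 60) (const_int 16)), force_operand emits the addition
   into REG, and the returned insn sequence is later hoisted before the
   loop or sunk after it by the caller.  */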
10479 static rtx
10480 gen_load_of_final_value (rtx reg, rtx final_value)
10481 {
10482 rtx seq;
10483 start_sequence ();
10484 final_value = force_operand (final_value, reg);
10485 if (final_value != reg)
10486 emit_move_insn (reg, final_value);
10487 seq = get_insns ();
10488 end_sequence ();
10489 return seq;
10490 }
10491
10492 /* If the loop has multiple exits, emit insn for PATTERN before the
10493 loop to ensure that it will always be executed no matter how the
10494 loop exits. Otherwise, emit the insn for PATTERN after the loop,
10495 since this is slightly more efficient. */
10496
10497 static rtx
10498 loop_insn_sink_or_swim (const struct loop *loop, rtx pattern)
10499 {
10500 if (loop->exit_count)
10501 return loop_insn_hoist (loop, pattern);
10502 else
10503 return loop_insn_sink (loop, pattern);
10504 }
10505 \f
10506 static void
10507 loop_ivs_dump (const struct loop *loop, FILE *file, int verbose)
10508 {
10509 struct iv_class *bl;
10510 int iv_num = 0;
10511
10512 if (! loop || ! file)
10513 return;
10514
10515 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10516 iv_num++;
10517
10518 fprintf (file, "Loop %d: %d IV classes\n", loop->num, iv_num);
10519
10520 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10521 {
10522 loop_iv_class_dump (bl, file, verbose);
10523 fputc ('\n', file);
10524 }
10525 }
10526
10527
10528 static void
10529 loop_iv_class_dump (const struct iv_class *bl, FILE *file,
10530 int verbose ATTRIBUTE_UNUSED)
10531 {
10532 struct induction *v;
10533 rtx incr;
10534 int i;
10535
10536 if (! bl || ! file)
10537 return;
10538
10539 fprintf (file, "IV class for reg %d, benefit %d\n",
10540 bl->regno, bl->total_benefit);
10541
10542 fprintf (file, " Init insn %d", INSN_UID (bl->init_insn));
10543 if (bl->initial_value)
10544 {
10545 fprintf (file, ", init val: ");
10546 print_simple_rtl (file, bl->initial_value);
10547 }
10548 if (bl->initial_test)
10549 {
10550 fprintf (file, ", init test: ");
10551 print_simple_rtl (file, bl->initial_test);
10552 }
10553 fputc ('\n', file);
10554
10555 if (bl->final_value)
10556 {
10557 fprintf (file, " Final val: ");
10558 print_simple_rtl (file, bl->final_value);
10559 fputc ('\n', file);
10560 }
10561
10562 if ((incr = biv_total_increment (bl)))
10563 {
10564 fprintf (file, " Total increment: ");
10565 print_simple_rtl (file, incr);
10566 fputc ('\n', file);
10567 }
10568
10569 /* List the increments. */
10570 for (i = 0, v = bl->biv; v; v = v->next_iv, i++)
10571 {
10572 fprintf (file, " Inc%d: insn %d, incr: ", i, INSN_UID (v->insn));
10573 print_simple_rtl (file, v->add_val);
10574 fputc ('\n', file);
10575 }
10576
10577 /* List the givs. */
10578 for (i = 0, v = bl->giv; v; v = v->next_iv, i++)
10579 {
10580 fprintf (file, " Giv%d: insn %d, benefit %d, ",
10581 i, INSN_UID (v->insn), v->benefit);
10582 if (v->giv_type == DEST_ADDR)
10583 print_simple_rtl (file, v->mem);
10584 else
10585 print_simple_rtl (file, single_set (v->insn));
10586 fputc ('\n', file);
10587 }
10588 }
10589
10590
10591 static void
10592 loop_biv_dump (const struct induction *v, FILE *file, int verbose)
10593 {
10594 if (! v || ! file)
10595 return;
10596
10597 fprintf (file,
10598 "Biv %d: insn %d",
10599 REGNO (v->dest_reg), INSN_UID (v->insn));
10600 fprintf (file, " const ");
10601 print_simple_rtl (file, v->add_val);
10602
10603 if (verbose && v->final_value)
10604 {
10605 fputc ('\n', file);
10606 fprintf (file, " final ");
10607 print_simple_rtl (file, v->final_value);
10608 }
10609
10610 fputc ('\n', file);
10611 }
10612
10613
10614 static void
10615 loop_giv_dump (const struct induction *v, FILE *file, int verbose)
10616 {
10617 if (! v || ! file)
10618 return;
10619
10620 if (v->giv_type == DEST_REG)
10621 fprintf (file, "Giv %d: insn %d",
10622 REGNO (v->dest_reg), INSN_UID (v->insn));
10623 else
10624 fprintf (file, "Dest address: insn %d",
10625 INSN_UID (v->insn));
10626
10627 fprintf (file, " src reg %d benefit %d",
10628 REGNO (v->src_reg), v->benefit);
10629 fprintf (file, " lifetime %d",
10630 v->lifetime);
10631
10632 if (v->replaceable)
10633 fprintf (file, " replaceable");
10634
10635 if (v->no_const_addval)
10636 fprintf (file, " ncav");
10637
10638 if (v->ext_dependent)
10639 {
10640 switch (GET_CODE (v->ext_dependent))
10641 {
10642 case SIGN_EXTEND:
10643 fprintf (file, " ext se");
10644 break;
10645 case ZERO_EXTEND:
10646 fprintf (file, " ext ze");
10647 break;
10648 case TRUNCATE:
10649 fprintf (file, " ext tr");
10650 break;
10651 default:
10652 abort ();
10653 }
10654 }
10655
10656 fputc ('\n', file);
10657 fprintf (file, " mult ");
10658 print_simple_rtl (file, v->mult_val);
10659
10660 fputc ('\n', file);
10661 fprintf (file, " add ");
10662 print_simple_rtl (file, v->add_val);
10663
10664 if (verbose && v->final_value)
10665 {
10666 fputc ('\n', file);
10667 fprintf (file, " final ");
10668 print_simple_rtl (file, v->final_value);
10669 }
10670
10671 fputc ('\n', file);
10672 }
10673
10674
10675 void
10676 debug_ivs (const struct loop *loop)
10677 {
10678 loop_ivs_dump (loop, stderr, 1);
10679 }
10680
10681
10682 void
10683 debug_iv_class (const struct iv_class *bl)
10684 {
10685 loop_iv_class_dump (bl, stderr, 1);
10686 }
10687
10688
10689 void
10690 debug_biv (const struct induction *v)
10691 {
10692 loop_biv_dump (v, stderr, 1);
10693 }
10694
10695
10696 void
10697 debug_giv (const struct induction *v)
10698 {
10699 loop_giv_dump (v, stderr, 1);
10700 }
10701
10702
10703 #define LOOP_BLOCK_NUM_1(INSN) \
10704 ((INSN) ? (BLOCK_FOR_INSN (INSN) ? BLOCK_NUM (INSN) : -1) : -1)
10705
10706 /* NOTE insns do not have an assigned basic block, so look at the next insn instead.  */
10707 #define LOOP_BLOCK_NUM(INSN) \
10708 ((INSN) ? (GET_CODE (INSN) == NOTE \
10709 ? LOOP_BLOCK_NUM_1 (next_nonnote_insn (INSN)) \
10710 : LOOP_BLOCK_NUM_1 (INSN)) \
10711 : -1)
10712
10713 #define LOOP_INSN_UID(INSN) ((INSN) ? INSN_UID (INSN) : -1)
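/* Both helpers above map a possibly-null insn to something printable:
   -1 is printed when there is no insn at all, or (for LOOP_BLOCK_NUM)
   when the insn has not been assigned to a basic block.  */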
10714
10715 static void
10716 loop_dump_aux (const struct loop *loop, FILE *file,
10717 int verbose ATTRIBUTE_UNUSED)
10718 {
10719 rtx label;
10720
10721 if (! loop || ! file)
10722 return;
10723
10724 /* Print diagnostics to compare our concept of a loop with
10725 what the loop notes say. */
10726 if (! PREV_INSN (BB_HEAD (loop->first))
10727 || GET_CODE (PREV_INSN (BB_HEAD (loop->first))) != NOTE
10728 || NOTE_LINE_NUMBER (PREV_INSN (BB_HEAD (loop->first)))
10729 != NOTE_INSN_LOOP_BEG)
10730 fprintf (file, ";; No NOTE_INSN_LOOP_BEG at %d\n",
10731 INSN_UID (PREV_INSN (BB_HEAD (loop->first))));
10732 if (! NEXT_INSN (BB_END (loop->last))
10733 || GET_CODE (NEXT_INSN (BB_END (loop->last))) != NOTE
10734 || NOTE_LINE_NUMBER (NEXT_INSN (BB_END (loop->last)))
10735 != NOTE_INSN_LOOP_END)
10736 fprintf (file, ";; No NOTE_INSN_LOOP_END at %d\n",
10737 INSN_UID (NEXT_INSN (BB_END (loop->last))));
10738
10739 if (loop->start)
10740 {
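/* Note that loop->cont supplies both the ``cont dom'' and ``cont''
   fields of the line printed below.  */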
10741 fprintf (file,
10742 ";; start %d (%d), cont dom %d (%d), cont %d (%d), vtop %d (%d), end %d (%d)\n",
10743 LOOP_BLOCK_NUM (loop->start),
10744 LOOP_INSN_UID (loop->start),
10745 LOOP_BLOCK_NUM (loop->cont),
10746 LOOP_INSN_UID (loop->cont),
10747 LOOP_BLOCK_NUM (loop->cont),
10748 LOOP_INSN_UID (loop->cont),
10749 LOOP_BLOCK_NUM (loop->vtop),
10750 LOOP_INSN_UID (loop->vtop),
10751 LOOP_BLOCK_NUM (loop->end),
10752 LOOP_INSN_UID (loop->end));
10753 fprintf (file, ";; top %d (%d), scan start %d (%d)\n",
10754 LOOP_BLOCK_NUM (loop->top),
10755 LOOP_INSN_UID (loop->top),
10756 LOOP_BLOCK_NUM (loop->scan_start),
10757 LOOP_INSN_UID (loop->scan_start));
10758 fprintf (file, ";; exit_count %d", loop->exit_count);
10759 if (loop->exit_count)
10760 {
10761 fputs (", labels:", file);
10762 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
10763 {
10764 fprintf (file, " %d ",
10765 LOOP_INSN_UID (XEXP (label, 0)));
10766 }
10767 }
10768 fputs ("\n", file);
10769
10770 /* This can happen when a marked loop appears as two nested loops,
10771 say from while (a || b) {}. The inner loop won't match
10772 the loop markers but the outer one will. */
10773 if (LOOP_BLOCK_NUM (loop->cont) != loop->latch->index)
10774 fprintf (file, ";; NOTE_INSN_LOOP_CONT not in loop latch\n");
10775 }
10776 }
10777
10778 /* Call this function from the debugger to dump LOOP. */
10779
10780 void
10781 debug_loop (const struct loop *loop)
10782 {
10783 flow_loop_dump (loop, stderr, loop_dump_aux, 1);
10784 }
10785
10786 /* Call this function from the debugger to dump LOOPS. */
10787
10788 void
10789 debug_loops (const struct loops *loops)
10790 {
10791 flow_loops_dump (loops, stderr, loop_dump_aux, 1);
10792 }
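/* Example debugger usage (assumed gdb session, for illustration only):

     (gdb) call debug_loop (loop)
     (gdb) call debug_ivs (loop)
     (gdb) call debug_loops (loops)

   All of these write their dumps to stderr.  */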