1 /* Loop unrolling.
2 Copyright (C) 2002-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "vec.h"
27 #include "input.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "inchash.h"
31 #include "tree.h"
32 #include "hard-reg-set.h"
33 #include "obstack.h"
34 #include "profile.h"
35 #include "predict.h"
36 #include "function.h"
37 #include "dominance.h"
38 #include "cfg.h"
39 #include "cfgrtl.h"
40 #include "basic-block.h"
41 #include "cfgloop.h"
42 #include "params.h"
43 #include "insn-codes.h"
44 #include "optabs.h"
45 #include "hashtab.h"
46 #include "flags.h"
47 #include "statistics.h"
48 #include "insn-config.h"
49 #include "expmed.h"
50 #include "dojump.h"
51 #include "explow.h"
52 #include "calls.h"
53 #include "emit-rtl.h"
54 #include "varasm.h"
55 #include "stmt.h"
56 #include "expr.h"
57 #include "hash-table.h"
58 #include "recog.h"
59 #include "target.h"
60 #include "dumpfile.h"
61
 62 /* This pass performs loop unrolling. We only perform this
 63    optimization on innermost loops (with a single exception) because
 64    the impact on performance is greatest here, and we want to avoid
 65    unnecessary code size growth. The gain comes from greater sequentiality
 66    of code, better opportunities for further optimization passes and, in
 67    some cases, fewer tests of exit conditions. The main problem is code
 68    growth, which impacts performance negatively due to cache effects.
69
70 What we do:
71
 72    -- unrolling of loops that roll a constant number of times; this is
 73       almost always a win, as we get rid of the exit condition tests.
 74    -- unrolling of loops that roll a number of times that we can compute
 75       at runtime; we also get rid of exit condition tests here, but there
 76       is the extra expense of calculating the number of iterations
 77    -- simple unrolling of the remaining loops; this is performed only if
 78       we are asked to, as the gain is questionable in this case and it
 79       may often even slow down the code
 80    For a more detailed description of each of these, see the comments at
 81    the appropriate functions below.
82
 83    There are a number of parameters (defined and described in params.def)
 84    that control how much we unroll.
 85
 86    ??? A great problem is that we don't have a good way to determine
 87    how many times we should unroll the loop; the experiments I have made
 88    showed that this choice may affect performance on the order of
 89    several percent. */
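
/* For illustration, the three cases above correspond roughly to the
   following source forms; the actual decisions are made on the RTL
   form by the decide_* functions below:

     for (i = 0; i < 100; i++)   (rolls a constant number of times)
       body;
     for (i = 0; i < n; i++)     (count computable at runtime)
       body;
     while (test ())             (no usable iteration information;
       body;                      handled by "stupid" unrolling)  */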
90
91 /* Information about induction variables to split. */
92
93 struct iv_to_split
94 {
 95 rtx_insn *insn; /* The insn in which the induction variable occurs. */
96 rtx orig_var; /* The variable (register) for the IV before split. */
 97 rtx base_var; /* The variable on which the values in the further
 98 iterations are based. */
99 rtx step; /* Step of the induction variable. */
100 struct iv_to_split *next; /* Next entry in walking order. */
101 };
102
103 /* Information about accumulators to expand. */
104
105 struct var_to_expand
106 {
 107 rtx_insn *insn; /* The insn in which the variable expansion occurs. */
108 rtx reg; /* The accumulator which is expanded. */
109 vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */
110 struct var_to_expand *next; /* Next entry in walking order. */
111 enum rtx_code op; /* The type of the accumulation - addition, subtraction
112 or multiplication. */
113 int expansion_count; /* Count the number of expansions generated so far. */
114 int reuse_expansion; /* The expansion we intend to reuse to expand
115 the accumulator. If REUSE_EXPANSION is 0 reuse
116 the original accumulator. Else use
117 var_expansions[REUSE_EXPANSION - 1]. */
118 };
119
120 /* Hashtable helper for iv_to_split. */
121
122 struct iv_split_hasher : typed_free_remove <iv_to_split>
123 {
124 typedef iv_to_split *value_type;
125 typedef iv_to_split *compare_type;
126 static inline hashval_t hash (const iv_to_split *);
127 static inline bool equal (const iv_to_split *, const iv_to_split *);
128 };
129
130
131 /* A hash function for information about insns to split. */
132
133 inline hashval_t
134 iv_split_hasher::hash (const iv_to_split *ivts)
135 {
136 return (hashval_t) INSN_UID (ivts->insn);
137 }
138
 139 /* An equality function for information about insns to split. */
140
141 inline bool
142 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
143 {
144 return i1->insn == i2->insn;
145 }
146
 147 /* Hashtable helper for var_to_expand. */
148
149 struct var_expand_hasher : typed_free_remove <var_to_expand>
150 {
151 typedef var_to_expand *value_type;
152 typedef var_to_expand *compare_type;
153 static inline hashval_t hash (const var_to_expand *);
154 static inline bool equal (const var_to_expand *, const var_to_expand *);
155 };
156
157 /* Return a hash for VES. */
158
159 inline hashval_t
160 var_expand_hasher::hash (const var_to_expand *ves)
161 {
162 return (hashval_t) INSN_UID (ves->insn);
163 }
164
165 /* Return true if I1 and I2 refer to the same instruction. */
166
167 inline bool
168 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
169 {
170 return i1->insn == i2->insn;
171 }
172
173 /* Information about optimization applied in
174 the unrolled loop. */
175
176 struct opt_info
177 {
178 hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
179 split. */
180 struct iv_to_split *iv_to_split_head; /* The first iv to split. */
181 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
182 hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
183 insns with accumulators to expand. */
184 struct var_to_expand *var_to_expand_head; /* The first var to expand. */
185 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
186 unsigned first_new_block; /* The first basic block that was
187 duplicated. */
188 basic_block loop_exit; /* The loop exit basic block. */
189 basic_block loop_preheader; /* The loop preheader basic block. */
190 };
191
192 static void decide_unroll_stupid (struct loop *, int);
193 static void decide_unroll_constant_iterations (struct loop *, int);
194 static void decide_unroll_runtime_iterations (struct loop *, int);
195 static void unroll_loop_stupid (struct loop *);
196 static void decide_unrolling (int);
197 static void unroll_loop_constant_iterations (struct loop *);
198 static void unroll_loop_runtime_iterations (struct loop *);
199 static struct opt_info *analyze_insns_in_loop (struct loop *);
200 static void opt_info_start_duplication (struct opt_info *);
201 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
202 static void free_opt_info (struct opt_info *);
203 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
204 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
205 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
206 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
207 static void insert_var_expansion_initialization (struct var_to_expand *,
208 basic_block);
209 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
210 basic_block);
211 static rtx get_expansion (struct var_to_expand *);
212
213 /* Emit a message summarizing the unroll that will be
214 performed for LOOP, along with the loop's location LOCUS, if
215 appropriate given the dump or -fopt-info settings. */
216
217 static void
218 report_unroll (struct loop *loop, location_t locus)
219 {
220 int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
221
222 if (loop->lpt_decision.decision == LPT_NONE)
223 return;
224
225 if (!dump_enabled_p ())
226 return;
227
228 dump_printf_loc (report_flags, locus,
229 "loop unrolled %d times",
230 loop->lpt_decision.times);
231 if (profile_info)
232 dump_printf (report_flags,
233 " (header execution count %d)",
234 (int)loop->header->count);
235
236 dump_printf (report_flags, "\n");
237 }
238
 239 /* Decide whether to unroll loops and how much. */
240 static void
241 decide_unrolling (int flags)
242 {
243 struct loop *loop;
244
245 /* Scan the loops, inner ones first. */
246 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
247 {
248 loop->lpt_decision.decision = LPT_NONE;
249 location_t locus = get_loop_location (loop);
250
251 if (dump_enabled_p ())
252 dump_printf_loc (TDF_RTL, locus,
253 ";; *** Considering loop %d at BB %d for "
254 "unrolling ***\n",
255 loop->num, loop->header->index);
256
 257 /* Do not unroll cold areas. */
258 if (optimize_loop_for_size_p (loop))
259 {
260 if (dump_file)
261 fprintf (dump_file, ";; Not considering loop, cold area\n");
262 continue;
263 }
264
265 /* Can the loop be manipulated? */
266 if (!can_duplicate_loop_p (loop))
267 {
268 if (dump_file)
269 fprintf (dump_file,
270 ";; Not considering loop, cannot duplicate\n");
271 continue;
272 }
273
274 /* Skip non-innermost loops. */
275 if (loop->inner)
276 {
277 if (dump_file)
278 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
279 continue;
280 }
281
282 loop->ninsns = num_loop_insns (loop);
283 loop->av_ninsns = average_num_loop_insns (loop);
284
285 /* Try transformations one by one in decreasing order of
286 priority. */
287
288 decide_unroll_constant_iterations (loop, flags);
289 if (loop->lpt_decision.decision == LPT_NONE)
290 decide_unroll_runtime_iterations (loop, flags);
291 if (loop->lpt_decision.decision == LPT_NONE)
292 decide_unroll_stupid (loop, flags);
293
294 report_unroll (loop, locus);
295 }
296 }
297
 298 /* Unroll loops; FLAGS is the set of UAP_* flags deciding which kinds of unrolling to perform. */
299 void
300 unroll_loops (int flags)
301 {
302 struct loop *loop;
303 bool changed = false;
304
 305 /* First decide what to unroll and how much. */
306 decide_unrolling (flags);
307
308 /* Scan the loops, inner ones first. */
309 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
310 {
311 /* And perform the appropriate transformations. */
312 switch (loop->lpt_decision.decision)
313 {
314 case LPT_UNROLL_CONSTANT:
315 unroll_loop_constant_iterations (loop);
316 changed = true;
317 break;
318 case LPT_UNROLL_RUNTIME:
319 unroll_loop_runtime_iterations (loop);
320 changed = true;
321 break;
322 case LPT_UNROLL_STUPID:
323 unroll_loop_stupid (loop);
324 changed = true;
325 break;
326 case LPT_NONE:
327 break;
328 default:
329 gcc_unreachable ();
330 }
331 }
332
333 if (changed)
334 {
335 calculate_dominance_info (CDI_DOMINATORS);
336 fix_loop_structure (NULL);
337 }
338
339 iv_analysis_done ();
340 }
341
 342 /* Check whether the exit of LOOP is at the end of the loop body. */
343
344 static bool
345 loop_exit_at_end_p (struct loop *loop)
346 {
347 struct niter_desc *desc = get_simple_loop_desc (loop);
348 rtx_insn *insn;
349
 350 /* We should never have a conditional in the latch block. */
351 gcc_assert (desc->in_edge->dest != loop->header);
352
353 if (desc->in_edge->dest != loop->latch)
354 return false;
355
356 /* Check that the latch is empty. */
357 FOR_BB_INSNS (loop->latch, insn)
358 {
359 if (INSN_P (insn) && active_insn_p (insn))
360 return false;
361 }
362
363 return true;
364 }
365
 366 /* Decide whether to unroll LOOP iterating a constant number of times
367 and how much. */
368
369 static void
370 decide_unroll_constant_iterations (struct loop *loop, int flags)
371 {
372 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
373 struct niter_desc *desc;
374 widest_int iterations;
375
376 if (!(flags & UAP_UNROLL))
377 {
 378 /* We were not asked to, so just return silently. */
379 return;
380 }
381
382 if (dump_file)
383 fprintf (dump_file,
384 "\n;; Considering unrolling loop with constant "
385 "number of iterations\n");
386
 387 /* nunroll = total number of copies of the original loop body in
 388 the unrolled loop (i.e. if it is 2, we have to duplicate the loop body once). */
389 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
390 nunroll_by_av
391 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
392 if (nunroll > nunroll_by_av)
393 nunroll = nunroll_by_av;
394 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
395 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
396
397 if (targetm.loop_unroll_adjust)
398 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
399
400 /* Skip big loops. */
401 if (nunroll <= 1)
402 {
403 if (dump_file)
404 fprintf (dump_file, ";; Not considering loop, is too big\n");
405 return;
406 }
407
408 /* Check for simple loops. */
409 desc = get_simple_loop_desc (loop);
410
411 /* Check number of iterations. */
412 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
413 {
414 if (dump_file)
415 fprintf (dump_file,
416 ";; Unable to prove that the loop iterates constant times\n");
417 return;
418 }
419
 420 /* Check whether the loop rolls enough to consider.
 421 Also consult the loop bounds and profile; if the loop has more
 422 than one exit it may well loop fewer times than the determined
 423 maximal number of iterations. */
424 if (desc->niter < 2 * nunroll
425 || ((get_estimated_loop_iterations (loop, &iterations)
426 || get_max_loop_iterations (loop, &iterations))
427 && wi::ltu_p (iterations, 2 * nunroll)))
428 {
429 if (dump_file)
430 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
431 return;
432 }
433
 434 /* Success; now compute the number of iterations to unroll. We alter
 435 nunroll so that as few copies of the loop body as possible are
 436 necessary, while still not decreasing the number of unrollings
 437 too much (at most by 1). */
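
/* For example, with niter == 102 (as in the transformation sketch
   below) and i == 9, i.e. ten copies per unrolled iteration with the
   exit not at the end, exit_mod == 102 % 10 == 2 and n_copies == 12;
   this beats e.g. i == 16, where exit_mod == 0 but n_copies == 17. */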
438 best_copies = 2 * nunroll + 10;
439
440 i = 2 * nunroll + 2;
441 if (i - 1 >= desc->niter)
442 i = desc->niter - 2;
443
444 for (; i >= nunroll - 1; i--)
445 {
446 unsigned exit_mod = desc->niter % (i + 1);
447
448 if (!loop_exit_at_end_p (loop))
449 n_copies = exit_mod + i + 1;
450 else if (exit_mod != (unsigned) i
451 || desc->noloop_assumptions != NULL_RTX)
452 n_copies = exit_mod + i + 2;
453 else
454 n_copies = i + 1;
455
456 if (n_copies < best_copies)
457 {
458 best_copies = n_copies;
459 best_unroll = i;
460 }
461 }
462
463 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
464 loop->lpt_decision.times = best_unroll;
465 }
466
 467 /* Unroll LOOP, whose number of iterations is constant, LOOP->LPT_DECISION.TIMES times.
468 The transformation does this:
469
470 for (i = 0; i < 102; i++)
471 body;
472
473 ==> (LOOP->LPT_DECISION.TIMES == 3)
474
475 i = 0;
476 body; i++;
477 body; i++;
478 while (i < 102)
479 {
480 body; i++;
481 body; i++;
482 body; i++;
483 body; i++;
484 }
485 */
486 static void
487 unroll_loop_constant_iterations (struct loop *loop)
488 {
489 unsigned HOST_WIDE_INT niter;
490 unsigned exit_mod;
491 sbitmap wont_exit;
492 unsigned i;
493 edge e;
494 unsigned max_unroll = loop->lpt_decision.times;
495 struct niter_desc *desc = get_simple_loop_desc (loop);
496 bool exit_at_end = loop_exit_at_end_p (loop);
497 struct opt_info *opt_info = NULL;
498 bool ok;
499
500 niter = desc->niter;
501
 502 /* We should not get here (such a loop should be peeled instead). */
503 gcc_assert (niter > max_unroll + 1);
504
505 exit_mod = niter % (max_unroll + 1);
506
507 wont_exit = sbitmap_alloc (max_unroll + 1);
508 bitmap_ones (wont_exit);
509
510 auto_vec<edge> remove_edges;
511 if (flag_split_ivs_in_unroller
512 || flag_variable_expansion_in_unroller)
513 opt_info = analyze_insns_in_loop (loop);
514
515 if (!exit_at_end)
516 {
 517 /* The exit is not at the end of the loop; leave the exit test
 518 in the first copy, so that loops that start with a test
 519 of the exit condition have a contiguous body after unrolling. */
520
521 if (dump_file)
522 fprintf (dump_file, ";; Condition at beginning of loop.\n");
523
524 /* Peel exit_mod iterations. */
525 bitmap_clear_bit (wont_exit, 0);
526 if (desc->noloop_assumptions)
527 bitmap_clear_bit (wont_exit, 1);
528
529 if (exit_mod)
530 {
531 opt_info_start_duplication (opt_info);
532 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
533 exit_mod,
534 wont_exit, desc->out_edge,
535 &remove_edges,
536 DLTHE_FLAG_UPDATE_FREQ
537 | (opt_info && exit_mod > 1
538 ? DLTHE_RECORD_COPY_NUMBER
539 : 0));
540 gcc_assert (ok);
541
542 if (opt_info && exit_mod > 1)
543 apply_opt_in_copies (opt_info, exit_mod, false, false);
544
545 desc->noloop_assumptions = NULL_RTX;
546 desc->niter -= exit_mod;
547 loop->nb_iterations_upper_bound -= exit_mod;
548 if (loop->any_estimate
549 && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
550 loop->nb_iterations_estimate -= exit_mod;
551 else
552 loop->any_estimate = false;
553 }
554
555 bitmap_set_bit (wont_exit, 1);
556 }
557 else
558 {
 559 /* Leave the exit test in the last copy, for the same reason as above,
 560 if the loop tests the condition at the end of the loop body. */
561
562 if (dump_file)
563 fprintf (dump_file, ";; Condition at end of loop.\n");
564
 565 /* We know that niter >= max_unroll + 2; so we do not need to care
 566 about the case where we would exit before reaching the loop. So
 567 just peel exit_mod + 1 iterations. */
568 if (exit_mod != max_unroll
569 || desc->noloop_assumptions)
570 {
571 bitmap_clear_bit (wont_exit, 0);
572 if (desc->noloop_assumptions)
573 bitmap_clear_bit (wont_exit, 1);
574
575 opt_info_start_duplication (opt_info);
576 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
577 exit_mod + 1,
578 wont_exit, desc->out_edge,
579 &remove_edges,
580 DLTHE_FLAG_UPDATE_FREQ
581 | (opt_info && exit_mod > 0
582 ? DLTHE_RECORD_COPY_NUMBER
583 : 0));
584 gcc_assert (ok);
585
586 if (opt_info && exit_mod > 0)
587 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
588
589 desc->niter -= exit_mod + 1;
590 loop->nb_iterations_upper_bound -= exit_mod + 1;
591 if (loop->any_estimate
592 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
593 loop->nb_iterations_estimate -= exit_mod + 1;
594 else
595 loop->any_estimate = false;
596 desc->noloop_assumptions = NULL_RTX;
597
598 bitmap_set_bit (wont_exit, 0);
599 bitmap_set_bit (wont_exit, 1);
600 }
601
602 bitmap_clear_bit (wont_exit, max_unroll);
603 }
604
605 /* Now unroll the loop. */
606
607 opt_info_start_duplication (opt_info);
608 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
609 max_unroll,
610 wont_exit, desc->out_edge,
611 &remove_edges,
612 DLTHE_FLAG_UPDATE_FREQ
613 | (opt_info
614 ? DLTHE_RECORD_COPY_NUMBER
615 : 0));
616 gcc_assert (ok);
617
618 if (opt_info)
619 {
620 apply_opt_in_copies (opt_info, max_unroll, true, true);
621 free_opt_info (opt_info);
622 }
623
624 free (wont_exit);
625
626 if (exit_at_end)
627 {
628 basic_block exit_block = get_bb_copy (desc->in_edge->src);
629 /* Find a new in and out edge; they are in the last copy we have made. */
630
631 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
632 {
633 desc->out_edge = EDGE_SUCC (exit_block, 0);
634 desc->in_edge = EDGE_SUCC (exit_block, 1);
635 }
636 else
637 {
638 desc->out_edge = EDGE_SUCC (exit_block, 1);
639 desc->in_edge = EDGE_SUCC (exit_block, 0);
640 }
641 }
642
643 desc->niter /= max_unroll + 1;
644 loop->nb_iterations_upper_bound
645 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
646 if (loop->any_estimate)
647 loop->nb_iterations_estimate
648 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
649 desc->niter_expr = GEN_INT (desc->niter);
650
651 /* Remove the edges. */
652 FOR_EACH_VEC_ELT (remove_edges, i, e)
653 remove_path (e);
654
655 if (dump_file)
656 fprintf (dump_file,
657 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
658 max_unroll, num_loop_insns (loop));
659 }
660
 661 /* Decide whether to unroll LOOP iterating a runtime-computable number of times
662 and how much. */
663 static void
664 decide_unroll_runtime_iterations (struct loop *loop, int flags)
665 {
666 unsigned nunroll, nunroll_by_av, i;
667 struct niter_desc *desc;
668 widest_int iterations;
669
670 if (!(flags & UAP_UNROLL))
671 {
 672 /* We were not asked to, so just return silently. */
673 return;
674 }
675
676 if (dump_file)
677 fprintf (dump_file,
678 "\n;; Considering unrolling loop with runtime "
679 "computable number of iterations\n");
680
 681 /* nunroll = total number of copies of the original loop body in
 682 the unrolled loop (i.e. if it is 2, we have to duplicate the loop body once). */
683 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
684 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
685 if (nunroll > nunroll_by_av)
686 nunroll = nunroll_by_av;
687 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
688 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
689
690 if (targetm.loop_unroll_adjust)
691 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
692
693 /* Skip big loops. */
694 if (nunroll <= 1)
695 {
696 if (dump_file)
697 fprintf (dump_file, ";; Not considering loop, is too big\n");
698 return;
699 }
700
701 /* Check for simple loops. */
702 desc = get_simple_loop_desc (loop);
703
 704 /* Check simplicity. */
705 if (!desc->simple_p || desc->assumptions)
706 {
707 if (dump_file)
708 fprintf (dump_file,
709 ";; Unable to prove that the number of iterations "
710 "can be counted in runtime\n");
711 return;
712 }
713
714 if (desc->const_iter)
715 {
716 if (dump_file)
717 fprintf (dump_file, ";; Loop iterates constant times\n");
718 return;
719 }
720
721 /* Check whether the loop rolls. */
722 if ((get_estimated_loop_iterations (loop, &iterations)
723 || get_max_loop_iterations (loop, &iterations))
724 && wi::ltu_p (iterations, 2 * nunroll))
725 {
726 if (dump_file)
727 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
728 return;
729 }
730
 731 /* Success; now force nunroll to be a power of 2, as we are unable to
 732 cope with overflows in the computation of the number of iterations. */
733 for (i = 1; 2 * i <= nunroll; i *= 2)
734 continue;
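
/* For example, if the size limits above yield nunroll == 10, the loop
   leaves i == 8, so we record 7 unrollings and the body ends up in
   8 copies. */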
735
736 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
737 loop->lpt_decision.times = i - 1;
738 }
739
740 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
741 returns the newly created block. If INSNS is NULL_RTX, nothing is changed
742 and NULL is returned instead. */
743
744 basic_block
745 split_edge_and_insert (edge e, rtx_insn *insns)
746 {
747 basic_block bb;
748
749 if (!insns)
750 return NULL;
751 bb = split_edge (e);
752 emit_insn_after (insns, BB_END (bb));
753
754 /* ??? We used to assume that INSNS can contain control flow insns, and
755 that we had to try to find sub basic blocks in BB to maintain a valid
756 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
757 and call break_superblocks when going out of cfglayout mode. But it
758 turns out that this never happens; and that if it does ever happen,
759 the verify_flow_info at the end of the RTL loop passes would fail.
760
761 There are two reasons why we expected we could have control flow insns
762 in INSNS. The first is when a comparison has to be done in parts, and
763 the second is when the number of iterations is computed for loops with
764 the number of iterations known at runtime. In both cases, test cases
765 to get control flow in INSNS appear to be impossible to construct:
766
767 * If do_compare_rtx_and_jump needs several branches to do comparison
768 in a mode that needs comparison by parts, we cannot analyze the
769 number of iterations of the loop, and we never get to unrolling it.
770
771 * The code in expand_divmod that was suspected to cause creation of
772 branching code seems to be only accessed for signed division. The
773 divisions used by # of iterations analysis are always unsigned.
 774 Problems might arise on architectures that emit branching code
775 for some operations that may appear in the unroller (especially
776 for division), but we have no such architectures.
777
778 Considering all this, it was decided that we should for now assume
779 that INSNS can in theory contain control flow insns, but in practice
780 it never does. So we don't handle the theoretical case, and should
781 a real failure ever show up, we have a pretty good clue for how to
782 fix it. */
783
784 return bb;
785 }
786
787 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
788 true, with probability PROB. If CINSN is not NULL, it is the insn to copy
789 in order to create a jump. */
790
791 static rtx_insn *
792 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
793 rtx_code_label *label, int prob, rtx_insn *cinsn)
794 {
795 rtx_insn *seq;
796 rtx_jump_insn *jump;
797 rtx cond;
798 machine_mode mode;
799
800 mode = GET_MODE (op0);
801 if (mode == VOIDmode)
802 mode = GET_MODE (op1);
803
804 start_sequence ();
805 if (GET_MODE_CLASS (mode) == MODE_CC)
806 {
 807 /* A hack -- there seems to be no easy generic way to make a
808 conditional jump from a ccmode comparison. */
809 gcc_assert (cinsn);
810 cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
811 gcc_assert (GET_CODE (cond) == comp);
812 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
813 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
814 emit_jump_insn (copy_insn (PATTERN (cinsn)));
815 jump = as_a <rtx_jump_insn *> (get_last_insn ());
816 JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
817 LABEL_NUSES (JUMP_LABEL (jump))++;
818 redirect_jump (jump, label, 0);
819 }
820 else
821 {
822 gcc_assert (!cinsn);
823
824 op0 = force_operand (op0, NULL_RTX);
825 op1 = force_operand (op1, NULL_RTX);
826 do_compare_rtx_and_jump (op0, op1, comp, 0,
827 mode, NULL_RTX, NULL, label, -1);
828 jump = as_a <rtx_jump_insn *> (get_last_insn ());
829 jump->set_jump_target (label);
830 LABEL_NUSES (label)++;
831 }
832 add_int_reg_note (jump, REG_BR_PROB, prob);
833
834 seq = get_insns ();
835 end_sequence ();
836
837 return seq;
838 }
839
 840 /* Unroll LOOP, for which we are able to count the number of iterations
 841 at runtime, LOOP->LPT_DECISION.TIMES times. The transformation does
 842 this (with some extra care for the case n < 0):
843
844 for (i = 0; i < n; i++)
845 body;
846
847 ==> (LOOP->LPT_DECISION.TIMES == 3)
848
849 i = 0;
850 mod = n % 4;
851
852 switch (mod)
853 {
854 case 3:
855 body; i++;
856 case 2:
857 body; i++;
858 case 1:
859 body; i++;
860 case 0: ;
861 }
862
863 while (i < n)
864 {
865 body; i++;
866 body; i++;
867 body; i++;
868 body; i++;
869 }
870 */
871 static void
872 unroll_loop_runtime_iterations (struct loop *loop)
873 {
874 rtx old_niter, niter, tmp;
875 rtx_insn *init_code, *branch_code;
876 unsigned i, j, p;
877 basic_block preheader, *body, swtch, ezc_swtch;
878 sbitmap wont_exit;
879 int may_exit_copy;
880 unsigned n_peel;
881 edge e;
882 bool extra_zero_check, last_may_exit;
883 unsigned max_unroll = loop->lpt_decision.times;
884 struct niter_desc *desc = get_simple_loop_desc (loop);
885 bool exit_at_end = loop_exit_at_end_p (loop);
886 struct opt_info *opt_info = NULL;
887 bool ok;
888
889 if (flag_split_ivs_in_unroller
890 || flag_variable_expansion_in_unroller)
891 opt_info = analyze_insns_in_loop (loop);
892
893 /* Remember blocks whose dominators will have to be updated. */
894 auto_vec<basic_block> dom_bbs;
895
896 body = get_loop_body (loop);
897 for (i = 0; i < loop->num_nodes; i++)
898 {
899 vec<basic_block> ldom;
900 basic_block bb;
901
902 ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
903 FOR_EACH_VEC_ELT (ldom, j, bb)
904 if (!flow_bb_inside_loop_p (loop, bb))
905 dom_bbs.safe_push (bb);
906
907 ldom.release ();
908 }
909 free (body);
910
911 if (!exit_at_end)
912 {
913 /* Leave exit in first copy (for explanation why see comment in
914 unroll_loop_constant_iterations). */
915 may_exit_copy = 0;
916 n_peel = max_unroll - 1;
917 extra_zero_check = true;
918 last_may_exit = false;
919 }
920 else
921 {
922 /* Leave exit in last copy (for explanation why see comment in
923 unroll_loop_constant_iterations). */
924 may_exit_copy = max_unroll;
925 n_peel = max_unroll;
926 extra_zero_check = false;
927 last_may_exit = true;
928 }
929
930 /* Get expression for number of iterations. */
931 start_sequence ();
932 old_niter = niter = gen_reg_rtx (desc->mode);
933 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
934 if (tmp != niter)
935 emit_move_insn (niter, tmp);
936
 937 /* Compute the modulo by ANDing niter with max_unroll; we use the fact
 938 that the number of copies (max_unroll + 1) is a power of two, and thus
 939 this is correct even if there is overflow in the computation. */
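/* E.g. with max_unroll == 3 there are four copies, and niter & 3 equals
   niter % 4 for any unsigned value of niter, even one that wrapped. */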
940 niter = expand_simple_binop (desc->mode, AND,
941 niter, gen_int_mode (max_unroll, desc->mode),
942 NULL_RTX, 0, OPTAB_LIB_WIDEN);
943
944 init_code = get_insns ();
945 end_sequence ();
946 unshare_all_rtl_in_chain (init_code);
947
948 /* Precondition the loop. */
949 split_edge_and_insert (loop_preheader_edge (loop), init_code);
950
951 auto_vec<edge> remove_edges;
952
953 wont_exit = sbitmap_alloc (max_unroll + 2);
954
 955 /* Peel the first copy of the loop body (almost always we must leave
 956 the exit test here; the only exception is when we have an extra zero
 957 check and the number of iterations is reliable). Also record the
 958 place of the (possible) extra zero check. */
959 bitmap_clear (wont_exit);
960 if (extra_zero_check
961 && !desc->noloop_assumptions)
962 bitmap_set_bit (wont_exit, 1);
963 ezc_swtch = loop_preheader_edge (loop)->src;
964 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
965 1, wont_exit, desc->out_edge,
966 &remove_edges,
967 DLTHE_FLAG_UPDATE_FREQ);
968 gcc_assert (ok);
969
970 /* Record the place where switch will be built for preconditioning. */
971 swtch = split_edge (loop_preheader_edge (loop));
972
973 for (i = 0; i < n_peel; i++)
974 {
975 /* Peel the copy. */
976 bitmap_clear (wont_exit);
977 if (i != n_peel - 1 || !last_may_exit)
978 bitmap_set_bit (wont_exit, 1);
979 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
980 1, wont_exit, desc->out_edge,
981 &remove_edges,
982 DLTHE_FLAG_UPDATE_FREQ);
983 gcc_assert (ok);
984
985 /* Create item for switch. */
986 j = n_peel - i - (extra_zero_check ? 0 : 1);
987 p = REG_BR_PROB_BASE / (i + 2);
988
989 preheader = split_edge (loop_preheader_edge (loop));
990 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
991 block_label (preheader), p,
992 NULL);
993
994 /* We rely on the fact that the compare and jump cannot be optimized out,
995 and hence the cfg we create is correct. */
996 gcc_assert (branch_code != NULL_RTX);
997
998 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
999 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1000 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1001 e = make_edge (swtch, preheader,
1002 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1003 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
1004 e->probability = p;
1005 }
1006
1007 if (extra_zero_check)
1008 {
1009 /* Add branch for zero iterations. */
1010 p = REG_BR_PROB_BASE / (max_unroll + 1);
1011 swtch = ezc_swtch;
1012 preheader = split_edge (loop_preheader_edge (loop));
1013 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1014 block_label (preheader), p,
1015 NULL);
1016 gcc_assert (branch_code != NULL_RTX);
1017
1018 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1019 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1020 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1021 e = make_edge (swtch, preheader,
1022 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1023 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
1024 e->probability = p;
1025 }
1026
1027 /* Recount dominators for outer blocks. */
1028 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1029
1030 /* And unroll loop. */
1031
1032 bitmap_ones (wont_exit);
1033 bitmap_clear_bit (wont_exit, may_exit_copy);
1034 opt_info_start_duplication (opt_info);
1035
1036 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1037 max_unroll,
1038 wont_exit, desc->out_edge,
1039 &remove_edges,
1040 DLTHE_FLAG_UPDATE_FREQ
1041 | (opt_info
1042 ? DLTHE_RECORD_COPY_NUMBER
1043 : 0));
1044 gcc_assert (ok);
1045
1046 if (opt_info)
1047 {
1048 apply_opt_in_copies (opt_info, max_unroll, true, true);
1049 free_opt_info (opt_info);
1050 }
1051
1052 free (wont_exit);
1053
1054 if (exit_at_end)
1055 {
1056 basic_block exit_block = get_bb_copy (desc->in_edge->src);
1057 /* Find a new in and out edge; they are in the last copy we have
1058 made. */
1059
1060 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1061 {
1062 desc->out_edge = EDGE_SUCC (exit_block, 0);
1063 desc->in_edge = EDGE_SUCC (exit_block, 1);
1064 }
1065 else
1066 {
1067 desc->out_edge = EDGE_SUCC (exit_block, 1);
1068 desc->in_edge = EDGE_SUCC (exit_block, 0);
1069 }
1070 }
1071
1072 /* Remove the edges. */
1073 FOR_EACH_VEC_ELT (remove_edges, i, e)
1074 remove_path (e);
1075
1076 /* We must be careful when updating the number of iterations due to
1077 preconditioning and the fact that the value must be valid at entry
1078 of the loop. After passing through the above code, we see that
1079 the correct new number of iterations is this: */
1080 gcc_assert (!desc->const_iter);
1081 desc->niter_expr =
1082 simplify_gen_binary (UDIV, desc->mode, old_niter,
1083 gen_int_mode (max_unroll + 1, desc->mode));
1084 loop->nb_iterations_upper_bound
1085 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1086 if (loop->any_estimate)
1087 loop->nb_iterations_estimate
1088 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1089 if (exit_at_end)
1090 {
1091 desc->niter_expr =
1092 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1093 desc->noloop_assumptions = NULL_RTX;
1094 --loop->nb_iterations_upper_bound;
1095 if (loop->any_estimate
1096 && loop->nb_iterations_estimate != 0)
1097 --loop->nb_iterations_estimate;
1098 else
1099 loop->any_estimate = false;
1100 }
1101
1102 if (dump_file)
1103 fprintf (dump_file,
1104 ";; Unrolled loop %d times, counting # of iterations "
1105 "in runtime, %i insns\n",
1106 max_unroll, num_loop_insns (loop));
1107 }
1108
1109 /* Decide whether to unroll LOOP stupidly and how much. */
1110 static void
1111 decide_unroll_stupid (struct loop *loop, int flags)
1112 {
1113 unsigned nunroll, nunroll_by_av, i;
1114 struct niter_desc *desc;
1115 widest_int iterations;
1116
1117 if (!(flags & UAP_UNROLL_ALL))
1118 {
 1119 /* We were not asked to, so just return silently. */
1120 return;
1121 }
1122
1123 if (dump_file)
1124 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1125
 1126 /* nunroll = total number of copies of the original loop body in
 1127 the unrolled loop (i.e. if it is 2, we have to duplicate the loop body once). */
1128 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1129 nunroll_by_av
1130 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1131 if (nunroll > nunroll_by_av)
1132 nunroll = nunroll_by_av;
1133 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1134 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1135
1136 if (targetm.loop_unroll_adjust)
1137 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1138
1139 /* Skip big loops. */
1140 if (nunroll <= 1)
1141 {
1142 if (dump_file)
1143 fprintf (dump_file, ";; Not considering loop, is too big\n");
1144 return;
1145 }
1146
1147 /* Check for simple loops. */
1148 desc = get_simple_loop_desc (loop);
1149
 1150 /* Check simplicity. */
1151 if (desc->simple_p && !desc->assumptions)
1152 {
1153 if (dump_file)
1154 fprintf (dump_file, ";; The loop is simple\n");
1155 return;
1156 }
1157
 1158 /* Do not unroll loops with branches inside -- it increases the number
 1159 of mispredicts.
 1160 TODO: this heuristic needs tuning; a call inside the loop body
 1161 is also a relatively good reason not to unroll. */
1162 if (num_loop_branches (loop) > 1)
1163 {
1164 if (dump_file)
1165 fprintf (dump_file, ";; Not unrolling, contains branches\n");
1166 return;
1167 }
1168
1169 /* Check whether the loop rolls. */
1170 if ((get_estimated_loop_iterations (loop, &iterations)
1171 || get_max_loop_iterations (loop, &iterations))
1172 && wi::ltu_p (iterations, 2 * nunroll))
1173 {
1174 if (dump_file)
1175 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1176 return;
1177 }
1178
 1179 /* Success. Now force nunroll to be a power of 2, as it seems that this
 1180 improves results (partially because of better alignment, partially
 1181 because of some dark magic). */
1182 for (i = 1; 2 * i <= nunroll; i *= 2)
1183 continue;
1184
1185 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1186 loop->lpt_decision.times = i - 1;
1187 }
1188
 1189 /* Unroll LOOP, LOOP->LPT_DECISION.TIMES times. The transformation does this:
1190
1191 while (cond)
1192 body;
1193
1194 ==> (LOOP->LPT_DECISION.TIMES == 3)
1195
1196 while (cond)
1197 {
1198 body;
1199 if (!cond) break;
1200 body;
1201 if (!cond) break;
1202 body;
1203 if (!cond) break;
1204 body;
1205 }
1206 */
1207 static void
1208 unroll_loop_stupid (struct loop *loop)
1209 {
1210 sbitmap wont_exit;
1211 unsigned nunroll = loop->lpt_decision.times;
1212 struct niter_desc *desc = get_simple_loop_desc (loop);
1213 struct opt_info *opt_info = NULL;
1214 bool ok;
1215
1216 if (flag_split_ivs_in_unroller
1217 || flag_variable_expansion_in_unroller)
1218 opt_info = analyze_insns_in_loop (loop);
1219
1220
1221 wont_exit = sbitmap_alloc (nunroll + 1);
1222 bitmap_clear (wont_exit);
1223 opt_info_start_duplication (opt_info);
1224
1225 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1226 nunroll, wont_exit,
1227 NULL, NULL,
1228 DLTHE_FLAG_UPDATE_FREQ
1229 | (opt_info
1230 ? DLTHE_RECORD_COPY_NUMBER
1231 : 0));
1232 gcc_assert (ok);
1233
1234 if (opt_info)
1235 {
1236 apply_opt_in_copies (opt_info, nunroll, true, true);
1237 free_opt_info (opt_info);
1238 }
1239
1240 free (wont_exit);
1241
1242 if (desc->simple_p)
1243 {
 1244 /* We may indeed get here, provided that there are nontrivial
 1245 assumptions for the loop to be really simple. We could update the
 1246 counts, but the problem is that we are unable to decide which exit
 1247 will be taken (not really true when the number of iterations is
 1248 constant, but no one will do anything with this information, so we
 1249 do not worry about it). */
1250 desc->simple_p = false;
1251 }
1252
1253 if (dump_file)
1254 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1255 nunroll, num_loop_insns (loop));
1256 }
1257
1258 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1259 Set *DEBUG_USES to the number of debug insns that reference the
1260 variable. */
1261
1262 static bool
1263 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1264 int *debug_uses)
1265 {
1266 basic_block *body, bb;
1267 unsigned i;
1268 int count_ref = 0;
1269 rtx_insn *insn;
1270
1271 body = get_loop_body (loop);
1272 for (i = 0; i < loop->num_nodes; i++)
1273 {
1274 bb = body[i];
1275
1276 FOR_BB_INSNS (bb, insn)
1277 if (!rtx_referenced_p (reg, insn))
1278 continue;
1279 else if (DEBUG_INSN_P (insn))
1280 ++*debug_uses;
1281 else if (++count_ref > 1)
1282 break;
1283 }
1284 free (body);
1285 return (count_ref == 1);
1286 }
1287
1288 /* Reset the DEBUG_USES debug insns in LOOP that reference REG. */
1289
1290 static void
1291 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1292 {
1293 basic_block *body, bb;
1294 unsigned i;
1295 rtx_insn *insn;
1296
1297 body = get_loop_body (loop);
1298 for (i = 0; debug_uses && i < loop->num_nodes; i++)
1299 {
1300 bb = body[i];
1301
1302 FOR_BB_INSNS (bb, insn)
1303 if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1304 continue;
1305 else
1306 {
1307 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1308 gen_rtx_UNKNOWN_VAR_LOC (), 0);
1309 if (!--debug_uses)
1310 break;
1311 }
1312 }
1313 free (body);
1314 }
1315
1316 /* Determine whether INSN contains an accumulator
1317 which can be expanded into separate copies,
1318 one for each copy of the LOOP body.
1319
1320 for (i = 0 ; i < n; i++)
1321 sum += a[i];
1322
1323 ==>
1324
1325 sum += a[i]
1326 ....
1327 i = i+1;
1328 sum1 += a[i]
1329 ....
1330 i = i+1
1331 sum2 += a[i];
1332 ....
1333
 1334 Return NULL if INSN contains no opportunity for expansion of an
 1335 accumulator. Otherwise, allocate a VAR_TO_EXPAND structure, fill it
 1336 with the relevant information and return a pointer to it. */
1338
1339 static struct var_to_expand *
1340 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1341 {
1342 rtx set, dest, src;
1343 struct var_to_expand *ves;
1344 unsigned accum_pos;
1345 enum rtx_code code;
1346 int debug_uses = 0;
1347
1348 set = single_set (insn);
1349 if (!set)
1350 return NULL;
1351
1352 dest = SET_DEST (set);
1353 src = SET_SRC (set);
1354 code = GET_CODE (src);
1355
1356 if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1357 return NULL;
1358
1359 if (FLOAT_MODE_P (GET_MODE (dest)))
1360 {
1361 if (!flag_associative_math)
1362 return NULL;
1363 /* In the case of FMA, we're also changing the rounding. */
1364 if (code == FMA && !flag_unsafe_math_optimizations)
1365 return NULL;
1366 }
1367
1368 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
 1369 in MD. But if there is no optab to generate the insn, we cannot
1370 perform the variable expansion. This can happen if an MD provides
1371 an insn but not a named pattern to generate it, for example to avoid
1372 producing code that needs additional mode switches like for x87/mmx.
1373
 1374 So we check have_insn_for, which looks for an optab for the operation
1375 in SRC. If it doesn't exist, we can't perform the expansion even
1376 though INSN is valid. */
1377 if (!have_insn_for (code, GET_MODE (src)))
1378 return NULL;
1379
1380 if (!REG_P (dest)
1381 && !(GET_CODE (dest) == SUBREG
1382 && REG_P (SUBREG_REG (dest))))
1383 return NULL;
1384
1385 /* Find the accumulator use within the operation. */
1386 if (code == FMA)
1387 {
1388 /* We only support accumulation via FMA in the ADD position. */
1389 if (!rtx_equal_p (dest, XEXP (src, 2)))
1390 return NULL;
1391 accum_pos = 2;
1392 }
1393 else if (rtx_equal_p (dest, XEXP (src, 0)))
1394 accum_pos = 0;
1395 else if (rtx_equal_p (dest, XEXP (src, 1)))
1396 {
 1397 /* The method of expansion that we are using, which initializes the
 1398 expansions with zero and sums them up at the end of the computation,
 1399 will yield wrong results for (x = something - x); thus avoid using
 1400 it in that case. */
1401 if (code == MINUS)
1402 return NULL;
1403 accum_pos = 1;
1404 }
1405 else
1406 return NULL;
1407
1408 /* It must not otherwise be used. */
1409 if (code == FMA)
1410 {
1411 if (rtx_referenced_p (dest, XEXP (src, 0))
1412 || rtx_referenced_p (dest, XEXP (src, 1)))
1413 return NULL;
1414 }
1415 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1416 return NULL;
1417
1418 /* It must be used in exactly one insn. */
1419 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1420 return NULL;
1421
1422 if (dump_file)
1423 {
1424 fprintf (dump_file, "\n;; Expanding Accumulator ");
1425 print_rtl (dump_file, dest);
1426 fprintf (dump_file, "\n");
1427 }
1428
1429 if (debug_uses)
1430 /* Instead of resetting the debug insns, we could replace each
1431 debug use in the loop with the sum or product of all expanded
 1432 accumulators. Since we'll only know of all expansions at the
1433 end, we'd have to keep track of which vars_to_expand a debug
1434 insn in the loop references, take note of each copy of the
1435 debug insn during unrolling, and when it's all done, compute
1436 the sum or product of each variable and adjust the original
1437 debug insn and each copy thereof. What a pain! */
1438 reset_debug_uses_in_loop (loop, dest, debug_uses);
1439
1440 /* Record the accumulator to expand. */
1441 ves = XNEW (struct var_to_expand);
1442 ves->insn = insn;
1443 ves->reg = copy_rtx (dest);
1444 ves->var_expansions.create (1);
1445 ves->next = NULL;
1446 ves->op = GET_CODE (src);
1447 ves->expansion_count = 0;
1448 ves->reuse_expansion = 0;
1449 return ves;
1450 }
1451
1452 /* Determine whether there is an induction variable in INSN that
1453 we would like to split during unrolling.
1454
1455 I.e. replace
1456
1457 i = i + 1;
1458 ...
1459 i = i + 1;
1460 ...
1461 i = i + 1;
1462 ...
1463
 1464 chains of this type by
1465
1466 i0 = i + 1
1467 ...
1468 i = i0 + 1
1469 ...
1470 i = i0 + 2
1471 ...
1472
1473 Return NULL if INSN contains no interesting IVs. Otherwise, allocate
1474 an IV_TO_SPLIT structure, fill it with the relevant information and return a
1475 pointer to it. */
1476
1477 static struct iv_to_split *
1478 analyze_iv_to_split_insn (rtx_insn *insn)
1479 {
1480 rtx set, dest;
1481 struct rtx_iv iv;
1482 struct iv_to_split *ivts;
1483 bool ok;
1484
 1485 /* For now we just split the basic induction variables. Later this may
 1486 be extended, for example, to also select addresses of memory references. */
1487 set = single_set (insn);
1488 if (!set)
1489 return NULL;
1490
1491 dest = SET_DEST (set);
1492 if (!REG_P (dest))
1493 return NULL;
1494
1495 if (!biv_p (insn, dest))
1496 return NULL;
1497
1498 ok = iv_analyze_result (insn, dest, &iv);
1499
1500 /* This used to be an assert under the assumption that if biv_p returns
 1501 true, then iv_analyze_result must also return true. However, that
 1502 assumption is not strictly correct, as evidenced by pr25569.
1503
1504 Returning NULL when iv_analyze_result returns false is safe and
1505 avoids the problems in pr25569 until the iv_analyze_* routines
1506 can be fixed, which is apparently hard and time consuming
1507 according to their author. */
1508 if (! ok)
1509 return NULL;
1510
1511 if (iv.step == const0_rtx
1512 || iv.mode != iv.extend_mode)
1513 return NULL;
1514
1515 /* Record the insn to split. */
1516 ivts = XNEW (struct iv_to_split);
1517 ivts->insn = insn;
1518 ivts->orig_var = dest;
1519 ivts->base_var = NULL_RTX;
1520 ivts->step = iv.step;
1521 ivts->next = NULL;
1522
1523 return ivts;
1524 }
1525
 1526 /* Determine which insns in LOOP can be optimized.
 1527 Return an OPT_INFO struct with the relevant hash tables filled
1528 with all insns to be optimized. The FIRST_NEW_BLOCK field
1529 is undefined for the return value. */
1530
1531 static struct opt_info *
1532 analyze_insns_in_loop (struct loop *loop)
1533 {
1534 basic_block *body, bb;
1535 unsigned i;
1536 struct opt_info *opt_info = XCNEW (struct opt_info);
1537 rtx_insn *insn;
1538 struct iv_to_split *ivts = NULL;
1539 struct var_to_expand *ves = NULL;
1540 iv_to_split **slot1;
1541 var_to_expand **slot2;
1542 vec<edge> edges = get_loop_exit_edges (loop);
1543 edge exit;
1544 bool can_apply = false;
1545
1546 iv_analysis_loop_init (loop);
1547
1548 body = get_loop_body (loop);
1549
1550 if (flag_split_ivs_in_unroller)
1551 {
1552 opt_info->insns_to_split
1553 = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1554 opt_info->iv_to_split_head = NULL;
1555 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1556 }
1557
1558 /* Record the loop exit bb and loop preheader before the unrolling. */
1559 opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1560
1561 if (edges.length () == 1)
1562 {
1563 exit = edges[0];
1564 if (!(exit->flags & EDGE_COMPLEX))
1565 {
1566 opt_info->loop_exit = split_edge (exit);
1567 can_apply = true;
1568 }
1569 }
1570
1571 if (flag_variable_expansion_in_unroller
1572 && can_apply)
1573 {
1574 opt_info->insns_with_var_to_expand
1575 = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1576 opt_info->var_to_expand_head = NULL;
1577 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1578 }
1579
1580 for (i = 0; i < loop->num_nodes; i++)
1581 {
1582 bb = body[i];
1583 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1584 continue;
1585
1586 FOR_BB_INSNS (bb, insn)
1587 {
1588 if (!INSN_P (insn))
1589 continue;
1590
1591 if (opt_info->insns_to_split)
1592 ivts = analyze_iv_to_split_insn (insn);
1593
1594 if (ivts)
1595 {
1596 slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1597 gcc_assert (*slot1 == NULL);
1598 *slot1 = ivts;
1599 *opt_info->iv_to_split_tail = ivts;
1600 opt_info->iv_to_split_tail = &ivts->next;
1601 continue;
1602 }
1603
1604 if (opt_info->insns_with_var_to_expand)
1605 ves = analyze_insn_to_expand_var (loop, insn);
1606
1607 if (ves)
1608 {
1609 slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1610 gcc_assert (*slot2 == NULL);
1611 *slot2 = ves;
1612 *opt_info->var_to_expand_tail = ves;
1613 opt_info->var_to_expand_tail = &ves->next;
1614 }
1615 }
1616 }
1617
1618 edges.release ();
1619 free (body);
1620 return opt_info;
1621 }
1622
 1623 /* Called just before loop duplication. Records the start of the
 1624 duplicated area in OPT_INFO. */
1625
1626 static void
1627 opt_info_start_duplication (struct opt_info *opt_info)
1628 {
1629 if (opt_info)
1630 opt_info->first_new_block = last_basic_block_for_fn (cfun);
1631 }
1632
1633 /* Determine the number of iterations between initialization of the base
1634 variable and the current copy (N_COPY). N_COPIES is the total number
1635 of newly created copies. UNROLLING is true if we are unrolling
1636 (not peeling) the loop. */
1637
1638 static unsigned
1639 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1640 {
1641 if (unrolling)
1642 {
1643 /* If we are unrolling, initialization is done in the original loop
1644 body (number 0). */
1645 return n_copy;
1646 }
1647 else
1648 {
 1649 /* If we are peeling, the copy in which the initialization occurs has
1650 number 1. The original loop (number 0) is the last. */
1651 if (n_copy)
1652 return n_copy - 1;
1653 else
1654 return n_copies;
1655 }
1656 }
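
/* E.g. with N_COPIES == 4: when unrolling, copy number k simply gets
   delta k (the original body, number 0, holds the initialization);
   when peeling, copies 1..4 get deltas 0..3 and the original body,
   which runs last, gets delta 4. */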
1657
 1658 /* Allocate the basic variable for the induction variable chain. */
1659
1660 static void
1661 allocate_basic_variable (struct iv_to_split *ivts)
1662 {
1663 rtx expr = SET_SRC (single_set (ivts->insn));
1664
1665 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1666 }
1667
1668 /* Insert initialization of basic variable of IVTS before INSN, taking
1669 the initial value from INSN. */
1670
1671 static void
1672 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1673 {
1674 rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1675 rtx_insn *seq;
1676
1677 start_sequence ();
1678 expr = force_operand (expr, ivts->base_var);
1679 if (expr != ivts->base_var)
1680 emit_move_insn (ivts->base_var, expr);
1681 seq = get_insns ();
1682 end_sequence ();
1683
1684 emit_insn_before (seq, insn);
1685 }
1686
 1687 /* Replace the use of the induction variable described in IVTS in INSN
1688 by base variable + DELTA * step. */
1689
1690 static void
1691 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1692 {
1693 rtx expr, *loc, incr, var;
1694 rtx_insn *seq;
1695 machine_mode mode = GET_MODE (ivts->base_var);
1696 rtx src, dest, set;
1697
1698 /* Construct base + DELTA * step. */
1699 if (!delta)
1700 expr = ivts->base_var;
1701 else
1702 {
1703 incr = simplify_gen_binary (MULT, mode,
1704 ivts->step, gen_int_mode (delta, mode));
1705 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1706 ivts->base_var, incr);
1707 }
1708
1709 /* Figure out where to do the replacement. */
1710 loc = &SET_SRC (single_set (insn));
1711
1712 /* If we can make the replacement right away, we're done. */
1713 if (validate_change (insn, loc, expr, 0))
1714 return;
1715
1716 /* Otherwise, force EXPR into a register and try again. */
1717 start_sequence ();
1718 var = gen_reg_rtx (mode);
1719 expr = force_operand (expr, var);
1720 if (expr != var)
1721 emit_move_insn (var, expr);
1722 seq = get_insns ();
1723 end_sequence ();
1724 emit_insn_before (seq, insn);
1725
1726 if (validate_change (insn, loc, var, 0))
1727 return;
1728
1729 /* The last chance. Try recreating the assignment in insn
1730 completely from scratch. */
1731 set = single_set (insn);
1732 gcc_assert (set);
1733
1734 start_sequence ();
1735 *loc = var;
1736 src = copy_rtx (SET_SRC (set));
1737 dest = copy_rtx (SET_DEST (set));
1738 src = force_operand (src, dest);
1739 if (src != dest)
1740 emit_move_insn (dest, src);
1741 seq = get_insns ();
1742 end_sequence ();
1743
1744 emit_insn_before (seq, insn);
1745 delete_insn (insn);
1746 }
1747
1748
1749 /* Return one expansion of the accumulator recorded in struct VE. */
1750
1751 static rtx
1752 get_expansion (struct var_to_expand *ve)
1753 {
1754 rtx reg;
1755
1756 if (ve->reuse_expansion == 0)
1757 reg = ve->reg;
1758 else
1759 reg = ve->var_expansions[ve->reuse_expansion - 1];
1760
1761 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1762 ve->reuse_expansion = 0;
1763 else
1764 ve->reuse_expansion++;
1765
1766 return reg;
1767 }
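
/* E.g. if two expansions were recorded, successive calls return the
   original accumulator, then var_expansions[0], then var_expansions[1],
   and then cycle back to the original accumulator. */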
1768
1769
1770 /* Given INSN replace the uses of the accumulator recorded in VE
1771 with a new register. */
1772
1773 static void
1774 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1775 {
1776 rtx new_reg, set;
1777 bool really_new_expansion = false;
1778
1779 set = single_set (insn);
1780 gcc_assert (set);
1781
1782 /* Generate a new register only if the expansion limit has not been
1783 reached. Else reuse an already existing expansion. */
1784 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1785 {
1786 really_new_expansion = true;
1787 new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1788 }
1789 else
1790 new_reg = get_expansion (ve);
1791
1792 validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1793 if (apply_change_group ())
1794 if (really_new_expansion)
1795 {
1796 ve->var_expansions.safe_push (new_reg);
1797 ve->expansion_count++;
1798 }
1799 }
1800
1801 /* Initialize the variable expansions in loop preheader. PLACE is the
1802 loop-preheader basic block where the initialization of the
1803 expansions should take place. The expansions are initialized with
 1804 (-0) when the operation is plus or minus, to honor signed zero. This
 1805 way we can prevent cases where the sign of the final result is
 1806 affected by the sign of the expansion. Here is an example to
1807 demonstrate this:
1808
1809 for (i = 0 ; i < n; i++)
1810 sum += something;
1811
1812 ==>
1813
1814 sum += something
1815 ....
1816 i = i+1;
1817 sum1 += something
1818 ....
1819 i = i+1
1820 sum2 += something;
1821 ....
1822
 1823 When SUM is initialized with -zero and SOMETHING is also -zero, the
 1824 final result of sum should be -zero; thus the expansions sum1 and sum2
1825 should be initialized with -zero as well (otherwise we will get +zero
1826 as the final result). */
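
/* Concretely, under IEEE round-to-nearest arithmetic,
   -0.0 + -0.0 == -0.0 but -0.0 + +0.0 == +0.0, so expansions
   initialized with +0.0 would turn an all-negative-zero accumulation
   into +0.0 at the combine step. */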
1827
1828 static void
1829 insert_var_expansion_initialization (struct var_to_expand *ve,
1830 basic_block place)
1831 {
1832 rtx_insn *seq;
1833 rtx var, zero_init;
1834 unsigned i;
1835 machine_mode mode = GET_MODE (ve->reg);
1836 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1837
1838 if (ve->var_expansions.length () == 0)
1839 return;
1840
1841 start_sequence ();
1842 switch (ve->op)
1843 {
1844 case FMA:
1845 /* Note that we only accumulate FMA via the ADD operand. */
1846 case PLUS:
1847 case MINUS:
1848 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1849 {
1850 if (honor_signed_zero_p)
1851 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1852 else
1853 zero_init = CONST0_RTX (mode);
1854 emit_move_insn (var, zero_init);
1855 }
1856 break;
1857
1858 case MULT:
1859 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1860 {
1861 zero_init = CONST1_RTX (GET_MODE (var));
1862 emit_move_insn (var, zero_init);
1863 }
1864 break;
1865
1866 default:
1867 gcc_unreachable ();
1868 }
1869
1870 seq = get_insns ();
1871 end_sequence ();
1872
1873 emit_insn_after (seq, BB_END (place));
1874 }
1875
1876 /* Combine the variable expansions at the loop exit. PLACE is the
1877 loop exit basic block where the summation of the expansions should
1878 take place. */
1879
1880 static void
1881 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1882 {
1883 rtx sum = ve->reg;
1884 rtx expr, var;
1885 rtx_insn *seq, *insn;
1886 unsigned i;
1887
1888 if (ve->var_expansions.length () == 0)
1889 return;
1890
1891 start_sequence ();
1892 switch (ve->op)
1893 {
1894 case FMA:
1895 /* Note that we only accumulate FMA via the ADD operand. */
1896 case PLUS:
1897 case MINUS:
1898 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1899 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1900 break;
1901
1902 case MULT:
1903 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1904 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1905 break;
1906
1907 default:
1908 gcc_unreachable ();
1909 }
1910
1911 expr = force_operand (sum, ve->reg);
1912 if (expr != ve->reg)
1913 emit_move_insn (ve->reg, expr);
1914 seq = get_insns ();
1915 end_sequence ();
1916
1917 insn = BB_HEAD (place);
1918 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1919 insn = NEXT_INSN (insn);
1920
1921 emit_insn_after (seq, insn);
1922 }
1923
1924 /* Strip away REG_EQUAL notes for IVs we're splitting.
1925
1926 Updating REG_EQUAL notes for IVs we split is tricky: We
1927 cannot tell until after unrolling, DF-rescanning, and liveness
1928 updating, whether an EQ_USE is reached by the split IV while
1929 the IV reg is still live. See PR55006.
1930
1931 ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1932 because RTL loop-iv requires us to defer rescanning insns and
1933 any notes attached to them. So resort to old techniques... */
1934
1935 static void
1936 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1937 {
1938 struct iv_to_split *ivts;
1939 rtx note = find_reg_equal_equiv_note (insn);
1940 if (! note)
1941 return;
1942 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1943 if (reg_mentioned_p (ivts->orig_var, note))
1944 {
1945 remove_note (insn, note);
1946 return;
1947 }
1948 }
1949
 1950 /* Apply loop optimizations in loop copies using the
 1951 data gathered during the unrolling. The structure
 1952 OPT_INFO records that data.
1953
1954 UNROLLING is true if we unrolled (not peeled) the loop.
 1955 REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
 1956 the loop (as should happen in complete unrolling, but not in ordinary
1957 peeling of the loop). */
1958
1959 static void
1960 apply_opt_in_copies (struct opt_info *opt_info,
1961 unsigned n_copies, bool unrolling,
1962 bool rewrite_original_loop)
1963 {
1964 unsigned i, delta;
1965 basic_block bb, orig_bb;
1966 rtx_insn *insn, *orig_insn, *next;
1967 struct iv_to_split ivts_templ, *ivts;
1968 struct var_to_expand ve_templ, *ves;
1969
1970 /* Sanity check -- we need to put initialization in the original loop
1971 body. */
1972 gcc_assert (!unrolling || rewrite_original_loop);
1973
1974 /* Allocate the basic variables (i0). */
1975 if (opt_info->insns_to_split)
1976 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1977 allocate_basic_variable (ivts);
1978
1979 for (i = opt_info->first_new_block;
1980 i < (unsigned) last_basic_block_for_fn (cfun);
1981 i++)
1982 {
1983 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1984 orig_bb = get_bb_original (bb);
1985
1986 /* bb->aux holds position in copy sequence initialized by
1987 duplicate_loop_to_header_edge. */
1988 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
1989 unrolling);
1990 bb->aux = 0;
1991 orig_insn = BB_HEAD (orig_bb);
1992 FOR_BB_INSNS_SAFE (bb, insn, next)
1993 {
1994 if (!INSN_P (insn)
1995 || (DEBUG_INSN_P (insn)
1996 && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
1997 continue;
1998
1999 while (!INSN_P (orig_insn)
2000 || (DEBUG_INSN_P (orig_insn)
2001 && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2002 == LABEL_DECL)))
2003 orig_insn = NEXT_INSN (orig_insn);
2004
2005 ivts_templ.insn = orig_insn;
2006 ve_templ.insn = orig_insn;
2007
2008 /* Apply splitting iv optimization. */
2009 if (opt_info->insns_to_split)
2010 {
2011 maybe_strip_eq_note_for_split_iv (opt_info, insn);
2012
2013 ivts = opt_info->insns_to_split->find (&ivts_templ);
2014
2015 if (ivts)
2016 {
2017 gcc_assert (GET_CODE (PATTERN (insn))
2018 == GET_CODE (PATTERN (orig_insn)));
2019
2020 if (!delta)
2021 insert_base_initialization (ivts, insn);
2022 split_iv (ivts, insn, delta);
2023 }
2024 }
2025 /* Apply variable expansion optimization. */
2026 if (unrolling && opt_info->insns_with_var_to_expand)
2027 {
2028 ves = (struct var_to_expand *)
2029 opt_info->insns_with_var_to_expand->find (&ve_templ);
2030 if (ves)
2031 {
2032 gcc_assert (GET_CODE (PATTERN (insn))
2033 == GET_CODE (PATTERN (orig_insn)));
2034 expand_var_during_unrolling (ves, insn);
2035 }
2036 }
2037 orig_insn = NEXT_INSN (orig_insn);
2038 }
2039 }
2040
2041 if (!rewrite_original_loop)
2042 return;
2043
2044 /* Initialize the variable expansions in the loop preheader
2045 and take care of combining them at the loop exit. */
2046 if (opt_info->insns_with_var_to_expand)
2047 {
2048 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2049 insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2050 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2051 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2052 }
2053
 2054 /* Also rewrite the original loop body. Find its blocks as the originals
 2055 of the blocks in the last copied iteration, i.e. those that have
2056 get_bb_copy (get_bb_original (bb)) == bb. */
2057 for (i = opt_info->first_new_block;
2058 i < (unsigned) last_basic_block_for_fn (cfun);
2059 i++)
2060 {
2061 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2062 orig_bb = get_bb_original (bb);
2063 if (get_bb_copy (orig_bb) != bb)
2064 continue;
2065
2066 delta = determine_split_iv_delta (0, n_copies, unrolling);
2067 for (orig_insn = BB_HEAD (orig_bb);
2068 orig_insn != NEXT_INSN (BB_END (bb));
2069 orig_insn = next)
2070 {
2071 next = NEXT_INSN (orig_insn);
2072
2073 if (!INSN_P (orig_insn))
2074 continue;
2075
2076 ivts_templ.insn = orig_insn;
2077 if (opt_info->insns_to_split)
2078 {
2079 maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2080
2081 ivts = (struct iv_to_split *)
2082 opt_info->insns_to_split->find (&ivts_templ);
2083 if (ivts)
2084 {
2085 if (!delta)
2086 insert_base_initialization (ivts, orig_insn);
2087 split_iv (ivts, orig_insn, delta);
2088 continue;
2089 }
2090 }
2091
2092 }
2093 }
2094 }
2095
2096 /* Release OPT_INFO. */
2097
2098 static void
2099 free_opt_info (struct opt_info *opt_info)
2100 {
2101 delete opt_info->insns_to_split;
2102 opt_info->insns_to_split = NULL;
2103 if (opt_info->insns_with_var_to_expand)
2104 {
2105 struct var_to_expand *ves;
2106
2107 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2108 ves->var_expansions.release ();
2109 delete opt_info->insns_with_var_to_expand;
2110 opt_info->insns_with_var_to_expand = NULL;
2111 }
2112 free (opt_info);
2113 }