gcc/loop-unroll.c
1 /* Loop unrolling.
2 Copyright (C) 2002-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "hard-reg-set.h"
27 #include "obstack.h"
28 #include "profile.h"
29 #include "predict.h"
30 #include "vec.h"
31 #include "hashtab.h"
32 #include "hash-set.h"
33 #include "machmode.h"
34 #include "input.h"
35 #include "function.h"
36 #include "dominance.h"
37 #include "cfg.h"
38 #include "cfgrtl.h"
39 #include "basic-block.h"
40 #include "cfgloop.h"
41 #include "params.h"
42 #include "expr.h"
43 #include "hash-table.h"
44 #include "recog.h"
45 #include "target.h"
46 #include "dumpfile.h"
47
48 /* This pass performs loop unrolling. We only perform this
49 optimization on innermost loops (with a single exception) because
50 the impact on performance is greatest here, and we want to avoid
51 unnecessary code size growth. The gain is caused by greater sequentiality
52 of code, better code to optimize for further passes, and in some cases
53 by fewer tests of exit conditions. The main problem is code growth,
54 which impacts performance negatively due to cache effects.
55
56 What we do:
57
58 -- unrolling of loops that roll a constant number of times; this is
59 almost always a win, as we get rid of exit condition tests.
60 -- unrolling of loops that roll a number of times that we can compute
61 at runtime; we also get rid of exit condition tests here, but there
62 is the extra expense of calculating the number of iterations
63 -- simple unrolling of remaining loops; this is performed only if we
64 are asked to, as the gain is questionable in this case and often
65 it may even slow down the code
66 For more detailed descriptions of each of those, see the comments at
67 the appropriate functions below.
68
69 There are a lot of parameters (defined and described in params.def) that
70 control how much we unroll.
71
72 ??? A great problem is that we don't have a good way to determine
73 how many times we should unroll the loop; the experiments I have made
74 showed that this choice may affect performance on the order of several %.
75 */
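/* For illustration (example loops, not taken from the sources below),
   the three cases handled by this pass look roughly like:

     for (i = 0; i < 100; i++)    constant: iterates exactly 100 times,
       body;                      known at compile time

     for (i = 0; i < n; i++)      runtime: iterates n times, where n is
       body;                      only known when the loop is entered

     while (p != NULL)            stupid: the number of iterations
       body, p = p->next;         cannot be computed at all
*/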
76
77 /* Information about induction variables to split. */
78
79 struct iv_to_split
80 {
81 rtx_insn *insn; /* The insn in which the induction variable occurs. */
82 rtx orig_var; /* The variable (register) for the IV before split. */
83 rtx base_var; /* The variable on which the values in the further
84 iterations are based. */
85 rtx step; /* Step of the induction variable. */
86 struct iv_to_split *next; /* Next entry in walking order. */
87 };
88
89 /* Information about accumulators to expand. */
90
91 struct var_to_expand
92 {
93 rtx_insn *insn; /* The insn in which the variable expansion occurs. */
94 rtx reg; /* The accumulator which is expanded. */
95 vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */
96 struct var_to_expand *next; /* Next entry in walking order. */
97 enum rtx_code op; /* The type of the accumulation - addition, subtraction
98 or multiplication. */
99 int expansion_count; /* Count the number of expansions generated so far. */
100 int reuse_expansion; /* The expansion we intend to reuse to expand
101 the accumulator. If REUSE_EXPANSION is 0 reuse
102 the original accumulator. Else use
103 var_expansions[REUSE_EXPANSION - 1]. */
104 };
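/* For example (a hypothetical insn, for illustration only): for an
   accumulation insn "sum = sum + a[i]", INSN is that insn, REG is the
   register holding "sum", OP is PLUS, and VAR_EXPANSIONS collects the
   registers "sum1", "sum2", ... that are created for the unrolled
   copies and summed up at the loop exit. */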
105
106 /* Hashtable helper for iv_to_split. */
107
108 struct iv_split_hasher : typed_free_remove <iv_to_split>
109 {
110 typedef iv_to_split value_type;
111 typedef iv_to_split compare_type;
112 static inline hashval_t hash (const value_type *);
113 static inline bool equal (const value_type *, const compare_type *);
114 };
115
116
117 /* A hash function for information about insns to split. */
118
119 inline hashval_t
120 iv_split_hasher::hash (const value_type *ivts)
121 {
122 return (hashval_t) INSN_UID (ivts->insn);
123 }
124
125 /* An equality function for information about insns to split. */
126
127 inline bool
128 iv_split_hasher::equal (const value_type *i1, const compare_type *i2)
129 {
130 return i1->insn == i2->insn;
131 }
132
133 /* Hashtable helper for var_to_expand. */
134
135 struct var_expand_hasher : typed_free_remove <var_to_expand>
136 {
137 typedef var_to_expand value_type;
138 typedef var_to_expand compare_type;
139 static inline hashval_t hash (const value_type *);
140 static inline bool equal (const value_type *, const compare_type *);
141 };
142
143 /* Return a hash for VES. */
144
145 inline hashval_t
146 var_expand_hasher::hash (const value_type *ves)
147 {
148 return (hashval_t) INSN_UID (ves->insn);
149 }
150
151 /* Return true if I1 and I2 refer to the same instruction. */
152
153 inline bool
154 var_expand_hasher::equal (const value_type *i1, const compare_type *i2)
155 {
156 return i1->insn == i2->insn;
157 }
158
159 /* Information about optimization applied in
160 the unrolled loop. */
161
162 struct opt_info
163 {
164 hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
165 split. */
166 struct iv_to_split *iv_to_split_head; /* The first iv to split. */
167 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
168 hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
169 insns with accumulators to expand. */
170 struct var_to_expand *var_to_expand_head; /* The first var to expand. */
171 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
172 unsigned first_new_block; /* The first basic block that was
173 duplicated. */
174 basic_block loop_exit; /* The loop exit basic block. */
175 basic_block loop_preheader; /* The loop preheader basic block. */
176 };
177
178 static void decide_unroll_stupid (struct loop *, int);
179 static void decide_unroll_constant_iterations (struct loop *, int);
180 static void decide_unroll_runtime_iterations (struct loop *, int);
181 static void unroll_loop_stupid (struct loop *);
182 static void decide_unrolling (int);
183 static void unroll_loop_constant_iterations (struct loop *);
184 static void unroll_loop_runtime_iterations (struct loop *);
185 static struct opt_info *analyze_insns_in_loop (struct loop *);
186 static void opt_info_start_duplication (struct opt_info *);
187 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
188 static void free_opt_info (struct opt_info *);
189 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
190 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
191 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
192 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
193 static void insert_var_expansion_initialization (struct var_to_expand *,
194 basic_block);
195 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
196 basic_block);
197 static rtx get_expansion (struct var_to_expand *);
198
199 /* Emit a message summarizing the unroll that will be
200 performed for LOOP, along with the loop's location LOCUS, if
201 appropriate given the dump or -fopt-info settings. */
202
203 static void
204 report_unroll (struct loop *loop, location_t locus)
205 {
206 int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
207
208 if (loop->lpt_decision.decision == LPT_NONE)
209 return;
210
211 if (!dump_enabled_p ())
212 return;
213
214 dump_printf_loc (report_flags, locus,
215 "loop unrolled %d times",
216 loop->lpt_decision.times);
217 if (profile_info)
218 dump_printf (report_flags,
219 " (header execution count %d)",
220 (int)loop->header->count);
221
222 dump_printf (report_flags, "\n");
223 }
224
225 /* Decide whether to unroll loops and how much. */
226 static void
227 decide_unrolling (int flags)
228 {
229 struct loop *loop;
230
231 /* Scan the loops, inner ones first. */
232 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
233 {
234 loop->lpt_decision.decision = LPT_NONE;
235 location_t locus = get_loop_location (loop);
236
237 if (dump_enabled_p ())
238 dump_printf_loc (TDF_RTL, locus,
239 ";; *** Considering loop %d at BB %d for "
240 "unrolling ***\n",
241 loop->num, loop->header->index);
242
243 /* Do not unroll cold areas. */
244 if (optimize_loop_for_size_p (loop))
245 {
246 if (dump_file)
247 fprintf (dump_file, ";; Not considering loop, cold area\n");
248 continue;
249 }
250
251 /* Can the loop be manipulated? */
252 if (!can_duplicate_loop_p (loop))
253 {
254 if (dump_file)
255 fprintf (dump_file,
256 ";; Not considering loop, cannot duplicate\n");
257 continue;
258 }
259
260 /* Skip non-innermost loops. */
261 if (loop->inner)
262 {
263 if (dump_file)
264 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
265 continue;
266 }
267
268 loop->ninsns = num_loop_insns (loop);
269 loop->av_ninsns = average_num_loop_insns (loop);
270
271 /* Try transformations one by one in decreasing order of
272 priority. */
273
274 decide_unroll_constant_iterations (loop, flags);
275 if (loop->lpt_decision.decision == LPT_NONE)
276 decide_unroll_runtime_iterations (loop, flags);
277 if (loop->lpt_decision.decision == LPT_NONE)
278 decide_unroll_stupid (loop, flags);
279
280 report_unroll (loop, locus);
281 }
282 }
283
284 /* Unroll LOOPS. */
285 void
286 unroll_loops (int flags)
287 {
288 struct loop *loop;
289 bool changed = false;
290
291 /* First decide what to unroll and how much. */
292 decide_unrolling (flags);
293
294 /* Scan the loops, inner ones first. */
295 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
296 {
297 /* And perform the appropriate transformations. */
298 switch (loop->lpt_decision.decision)
299 {
300 case LPT_UNROLL_CONSTANT:
301 unroll_loop_constant_iterations (loop);
302 changed = true;
303 break;
304 case LPT_UNROLL_RUNTIME:
305 unroll_loop_runtime_iterations (loop);
306 changed = true;
307 break;
308 case LPT_UNROLL_STUPID:
309 unroll_loop_stupid (loop);
310 changed = true;
311 break;
312 case LPT_NONE:
313 break;
314 default:
315 gcc_unreachable ();
316 }
317 }
318
319 if (changed)
320 {
321 calculate_dominance_info (CDI_DOMINATORS);
322 fix_loop_structure (NULL);
323 }
324
325 iv_analysis_done ();
326 }
327
328 /* Check whether the exit of LOOP is at the end of the loop body. */
329
330 static bool
331 loop_exit_at_end_p (struct loop *loop)
332 {
333 struct niter_desc *desc = get_simple_loop_desc (loop);
334 rtx_insn *insn;
335
336 /* We should never have a conditional in the latch block. */
337 gcc_assert (desc->in_edge->dest != loop->header);
338
339 if (desc->in_edge->dest != loop->latch)
340 return false;
341
342 /* Check that the latch is empty. */
343 FOR_BB_INSNS (loop->latch, insn)
344 {
345 if (INSN_P (insn) && active_insn_p (insn))
346 return false;
347 }
348
349 return true;
350 }
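/* For illustration (source-level sketch, not from this file): a
   do-while style loop

     do
       body;
     while (cond);

   tests the exit condition at the end of the body, so
   loop_exit_at_end_p returns true for it, whereas a while style loop

     while (cond)
       body;

   tests the condition before the body and yields false. */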
351
352 /* Decide whether to unroll LOOP iterating a constant number of times
353 and how much. */
354
355 static void
356 decide_unroll_constant_iterations (struct loop *loop, int flags)
357 {
358 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
359 struct niter_desc *desc;
360 widest_int iterations;
361
362 if (!(flags & UAP_UNROLL))
363 {
364 /* We were not asked to, just return silently. */
365 return;
366 }
367
368 if (dump_file)
369 fprintf (dump_file,
370 "\n;; Considering unrolling loop with constant "
371 "number of iterations\n");
372
373 /* nunroll = total number of copies of the original loop body in
374 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
375 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
376 nunroll_by_av
377 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
378 if (nunroll > nunroll_by_av)
379 nunroll = nunroll_by_av;
380 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
381 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
382
383 if (targetm.loop_unroll_adjust)
384 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
385
386 /* Skip big loops. */
387 if (nunroll <= 1)
388 {
389 if (dump_file)
390 fprintf (dump_file, ";; Not considering loop, is too big\n");
391 return;
392 }
393
394 /* Check for simple loops. */
395 desc = get_simple_loop_desc (loop);
396
397 /* Check number of iterations. */
398 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
399 {
400 if (dump_file)
401 fprintf (dump_file,
402 ";; Unable to prove that the loop iterates constant times\n");
403 return;
404 }
405
406 /* Check whether the loop rolls enough to consider.
407 Consult also loop bounds and profile; in case the loop has more
408 than one exit it may well loop fewer times than the determined maximal
409 number of iterations. */
410 if (desc->niter < 2 * nunroll
411 || ((get_estimated_loop_iterations (loop, &iterations)
412 || get_max_loop_iterations (loop, &iterations))
413 && wi::ltu_p (iterations, 2 * nunroll)))
414 {
415 if (dump_file)
416 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
417 return;
418 }
419
420 /* Success; now compute the number of times to unroll. We alter
421 nunroll so that as few copies of the loop body as possible are
422 necessary, while still not decreasing the number of unrollings
423 too much (at most by 1). */
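/* A worked example with made-up numbers, assuming the exit test is not
   at the end of the body: with desc->niter == 100 and nunroll == 8 the
   search below tries i from 18 down to 7 and computes
   n_copies == 100 % (i + 1) + i + 1. For i == 9 this gives
   100 % 10 + 10 == 10 copies, the minimum, so best_unroll becomes 9
   (unroll 9 times, no peeled copies needed). */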
424 best_copies = 2 * nunroll + 10;
425
426 i = 2 * nunroll + 2;
427 if (i - 1 >= desc->niter)
428 i = desc->niter - 2;
429
430 for (; i >= nunroll - 1; i--)
431 {
432 unsigned exit_mod = desc->niter % (i + 1);
433
434 if (!loop_exit_at_end_p (loop))
435 n_copies = exit_mod + i + 1;
436 else if (exit_mod != (unsigned) i
437 || desc->noloop_assumptions != NULL_RTX)
438 n_copies = exit_mod + i + 2;
439 else
440 n_copies = i + 1;
441
442 if (n_copies < best_copies)
443 {
444 best_copies = n_copies;
445 best_unroll = i;
446 }
447 }
448
449 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
450 loop->lpt_decision.times = best_unroll;
451 }
452
453 /* Unroll LOOP with a constant number of iterations LOOP->LPT_DECISION.TIMES times.
454 The transformation does this:
455
456 for (i = 0; i < 102; i++)
457 body;
458
459 ==> (LOOP->LPT_DECISION.TIMES == 3)
460
461 i = 0;
462 body; i++;
463 body; i++;
464 while (i < 102)
465 {
466 body; i++;
467 body; i++;
468 body; i++;
469 body; i++;
470 }
471 */
472 static void
473 unroll_loop_constant_iterations (struct loop *loop)
474 {
475 unsigned HOST_WIDE_INT niter;
476 unsigned exit_mod;
477 sbitmap wont_exit;
478 unsigned i;
479 edge e;
480 unsigned max_unroll = loop->lpt_decision.times;
481 struct niter_desc *desc = get_simple_loop_desc (loop);
482 bool exit_at_end = loop_exit_at_end_p (loop);
483 struct opt_info *opt_info = NULL;
484 bool ok;
485
486 niter = desc->niter;
487
488 /* Should not get here (such a loop should be peeled instead). */
489 gcc_assert (niter > max_unroll + 1);
490
491 exit_mod = niter % (max_unroll + 1);
492
493 wont_exit = sbitmap_alloc (max_unroll + 1);
494 bitmap_ones (wont_exit);
495
496 auto_vec<edge> remove_edges;
497 if (flag_split_ivs_in_unroller
498 || flag_variable_expansion_in_unroller)
499 opt_info = analyze_insns_in_loop (loop);
500
501 if (!exit_at_end)
502 {
503 /* The exit is not at the end of the loop; leave the exit test
504 in the first copy, so that loops that start with a test of the
505 exit condition have a continuous body after unrolling. */
506
507 if (dump_file)
508 fprintf (dump_file, ";; Condition at beginning of loop.\n");
509
510 /* Peel exit_mod iterations. */
511 bitmap_clear_bit (wont_exit, 0);
512 if (desc->noloop_assumptions)
513 bitmap_clear_bit (wont_exit, 1);
514
515 if (exit_mod)
516 {
517 opt_info_start_duplication (opt_info);
518 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
519 exit_mod,
520 wont_exit, desc->out_edge,
521 &remove_edges,
522 DLTHE_FLAG_UPDATE_FREQ
523 | (opt_info && exit_mod > 1
524 ? DLTHE_RECORD_COPY_NUMBER
525 : 0));
526 gcc_assert (ok);
527
528 if (opt_info && exit_mod > 1)
529 apply_opt_in_copies (opt_info, exit_mod, false, false);
530
531 desc->noloop_assumptions = NULL_RTX;
532 desc->niter -= exit_mod;
533 loop->nb_iterations_upper_bound -= exit_mod;
534 if (loop->any_estimate
535 && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
536 loop->nb_iterations_estimate -= exit_mod;
537 else
538 loop->any_estimate = false;
539 }
540
541 bitmap_set_bit (wont_exit, 1);
542 }
543 else
544 {
545 /* Leave the exit test in the last copy, for the same reason as above,
546 if the loop tests the condition at the end of the loop body. */
547
548 if (dump_file)
549 fprintf (dump_file, ";; Condition at end of loop.\n");
550
551 /* We know that niter >= max_unroll + 2; so we do not need to care about
552 the case when we would exit before reaching the loop. So just peel
553 exit_mod + 1 iterations. */
554 if (exit_mod != max_unroll
555 || desc->noloop_assumptions)
556 {
557 bitmap_clear_bit (wont_exit, 0);
558 if (desc->noloop_assumptions)
559 bitmap_clear_bit (wont_exit, 1);
560
561 opt_info_start_duplication (opt_info);
562 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
563 exit_mod + 1,
564 wont_exit, desc->out_edge,
565 &remove_edges,
566 DLTHE_FLAG_UPDATE_FREQ
567 | (opt_info && exit_mod > 0
568 ? DLTHE_RECORD_COPY_NUMBER
569 : 0));
570 gcc_assert (ok);
571
572 if (opt_info && exit_mod > 0)
573 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
574
575 desc->niter -= exit_mod + 1;
576 loop->nb_iterations_upper_bound -= exit_mod + 1;
577 if (loop->any_estimate
578 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
579 loop->nb_iterations_estimate -= exit_mod + 1;
580 else
581 loop->any_estimate = false;
582 desc->noloop_assumptions = NULL_RTX;
583
584 bitmap_set_bit (wont_exit, 0);
585 bitmap_set_bit (wont_exit, 1);
586 }
587
588 bitmap_clear_bit (wont_exit, max_unroll);
589 }
590
591 /* Now unroll the loop. */
592
593 opt_info_start_duplication (opt_info);
594 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
595 max_unroll,
596 wont_exit, desc->out_edge,
597 &remove_edges,
598 DLTHE_FLAG_UPDATE_FREQ
599 | (opt_info
600 ? DLTHE_RECORD_COPY_NUMBER
601 : 0));
602 gcc_assert (ok);
603
604 if (opt_info)
605 {
606 apply_opt_in_copies (opt_info, max_unroll, true, true);
607 free_opt_info (opt_info);
608 }
609
610 free (wont_exit);
611
612 if (exit_at_end)
613 {
614 basic_block exit_block = get_bb_copy (desc->in_edge->src);
615 /* Find the new in and out edges; they are in the last copy we have made. */
616
617 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
618 {
619 desc->out_edge = EDGE_SUCC (exit_block, 0);
620 desc->in_edge = EDGE_SUCC (exit_block, 1);
621 }
622 else
623 {
624 desc->out_edge = EDGE_SUCC (exit_block, 1);
625 desc->in_edge = EDGE_SUCC (exit_block, 0);
626 }
627 }
628
629 desc->niter /= max_unroll + 1;
630 loop->nb_iterations_upper_bound
631 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
632 if (loop->any_estimate)
633 loop->nb_iterations_estimate
634 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
635 desc->niter_expr = GEN_INT (desc->niter);
636
637 /* Remove the edges. */
638 FOR_EACH_VEC_ELT (remove_edges, i, e)
639 remove_path (e);
640
641 if (dump_file)
642 fprintf (dump_file,
643 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
644 max_unroll, num_loop_insns (loop));
645 }
646
647 /* Decide whether to unroll LOOP iterating a runtime-computable number of
648 times and how much. */
649 static void
650 decide_unroll_runtime_iterations (struct loop *loop, int flags)
651 {
652 unsigned nunroll, nunroll_by_av, i;
653 struct niter_desc *desc;
654 widest_int iterations;
655
656 if (!(flags & UAP_UNROLL))
657 {
658 /* We were not asked to, just return silently. */
659 return;
660 }
661
662 if (dump_file)
663 fprintf (dump_file,
664 "\n;; Considering unrolling loop with runtime "
665 "computable number of iterations\n");
666
667 /* nunroll = total number of copies of the original loop body in
668 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
669 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
670 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
671 if (nunroll > nunroll_by_av)
672 nunroll = nunroll_by_av;
673 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
674 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
675
676 if (targetm.loop_unroll_adjust)
677 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
678
679 /* Skip big loops. */
680 if (nunroll <= 1)
681 {
682 if (dump_file)
683 fprintf (dump_file, ";; Not considering loop, is too big\n");
684 return;
685 }
686
687 /* Check for simple loops. */
688 desc = get_simple_loop_desc (loop);
689
690 /* Check simplicity. */
691 if (!desc->simple_p || desc->assumptions)
692 {
693 if (dump_file)
694 fprintf (dump_file,
695 ";; Unable to prove that the number of iterations "
696 "can be counted in runtime\n");
697 return;
698 }
699
700 if (desc->const_iter)
701 {
702 if (dump_file)
703 fprintf (dump_file, ";; Loop iterates constant times\n");
704 return;
705 }
706
707 /* Check whether the loop rolls. */
708 if ((get_estimated_loop_iterations (loop, &iterations)
709 || get_max_loop_iterations (loop, &iterations))
710 && wi::ltu_p (iterations, 2 * nunroll))
711 {
712 if (dump_file)
713 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
714 return;
715 }
716
717 /* Success; now force nunroll to be a power of 2, as we are unable to
718 cope with overflows in the computation of the number of iterations. */
719 for (i = 1; 2 * i <= nunroll; i *= 2)
720 continue;
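/* For example, nunroll == 6 leaves i == 4 after the loop above, so we
   unroll 3 times (4 copies of the body) and the preconditioning code
   in unroll_loop_runtime_iterations can compute niter % 4 simply as
   niter & 3. */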
721
722 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
723 loop->lpt_decision.times = i - 1;
724 }
725
726 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
727 returns the newly created block. If INSNS is NULL_RTX, nothing is changed
728 and NULL is returned instead. */
729
730 basic_block
731 split_edge_and_insert (edge e, rtx_insn *insns)
732 {
733 basic_block bb;
734
735 if (!insns)
736 return NULL;
737 bb = split_edge (e);
738 emit_insn_after (insns, BB_END (bb));
739
740 /* ??? We used to assume that INSNS can contain control flow insns, and
741 that we had to try to find sub basic blocks in BB to maintain a valid
742 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
743 and call break_superblocks when going out of cfglayout mode. But it
744 turns out that this never happens; and that if it does ever happen,
745 the verify_flow_info at the end of the RTL loop passes would fail.
746
747 There are two reasons why we expected we could have control flow insns
748 in INSNS. The first is when a comparison has to be done in parts, and
749 the second is when the number of iterations is computed for loops with
750 the number of iterations known at runtime. In both cases, test cases
751 to get control flow in INSNS appear to be impossible to construct:
752
753 * If do_compare_rtx_and_jump needs several branches to do comparison
754 in a mode that needs comparison by parts, we cannot analyze the
755 number of iterations of the loop, and we never get to unrolling it.
756
757 * The code in expand_divmod that was suspected to cause creation of
758 branching code seems to be only accessed for signed division. The
759 divisions used by # of iterations analysis are always unsigned.
760 Problems might arise on architectures that emit branching code
761 for some operations that may appear in the unroller (especially
762 for division), but we have no such architectures.
763
764 Considering all this, it was decided that we should for now assume
765 that INSNS can in theory contain control flow insns, but in practice
766 it never does. So we don't handle the theoretical case, and should
767 a real failure ever show up, we have a pretty good clue for how to
768 fix it. */
769
770 return bb;
771 }
772
773 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
774 true, with probability PROB. If CINSN is not NULL, it is the insn to copy
775 in order to create a jump. */
776
777 static rtx_insn *
778 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
779 rtx_insn *cinsn)
780 {
781 rtx_insn *seq, *jump;
782 rtx cond;
783 machine_mode mode;
784
785 mode = GET_MODE (op0);
786 if (mode == VOIDmode)
787 mode = GET_MODE (op1);
788
789 start_sequence ();
790 if (GET_MODE_CLASS (mode) == MODE_CC)
791 {
792 /* A hack -- there seems to be no easy generic way to make a
793 conditional jump from a ccmode comparison. */
794 gcc_assert (cinsn);
795 cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
796 gcc_assert (GET_CODE (cond) == comp);
797 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
798 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
799 emit_jump_insn (copy_insn (PATTERN (cinsn)));
800 jump = get_last_insn ();
801 gcc_assert (JUMP_P (jump));
802 JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
803 LABEL_NUSES (JUMP_LABEL (jump))++;
804 redirect_jump (jump, label, 0);
805 }
806 else
807 {
808 gcc_assert (!cinsn);
809
810 op0 = force_operand (op0, NULL_RTX);
811 op1 = force_operand (op1, NULL_RTX);
812 do_compare_rtx_and_jump (op0, op1, comp, 0,
813 mode, NULL_RTX, NULL_RTX, label, -1);
814 jump = get_last_insn ();
815 gcc_assert (JUMP_P (jump));
816 JUMP_LABEL (jump) = label;
817 LABEL_NUSES (label)++;
818 }
819 add_int_reg_note (jump, REG_BR_PROB, prob);
820
821 seq = get_insns ();
822 end_sequence ();
823
824 return seq;
825 }
826
827 /* Unroll LOOP for which we are able to count the number of iterations at
828 runtime LOOP->LPT_DECISION.TIMES times. The transformation does this (with
829 some extra care for the case n < 0):
830
831 for (i = 0; i < n; i++)
832 body;
833
834 ==> (LOOP->LPT_DECISION.TIMES == 3)
835
836 i = 0;
837 mod = n % 4;
838
839 switch (mod)
840 {
841 case 3:
842 body; i++;
843 case 2:
844 body; i++;
845 case 1:
846 body; i++;
847 case 0: ;
848 }
849
850 while (i < n)
851 {
852 body; i++;
853 body; i++;
854 body; i++;
855 body; i++;
856 }
857 */
858 static void
859 unroll_loop_runtime_iterations (struct loop *loop)
860 {
861 rtx old_niter, niter, tmp;
862 rtx_insn *init_code, *branch_code;
863 unsigned i, j, p;
864 basic_block preheader, *body, swtch, ezc_swtch;
865 sbitmap wont_exit;
866 int may_exit_copy;
867 unsigned n_peel;
868 edge e;
869 bool extra_zero_check, last_may_exit;
870 unsigned max_unroll = loop->lpt_decision.times;
871 struct niter_desc *desc = get_simple_loop_desc (loop);
872 bool exit_at_end = loop_exit_at_end_p (loop);
873 struct opt_info *opt_info = NULL;
874 bool ok;
875
876 if (flag_split_ivs_in_unroller
877 || flag_variable_expansion_in_unroller)
878 opt_info = analyze_insns_in_loop (loop);
879
880 /* Remember blocks whose dominators will have to be updated. */
881 auto_vec<basic_block> dom_bbs;
882
883 body = get_loop_body (loop);
884 for (i = 0; i < loop->num_nodes; i++)
885 {
886 vec<basic_block> ldom;
887 basic_block bb;
888
889 ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
890 FOR_EACH_VEC_ELT (ldom, j, bb)
891 if (!flow_bb_inside_loop_p (loop, bb))
892 dom_bbs.safe_push (bb);
893
894 ldom.release ();
895 }
896 free (body);
897
898 if (!exit_at_end)
899 {
900 /* Leave exit in first copy (for explanation why see comment in
901 unroll_loop_constant_iterations). */
902 may_exit_copy = 0;
903 n_peel = max_unroll - 1;
904 extra_zero_check = true;
905 last_may_exit = false;
906 }
907 else
908 {
909 /* Leave exit in last copy (for explanation why see comment in
910 unroll_loop_constant_iterations). */
911 may_exit_copy = max_unroll;
912 n_peel = max_unroll;
913 extra_zero_check = false;
914 last_may_exit = true;
915 }
916
917 /* Get expression for number of iterations. */
918 start_sequence ();
919 old_niter = niter = gen_reg_rtx (desc->mode);
920 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
921 if (tmp != niter)
922 emit_move_insn (niter, tmp);
923
924 /* Count modulo by ANDing it with max_unroll; we use the fact that
925 the number of unrollings is a power of two, and thus this is correct
926 even if there is overflow in the computation. */
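/* An illustration with made-up numbers: in an 8-bit mode a true count
   of 300 wraps around to 300 - 256 == 44, yet with max_unroll == 3 the
   masking still yields the right residue, 44 & 3 == 0 == 300 % 4,
   because the mode size 256 is itself a multiple of max_unroll + 1. */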
927 niter = expand_simple_binop (desc->mode, AND,
928 niter, gen_int_mode (max_unroll, desc->mode),
929 NULL_RTX, 0, OPTAB_LIB_WIDEN);
930
931 init_code = get_insns ();
932 end_sequence ();
933 unshare_all_rtl_in_chain (init_code);
934
935 /* Precondition the loop. */
936 split_edge_and_insert (loop_preheader_edge (loop), init_code);
937
938 auto_vec<edge> remove_edges;
939
940 wont_exit = sbitmap_alloc (max_unroll + 2);
941
942 /* Peel the first copy of the loop body (almost always we must leave the
943 exit test here; the only exception is when we have an extra zero check
944 and the number of iterations is reliable). Also record the place of the
945 (possible) extra zero check. */
946 bitmap_clear (wont_exit);
947 if (extra_zero_check
948 && !desc->noloop_assumptions)
949 bitmap_set_bit (wont_exit, 1);
950 ezc_swtch = loop_preheader_edge (loop)->src;
951 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
952 1, wont_exit, desc->out_edge,
953 &remove_edges,
954 DLTHE_FLAG_UPDATE_FREQ);
955 gcc_assert (ok);
956
957 /* Record the place where switch will be built for preconditioning. */
958 swtch = split_edge (loop_preheader_edge (loop));
959
960 for (i = 0; i < n_peel; i++)
961 {
962 /* Peel the copy. */
963 bitmap_clear (wont_exit);
964 if (i != n_peel - 1 || !last_may_exit)
965 bitmap_set_bit (wont_exit, 1);
966 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
967 1, wont_exit, desc->out_edge,
968 &remove_edges,
969 DLTHE_FLAG_UPDATE_FREQ);
970 gcc_assert (ok);
971
972 /* Create item for switch. */
973 j = n_peel - i - (extra_zero_check ? 0 : 1);
974 p = REG_BR_PROB_BASE / (i + 2);
975
976 preheader = split_edge (loop_preheader_edge (loop));
977 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
978 block_label (preheader), p,
979 NULL);
980
981 /* We rely on the fact that the compare and jump cannot be optimized out,
982 and hence the cfg we create is correct. */
983 gcc_assert (branch_code != NULL_RTX);
984
985 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
986 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
987 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
988 e = make_edge (swtch, preheader,
989 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
990 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
991 e->probability = p;
992 }
993
994 if (extra_zero_check)
995 {
996 /* Add branch for zero iterations. */
997 p = REG_BR_PROB_BASE / (max_unroll + 1);
998 swtch = ezc_swtch;
999 preheader = split_edge (loop_preheader_edge (loop));
1000 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1001 block_label (preheader), p,
1002 NULL);
1003 gcc_assert (branch_code != NULL_RTX);
1004
1005 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1006 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1007 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1008 e = make_edge (swtch, preheader,
1009 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1010 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
1011 e->probability = p;
1012 }
1013
1014 /* Recount dominators for outer blocks. */
1015 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1016
1017 /* And unroll loop. */
1018
1019 bitmap_ones (wont_exit);
1020 bitmap_clear_bit (wont_exit, may_exit_copy);
1021 opt_info_start_duplication (opt_info);
1022
1023 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1024 max_unroll,
1025 wont_exit, desc->out_edge,
1026 &remove_edges,
1027 DLTHE_FLAG_UPDATE_FREQ
1028 | (opt_info
1029 ? DLTHE_RECORD_COPY_NUMBER
1030 : 0));
1031 gcc_assert (ok);
1032
1033 if (opt_info)
1034 {
1035 apply_opt_in_copies (opt_info, max_unroll, true, true);
1036 free_opt_info (opt_info);
1037 }
1038
1039 free (wont_exit);
1040
1041 if (exit_at_end)
1042 {
1043 basic_block exit_block = get_bb_copy (desc->in_edge->src);
1044 /* Find the new in and out edges; they are in the last copy we have
1045 made. */
1046
1047 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1048 {
1049 desc->out_edge = EDGE_SUCC (exit_block, 0);
1050 desc->in_edge = EDGE_SUCC (exit_block, 1);
1051 }
1052 else
1053 {
1054 desc->out_edge = EDGE_SUCC (exit_block, 1);
1055 desc->in_edge = EDGE_SUCC (exit_block, 0);
1056 }
1057 }
1058
1059 /* Remove the edges. */
1060 FOR_EACH_VEC_ELT (remove_edges, i, e)
1061 remove_path (e);
1062
1063 /* We must be careful when updating the number of iterations due to
1064 preconditioning and the fact that the value must be valid at the entry
1065 of the loop. After passing through the above code, we see that
1066 the correct new number of iterations is this: */
1067 gcc_assert (!desc->const_iter);
1068 desc->niter_expr =
1069 simplify_gen_binary (UDIV, desc->mode, old_niter,
1070 gen_int_mode (max_unroll + 1, desc->mode));
1071 loop->nb_iterations_upper_bound
1072 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1073 if (loop->any_estimate)
1074 loop->nb_iterations_estimate
1075 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1076 if (exit_at_end)
1077 {
1078 desc->niter_expr =
1079 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1080 desc->noloop_assumptions = NULL_RTX;
1081 --loop->nb_iterations_upper_bound;
1082 if (loop->any_estimate
1083 && loop->nb_iterations_estimate != 0)
1084 --loop->nb_iterations_estimate;
1085 else
1086 loop->any_estimate = false;
1087 }
1088
1089 if (dump_file)
1090 fprintf (dump_file,
1091 ";; Unrolled loop %d times, counting # of iterations "
1092 "in runtime, %i insns\n",
1093 max_unroll, num_loop_insns (loop));
1094 }
1095
1096 /* Decide whether to unroll LOOP stupidly and how much. */
1097 static void
1098 decide_unroll_stupid (struct loop *loop, int flags)
1099 {
1100 unsigned nunroll, nunroll_by_av, i;
1101 struct niter_desc *desc;
1102 widest_int iterations;
1103
1104 if (!(flags & UAP_UNROLL_ALL))
1105 {
1106 /* We were not asked to, just return silently. */
1107 return;
1108 }
1109
1110 if (dump_file)
1111 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1112
1113 /* nunroll = total number of copies of the original loop body in
1114 unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
1115 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1116 nunroll_by_av
1117 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1118 if (nunroll > nunroll_by_av)
1119 nunroll = nunroll_by_av;
1120 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1121 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1122
1123 if (targetm.loop_unroll_adjust)
1124 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1125
1126 /* Skip big loops. */
1127 if (nunroll <= 1)
1128 {
1129 if (dump_file)
1130 fprintf (dump_file, ";; Not considering loop, is too big\n");
1131 return;
1132 }
1133
1134 /* Check for simple loops. */
1135 desc = get_simple_loop_desc (loop);
1136
1137 /* Check simplicity. */
1138 if (desc->simple_p && !desc->assumptions)
1139 {
1140 if (dump_file)
1141 fprintf (dump_file, ";; The loop is simple\n");
1142 return;
1143 }
1144
1145 /* Do not unroll loops with branches inside -- it increases the number
1146 of mispredicts.
1147 TODO: this heuristic needs tuning; a call inside the loop body
1148 is also a relatively good reason not to unroll. */
1149 if (num_loop_branches (loop) > 1)
1150 {
1151 if (dump_file)
1152 fprintf (dump_file, ";; Not unrolling, contains branches\n");
1153 return;
1154 }
1155
1156 /* Check whether the loop rolls. */
1157 if ((get_estimated_loop_iterations (loop, &iterations)
1158 || get_max_loop_iterations (loop, &iterations))
1159 && wi::ltu_p (iterations, 2 * nunroll))
1160 {
1161 if (dump_file)
1162 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1163 return;
1164 }
1165
1166 /* Success. Now force nunroll to be a power of 2, as it seems that this
1167 improves results (partially because of better alignments, partially
1168 because of some dark magic). */
1169 for (i = 1; 2 * i <= nunroll; i *= 2)
1170 continue;
1171
1172 loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1173 loop->lpt_decision.times = i - 1;
1174 }
1175
1176 /* Unroll LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this:
1177
1178 while (cond)
1179 body;
1180
1181 ==> (LOOP->LPT_DECISION.TIMES == 3)
1182
1183 while (cond)
1184 {
1185 body;
1186 if (!cond) break;
1187 body;
1188 if (!cond) break;
1189 body;
1190 if (!cond) break;
1191 body;
1192 }
1193 */
1194 static void
1195 unroll_loop_stupid (struct loop *loop)
1196 {
1197 sbitmap wont_exit;
1198 unsigned nunroll = loop->lpt_decision.times;
1199 struct niter_desc *desc = get_simple_loop_desc (loop);
1200 struct opt_info *opt_info = NULL;
1201 bool ok;
1202
1203 if (flag_split_ivs_in_unroller
1204 || flag_variable_expansion_in_unroller)
1205 opt_info = analyze_insns_in_loop (loop);
1206
1207
1208 wont_exit = sbitmap_alloc (nunroll + 1);
1209 bitmap_clear (wont_exit);
1210 opt_info_start_duplication (opt_info);
1211
1212 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1213 nunroll, wont_exit,
1214 NULL, NULL,
1215 DLTHE_FLAG_UPDATE_FREQ
1216 | (opt_info
1217 ? DLTHE_RECORD_COPY_NUMBER
1218 : 0));
1219 gcc_assert (ok);
1220
1221 if (opt_info)
1222 {
1223 apply_opt_in_copies (opt_info, nunroll, true, true);
1224 free_opt_info (opt_info);
1225 }
1226
1227 free (wont_exit);
1228
1229 if (desc->simple_p)
1230 {
1231 /* We may indeed get here, provided that the loop is simple only under
1232 nontrivial assumptions. We could update the counts, but the
1233 problem is that we are unable to decide which exit will be taken
1234 (not really true in case the number of iterations is constant,
1235 but no one will do anything with this information, so we do not
1236 worry about it). */
1237 desc->simple_p = false;
1238 }
1239
1240 if (dump_file)
1241 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1242 nunroll, num_loop_insns (loop));
1243 }
1244
1245 /* Returns true if REG is referenced in exactly one nondebug insn in LOOP.
1246 Set *DEBUG_USES to the number of debug insns that reference the
1247 variable. */
1248
1249 static bool
1250 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1251 int *debug_uses)
1252 {
1253 basic_block *body, bb;
1254 unsigned i;
1255 int count_ref = 0;
1256 rtx_insn *insn;
1257
1258 body = get_loop_body (loop);
1259 for (i = 0; i < loop->num_nodes; i++)
1260 {
1261 bb = body[i];
1262
1263 FOR_BB_INSNS (bb, insn)
1264 if (!rtx_referenced_p (reg, insn))
1265 continue;
1266 else if (DEBUG_INSN_P (insn))
1267 ++*debug_uses;
1268 else if (++count_ref > 1)
1269 break;
1270 }
1271 free (body);
1272 return (count_ref == 1);
1273 }
1274
1275 /* Reset the DEBUG_USES debug insns in LOOP that reference REG. */
1276
1277 static void
1278 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1279 {
1280 basic_block *body, bb;
1281 unsigned i;
1282 rtx_insn *insn;
1283
1284 body = get_loop_body (loop);
1285 for (i = 0; debug_uses && i < loop->num_nodes; i++)
1286 {
1287 bb = body[i];
1288
1289 FOR_BB_INSNS (bb, insn)
1290 if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1291 continue;
1292 else
1293 {
1294 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1295 gen_rtx_UNKNOWN_VAR_LOC (), 0);
1296 if (!--debug_uses)
1297 break;
1298 }
1299 }
1300 free (body);
1301 }
1302
1303 /* Determine whether INSN contains an accumulator
1304 which can be expanded into separate copies,
1305 one for each copy of the LOOP body.
1306
1307 for (i = 0 ; i < n; i++)
1308 sum += a[i];
1309
1310 ==>
1311
1312 sum += a[i]
1313 ....
1314 i = i+1;
1315 sum1 += a[i]
1316 ....
1317 i = i+1
1318 sum2 += a[i];
1319 ....
1320
1321 Return NULL if INSN contains no opportunity for expansion of an accumulator.
1322 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1323 information and return a pointer to it.
1324 */
1325
1326 static struct var_to_expand *
1327 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1328 {
1329 rtx set, dest, src;
1330 struct var_to_expand *ves;
1331 unsigned accum_pos;
1332 enum rtx_code code;
1333 int debug_uses = 0;
1334
1335 set = single_set (insn);
1336 if (!set)
1337 return NULL;
1338
1339 dest = SET_DEST (set);
1340 src = SET_SRC (set);
1341 code = GET_CODE (src);
1342
1343 if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1344 return NULL;
1345
1346 if (FLOAT_MODE_P (GET_MODE (dest)))
1347 {
1348 if (!flag_associative_math)
1349 return NULL;
1350 /* In the case of FMA, we're also changing the rounding. */
1351 if (code == FMA && !flag_unsafe_math_optimizations)
1352 return NULL;
1353 }
1354
1355 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn
1356 in MD. But if there is no optab to generate the insn, we cannot
1357 perform the variable expansion. This can happen if an MD provides
1358 an insn but not a named pattern to generate it, for example to avoid
1359 producing code that needs additional mode switches like for x87/mmx.
1360
1361 So we check have_insn_for which looks for an optab for the operation
1362 in SRC. If it doesn't exist, we can't perform the expansion even
1363 though INSN is valid. */
1364 if (!have_insn_for (code, GET_MODE (src)))
1365 return NULL;
1366
1367 if (!REG_P (dest)
1368 && !(GET_CODE (dest) == SUBREG
1369 && REG_P (SUBREG_REG (dest))))
1370 return NULL;
1371
1372 /* Find the accumulator use within the operation. */
1373 if (code == FMA)
1374 {
1375 /* We only support accumulation via FMA in the ADD position. */
1376 if (!rtx_equal_p (dest, XEXP (src, 2)))
1377 return NULL;
1378 accum_pos = 2;
1379 }
1380 else if (rtx_equal_p (dest, XEXP (src, 0)))
1381 accum_pos = 0;
1382 else if (rtx_equal_p (dest, XEXP (src, 1)))
1383 {
1384 /* The method of expansion that we are using (initializing the
1385 expansions with zero and summing them up at the end of the
1386 computation) will yield wrong results for (x = something - x),
1387 so avoid using it in that case. */
1388 if (code == MINUS)
1389 return NULL;
1390 accum_pos = 1;
1391 }
1392 else
1393 return NULL;
1394
1395 /* It must not otherwise be used. */
1396 if (code == FMA)
1397 {
1398 if (rtx_referenced_p (dest, XEXP (src, 0))
1399 || rtx_referenced_p (dest, XEXP (src, 1)))
1400 return NULL;
1401 }
1402 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1403 return NULL;
1404
1405 /* It must be used in exactly one insn. */
1406 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1407 return NULL;
1408
1409 if (dump_file)
1410 {
1411 fprintf (dump_file, "\n;; Expanding Accumulator ");
1412 print_rtl (dump_file, dest);
1413 fprintf (dump_file, "\n");
1414 }
1415
1416 if (debug_uses)
1417 /* Instead of resetting the debug insns, we could replace each
1418 debug use in the loop with the sum or product of all expanded
1419 accumulators. Since we'll only know of all expansions at the
1420 end, we'd have to keep track of which vars_to_expand a debug
1421 insn in the loop references, take note of each copy of the
1422 debug insn during unrolling, and when it's all done, compute
1423 the sum or product of each variable and adjust the original
1424 debug insn and each copy thereof. What a pain! */
1425 reset_debug_uses_in_loop (loop, dest, debug_uses);
1426
1427 /* Record the accumulator to expand. */
1428 ves = XNEW (struct var_to_expand);
1429 ves->insn = insn;
1430 ves->reg = copy_rtx (dest);
1431 ves->var_expansions.create (1);
1432 ves->next = NULL;
1433 ves->op = GET_CODE (src);
1434 ves->expansion_count = 0;
1435 ves->reuse_expansion = 0;
1436 return ves;
1437 }
1438
1439 /* Determine whether there is an induction variable in INSN that
1440 we would like to split during unrolling.
1441
1442 I.e. replace
1443
1444 i = i + 1;
1445 ...
1446 i = i + 1;
1447 ...
1448 i = i + 1;
1449 ...
1450
1451 chains of this type with
1452
1453 i0 = i + 1
1454 ...
1455 i = i0 + 1
1456 ...
1457 i = i0 + 2
1458 ...
1459
1460 Return NULL if INSN contains no interesting IVs. Otherwise, allocate
1461 an IV_TO_SPLIT structure, fill it with the relevant information and return a
1462 pointer to it. */
1463
1464 static struct iv_to_split *
1465 analyze_iv_to_split_insn (rtx_insn *insn)
1466 {
1467 rtx set, dest;
1468 struct rtx_iv iv;
1469 struct iv_to_split *ivts;
1470 bool ok;
1471
1472 /* For now we just split the basic induction variables. Later this may be
1473 extended, for example, by also selecting addresses of memory references. */
1474 set = single_set (insn);
1475 if (!set)
1476 return NULL;
1477
1478 dest = SET_DEST (set);
1479 if (!REG_P (dest))
1480 return NULL;
1481
1482 if (!biv_p (insn, dest))
1483 return NULL;
1484
1485 ok = iv_analyze_result (insn, dest, &iv);
1486
1487 /* This used to be an assert under the assumption that if biv_p returns
1488 true that iv_analyze_result must also return true. However, that
1489 assumption is not strictly correct as evidenced by pr25569.
1490
1491 Returning NULL when iv_analyze_result returns false is safe and
1492 avoids the problems in pr25569 until the iv_analyze_* routines
1493 can be fixed, which is apparently hard and time consuming
1494 according to their author. */
1495 if (! ok)
1496 return NULL;
1497
1498 if (iv.step == const0_rtx
1499 || iv.mode != iv.extend_mode)
1500 return NULL;
1501
1502 /* Record the insn to split. */
1503 ivts = XNEW (struct iv_to_split);
1504 ivts->insn = insn;
1505 ivts->orig_var = dest;
1506 ivts->base_var = NULL_RTX;
1507 ivts->step = iv.step;
1508 ivts->next = NULL;
1509
1510 return ivts;
1511 }
1512
1513 /* Determine which of the insns in LOOP can be optimized.
1514 Return an OPT_INFO struct with the relevant hash tables filled
1515 with all insns to be optimized. The FIRST_NEW_BLOCK field
1516 is undefined for the return value. */
1517
1518 static struct opt_info *
1519 analyze_insns_in_loop (struct loop *loop)
1520 {
1521 basic_block *body, bb;
1522 unsigned i;
1523 struct opt_info *opt_info = XCNEW (struct opt_info);
1524 rtx_insn *insn;
1525 struct iv_to_split *ivts = NULL;
1526 struct var_to_expand *ves = NULL;
1527 iv_to_split **slot1;
1528 var_to_expand **slot2;
1529 vec<edge> edges = get_loop_exit_edges (loop);
1530 edge exit;
1531 bool can_apply = false;
1532
1533 iv_analysis_loop_init (loop);
1534
1535 body = get_loop_body (loop);
1536
1537 if (flag_split_ivs_in_unroller)
1538 {
1539 opt_info->insns_to_split
1540 = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1541 opt_info->iv_to_split_head = NULL;
1542 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1543 }
1544
1545 /* Record the loop exit bb and loop preheader before the unrolling. */
1546 opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1547
1548 if (edges.length () == 1)
1549 {
1550 exit = edges[0];
1551 if (!(exit->flags & EDGE_COMPLEX))
1552 {
1553 opt_info->loop_exit = split_edge (exit);
1554 can_apply = true;
1555 }
1556 }
1557
1558 if (flag_variable_expansion_in_unroller
1559 && can_apply)
1560 {
1561 opt_info->insns_with_var_to_expand
1562 = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1563 opt_info->var_to_expand_head = NULL;
1564 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1565 }
1566
1567 for (i = 0; i < loop->num_nodes; i++)
1568 {
1569 bb = body[i];
1570 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1571 continue;
1572
1573 FOR_BB_INSNS (bb, insn)
1574 {
1575 if (!INSN_P (insn))
1576 continue;
1577
1578 if (opt_info->insns_to_split)
1579 ivts = analyze_iv_to_split_insn (insn);
1580
1581 if (ivts)
1582 {
1583 slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1584 gcc_assert (*slot1 == NULL);
1585 *slot1 = ivts;
1586 *opt_info->iv_to_split_tail = ivts;
1587 opt_info->iv_to_split_tail = &ivts->next;
1588 continue;
1589 }
1590
1591 if (opt_info->insns_with_var_to_expand)
1592 ves = analyze_insn_to_expand_var (loop, insn);
1593
1594 if (ves)
1595 {
1596 slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1597 gcc_assert (*slot2 == NULL);
1598 *slot2 = ves;
1599 *opt_info->var_to_expand_tail = ves;
1600 opt_info->var_to_expand_tail = &ves->next;
1601 }
1602 }
1603 }
1604
1605 edges.release ();
1606 free (body);
1607 return opt_info;
1608 }
1609
1610 /* Called just before loop duplication. Records start of duplicated area
1611 to OPT_INFO. */
1612
1613 static void
1614 opt_info_start_duplication (struct opt_info *opt_info)
1615 {
1616 if (opt_info)
1617 opt_info->first_new_block = last_basic_block_for_fn (cfun);
1618 }
1619
1620 /* Determine the number of iterations between initialization of the base
1621 variable and the current copy (N_COPY). N_COPIES is the total number
1622 of newly created copies. UNROLLING is true if we are unrolling
1623 (not peeling) the loop. */
1624
1625 static unsigned
1626 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1627 {
1628 if (unrolling)
1629 {
1630 /* If we are unrolling, initialization is done in the original loop
1631 body (number 0). */
1632 return n_copy;
1633 }
1634 else
1635 {
1636 /* If we are peeling, the copy in which the initialization occurs has
1637 number 1. The original loop (number 0) is the last. */
1638 if (n_copy)
1639 return n_copy - 1;
1640 else
1641 return n_copies;
1642 }
1643 }
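/* For example (illustrative numbers): when unrolling with
   n_copies == 3, the original body (copy 0) initializes the base
   variable, so copies 0, 1, 2, 3 get deltas 0, 1, 2, 3, and split_iv
   rewrites "i = i + step" in copy 2 as "i = i0 + 2 * step". When
   peeling, the initialization happens in copy 1, so copies 1, 2, 3 get
   deltas 0, 1, 2 and the original body, executed last, gets delta
   n_copies == 3. */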
1644
1645 /* Allocate basic variable for the induction variable chain. */
1646
1647 static void
1648 allocate_basic_variable (struct iv_to_split *ivts)
1649 {
1650 rtx expr = SET_SRC (single_set (ivts->insn));
1651
1652 ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1653 }
1654
1655 /* Insert initialization of basic variable of IVTS before INSN, taking
1656 the initial value from INSN. */
1657
1658 static void
1659 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1660 {
1661 rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1662 rtx_insn *seq;
1663
1664 start_sequence ();
1665 expr = force_operand (expr, ivts->base_var);
1666 if (expr != ivts->base_var)
1667 emit_move_insn (ivts->base_var, expr);
1668 seq = get_insns ();
1669 end_sequence ();
1670
1671 emit_insn_before (seq, insn);
1672 }
1673
1674 /* Replace the use of induction variable described in IVTS in INSN
1675 by base variable + DELTA * step. */
1676
1677 static void
1678 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1679 {
1680 rtx expr, *loc, incr, var;
1681 rtx_insn *seq;
1682 machine_mode mode = GET_MODE (ivts->base_var);
1683 rtx src, dest, set;
1684
1685 /* Construct base + DELTA * step. */
1686 if (!delta)
1687 expr = ivts->base_var;
1688 else
1689 {
1690 incr = simplify_gen_binary (MULT, mode,
1691 ivts->step, gen_int_mode (delta, mode));
1692 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1693 ivts->base_var, incr);
1694 }
1695
1696 /* Figure out where to do the replacement. */
1697 loc = &SET_SRC (single_set (insn));
1698
1699 /* If we can make the replacement right away, we're done. */
1700 if (validate_change (insn, loc, expr, 0))
1701 return;
1702
1703 /* Otherwise, force EXPR into a register and try again. */
1704 start_sequence ();
1705 var = gen_reg_rtx (mode);
1706 expr = force_operand (expr, var);
1707 if (expr != var)
1708 emit_move_insn (var, expr);
1709 seq = get_insns ();
1710 end_sequence ();
1711 emit_insn_before (seq, insn);
1712
1713 if (validate_change (insn, loc, var, 0))
1714 return;
1715
1716 /* The last chance. Try recreating the assignment in insn
1717 completely from scratch. */
1718 set = single_set (insn);
1719 gcc_assert (set);
1720
1721 start_sequence ();
1722 *loc = var;
1723 src = copy_rtx (SET_SRC (set));
1724 dest = copy_rtx (SET_DEST (set));
1725 src = force_operand (src, dest);
1726 if (src != dest)
1727 emit_move_insn (dest, src);
1728 seq = get_insns ();
1729 end_sequence ();
1730
1731 emit_insn_before (seq, insn);
1732 delete_insn (insn);
1733 }
1734
1735
1736 /* Return one expansion of the accumulator recorded in struct VE. */
1737
1738 static rtx
1739 get_expansion (struct var_to_expand *ve)
1740 {
1741 rtx reg;
1742
1743 if (ve->reuse_expansion == 0)
1744 reg = ve->reg;
1745 else
1746 reg = ve->var_expansions[ve->reuse_expansion - 1];
1747
1748 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1749 ve->reuse_expansion = 0;
1750 else
1751 ve->reuse_expansion++;
1752
1753 return reg;
1754 }
1755
1756
1757 /* Given INSN replace the uses of the accumulator recorded in VE
1758 with a new register. */
1759
1760 static void
1761 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1762 {
1763 rtx new_reg, set;
1764 bool really_new_expansion = false;
1765
1766 set = single_set (insn);
1767 gcc_assert (set);
1768
1769 /* Generate a new register only if the expansion limit has not been
1770 reached. Else reuse an already existing expansion. */
1771 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1772 {
1773 really_new_expansion = true;
1774 new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1775 }
1776 else
1777 new_reg = get_expansion (ve);
1778
1779 validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1780 if (apply_change_group ())
1781 if (really_new_expansion)
1782 {
1783 ve->var_expansions.safe_push (new_reg);
1784 ve->expansion_count++;
1785 }
1786 }
1787
1788 /* Initialize the variable expansions in loop preheader. PLACE is the
1789 loop-preheader basic block where the initialization of the
1790 expansions should take place. The expansions are initialized with
1791 (-0) when the operation is plus or minus to honor sign zero. This
1792 way we can prevent cases where the sign of the final result is
1793 effected by the sign of the expansion. Here is an example to
1794 demonstrate this:
1795
1796 for (i = 0 ; i < n; i++)
1797 sum += something;
1798
1799 ==>
1800
1801 sum += something
1802 ....
1803 i = i+1;
1804 sum1 += something
1805 ....
1806 i = i+1
1807 sum2 += something;
1808 ....
1809
1810 When SUM is initialized with -zero and SOMETHING is also -zero, the
1811 final result of sum should be -zero thus the expansions sum1 and sum2
1812 should be initialized with -zero as well (otherwise we will get +zero
1813 as the final result). */
1814
1815 static void
1816 insert_var_expansion_initialization (struct var_to_expand *ve,
1817 basic_block place)
1818 {
1819 rtx_insn *seq;
1820 rtx var, zero_init;
1821 unsigned i;
1822 machine_mode mode = GET_MODE (ve->reg);
1823 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1824
1825 if (ve->var_expansions.length () == 0)
1826 return;
1827
1828 start_sequence ();
1829 switch (ve->op)
1830 {
1831 case FMA:
1832 /* Note that we only accumulate FMA via the ADD operand. */
1833 case PLUS:
1834 case MINUS:
1835 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1836 {
1837 if (honor_signed_zero_p)
1838 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1839 else
1840 zero_init = CONST0_RTX (mode);
1841 emit_move_insn (var, zero_init);
1842 }
1843 break;
1844
1845 case MULT:
1846 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1847 {
1848 zero_init = CONST1_RTX (GET_MODE (var));
1849 emit_move_insn (var, zero_init);
1850 }
1851 break;
1852
1853 default:
1854 gcc_unreachable ();
1855 }
1856
1857 seq = get_insns ();
1858 end_sequence ();
1859
1860 emit_insn_after (seq, BB_END (place));
1861 }
1862
1863 /* Combine the variable expansions at the loop exit. PLACE is the
1864 loop exit basic block where the summation of the expansions should
1865 take place. */
1866
1867 static void
1868 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1869 {
1870 rtx sum = ve->reg;
1871 rtx expr, var;
1872 rtx_insn *seq, *insn;
1873 unsigned i;
1874
1875 if (ve->var_expansions.length () == 0)
1876 return;
1877
1878 start_sequence ();
1879 switch (ve->op)
1880 {
1881 case FMA:
1882 /* Note that we only accumulate FMA via the ADD operand. */
1883 case PLUS:
1884 case MINUS:
1885 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1886 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1887 break;
1888
1889 case MULT:
1890 FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1891 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1892 break;
1893
1894 default:
1895 gcc_unreachable ();
1896 }
1897
1898 expr = force_operand (sum, ve->reg);
1899 if (expr != ve->reg)
1900 emit_move_insn (ve->reg, expr);
1901 seq = get_insns ();
1902 end_sequence ();
1903
1904 insn = BB_HEAD (place);
1905 while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1906 insn = NEXT_INSN (insn);
1907
1908 emit_insn_after (seq, insn);
1909 }
1910
1911 /* Strip away REG_EQUAL notes for IVs we're splitting.
1912
1913 Updating REG_EQUAL notes for IVs we split is tricky: We
1914 cannot tell until after unrolling, DF-rescanning, and liveness
1915 updating, whether an EQ_USE is reached by the split IV while
1916 the IV reg is still live. See PR55006.
1917
1918 ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1919 because RTL loop-iv requires us to defer rescanning insns and
1920 any notes attached to them. So resort to old techniques... */
1921
1922 static void
1923 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1924 {
1925 struct iv_to_split *ivts;
1926 rtx note = find_reg_equal_equiv_note (insn);
1927 if (! note)
1928 return;
1929 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1930 if (reg_mentioned_p (ivts->orig_var, note))
1931 {
1932 remove_note (insn, note);
1933 return;
1934 }
1935 }
1936
1937 /* Apply loop optimizations in loop copies using the
1938 data gathered during the unrolling. Structure
1939 OPT_INFO records that data.
1940
1941 UNROLLING is true if we unrolled (not peeled) the loop.
1942 REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
1943 the loop (as should happen in complete unrolling, but not in ordinary
1944 peeling of the loop). */
1945
1946 static void
1947 apply_opt_in_copies (struct opt_info *opt_info,
1948 unsigned n_copies, bool unrolling,
1949 bool rewrite_original_loop)
1950 {
1951 unsigned i, delta;
1952 basic_block bb, orig_bb;
1953 rtx_insn *insn, *orig_insn, *next;
1954 struct iv_to_split ivts_templ, *ivts;
1955 struct var_to_expand ve_templ, *ves;
1956
1957 /* Sanity check -- we need to put initialization in the original loop
1958 body. */
1959 gcc_assert (!unrolling || rewrite_original_loop);
1960
1961 /* Allocate the basic variables (i0). */
1962 if (opt_info->insns_to_split)
1963 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1964 allocate_basic_variable (ivts);
1965
1966 for (i = opt_info->first_new_block;
1967 i < (unsigned) last_basic_block_for_fn (cfun);
1968 i++)
1969 {
1970 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1971 orig_bb = get_bb_original (bb);
1972
1973 /* bb->aux holds position in copy sequence initialized by
1974 duplicate_loop_to_header_edge. */
1975 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
1976 unrolling);
1977 bb->aux = 0;
1978 orig_insn = BB_HEAD (orig_bb);
1979 FOR_BB_INSNS_SAFE (bb, insn, next)
1980 {
1981 if (!INSN_P (insn)
1982 || (DEBUG_INSN_P (insn)
1983 && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
1984 continue;
1985
1986 while (!INSN_P (orig_insn)
1987 || (DEBUG_INSN_P (orig_insn)
1988 && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
1989 == LABEL_DECL)))
1990 orig_insn = NEXT_INSN (orig_insn);
1991
1992 ivts_templ.insn = orig_insn;
1993 ve_templ.insn = orig_insn;
1994
1995 /* Apply splitting iv optimization. */
1996 if (opt_info->insns_to_split)
1997 {
1998 maybe_strip_eq_note_for_split_iv (opt_info, insn);
1999
2000 ivts = opt_info->insns_to_split->find (&ivts_templ);
2001
2002 if (ivts)
2003 {
2004 gcc_assert (GET_CODE (PATTERN (insn))
2005 == GET_CODE (PATTERN (orig_insn)));
2006
2007 if (!delta)
2008 insert_base_initialization (ivts, insn);
2009 split_iv (ivts, insn, delta);
2010 }
2011 }
2012 /* Apply variable expansion optimization. */
2013 if (unrolling && opt_info->insns_with_var_to_expand)
2014 {
2015 ves = (struct var_to_expand *)
2016 opt_info->insns_with_var_to_expand->find (&ve_templ);
2017 if (ves)
2018 {
2019 gcc_assert (GET_CODE (PATTERN (insn))
2020 == GET_CODE (PATTERN (orig_insn)));
2021 expand_var_during_unrolling (ves, insn);
2022 }
2023 }
2024 orig_insn = NEXT_INSN (orig_insn);
2025 }
2026 }
2027
2028 if (!rewrite_original_loop)
2029 return;
2030
2031 /* Initialize the variable expansions in the loop preheader
2032 and take care of combining them at the loop exit. */
2033 if (opt_info->insns_with_var_to_expand)
2034 {
2035 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2036 insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2037 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2038 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2039 }
2040
2041 /* Also rewrite the original loop body. Find its blocks as the originals
2042 of the blocks in the last copied iteration, i.e. those that have
2043 get_bb_copy (get_bb_original (bb)) == bb. */
2044 for (i = opt_info->first_new_block;
2045 i < (unsigned) last_basic_block_for_fn (cfun);
2046 i++)
2047 {
2048 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2049 orig_bb = get_bb_original (bb);
2050 if (get_bb_copy (orig_bb) != bb)
2051 continue;
2052
2053 delta = determine_split_iv_delta (0, n_copies, unrolling);
2054 for (orig_insn = BB_HEAD (orig_bb);
2055 orig_insn != NEXT_INSN (BB_END (bb));
2056 orig_insn = next)
2057 {
2058 next = NEXT_INSN (orig_insn);
2059
2060 if (!INSN_P (orig_insn))
2061 continue;
2062
2063 ivts_templ.insn = orig_insn;
2064 if (opt_info->insns_to_split)
2065 {
2066 maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2067
2068 ivts = (struct iv_to_split *)
2069 opt_info->insns_to_split->find (&ivts_templ);
2070 if (ivts)
2071 {
2072 if (!delta)
2073 insert_base_initialization (ivts, orig_insn);
2074 split_iv (ivts, orig_insn, delta);
2075 continue;
2076 }
2077 }
2078
2079 }
2080 }
2081 }
2082
2083 /* Release OPT_INFO. */
2084
2085 static void
2086 free_opt_info (struct opt_info *opt_info)
2087 {
2088 delete opt_info->insns_to_split;
2089 opt_info->insns_to_split = NULL;
2090 if (opt_info->insns_with_var_to_expand)
2091 {
2092 struct var_to_expand *ves;
2093
2094 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2095 ves->var_expansions.release ();
2096 delete opt_info->insns_with_var_to_expand;
2097 opt_info->insns_with_var_to_expand = NULL;
2098 }
2099 free (opt_info);
2100 }