regmove.c (optimize_reg_copy_1): Undo Aug 18 change.
[gcc.git] / gcc / gcse.c
1 /* Global common subexpression elimination
2 and global constant/copy propagation for GNU compiler.
3 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
4
5 This file is part of GNU CC.
6
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 /* TODO
23 - reordering of memory allocation and freeing to be more space efficient
24 - do rough calc of how many regs are needed in each block, and a rough
25 calc of how many regs are available in each class and use that to
26 throttle back the code in cases where RTX_COST is minimal.
27 - memory aliasing support
28 - ability to realloc sbitmap vectors would allow one initial computation
29 of reg_set_in_block with only subsequent additions, rather than
30 recomputing it for each pass
31
32 NOTES
33 - the classic gcse implementation is kept in for now for comparison
34 */
35
36 /* References searched while implementing this.
37 This list will eventually be deleted but I wanted to have a record of it
38 for now.
39
40 Compilers Principles, Techniques and Tools
41 Aho, Sethi, Ullman
42 Addison-Wesley, 1988
43
44 Global Optimization by Suppression of Partial Redundancies
45 E. Morel, C. Renvoise
46 communications of the acm, Vol. 22, Num. 2, Feb. 1979
47
48 A Portable Machine-Independent Global Optimizer - Design and Measurements
49 Frederick Chow
50 Stanford Ph.D. thesis, Dec. 1983
51
52 xxx
53 Elimination Algorithms for Data Flow Analysis
54 B.G. Ryder, M.C. Paull
55 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
56
57 reread
58 A Fast Algorithm for Code Movement Optimization
59 D.M. Dhamdhere
60 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
61
62 A Solution to a Problem with Morel and Renvoise's
63 Global Optimization by Suppression of Partial Redundancies
64 K-H Drechsler, M.P. Stadel
65 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
66
67 Practical Adaptation of the Global Optimization
68 Algorithm of Morel and Renvoise
69 D.M. Dhamdhere
70 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
71
72 Efficiently Computing Static Single Assignment Form and the Control
73 Dependence Graph
74 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
75 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
76
77 yyy
78 How to Analyze Large Programs Efficiently and Informatively
79 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
80 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
81
82 Lazy Code Motion
83 J. Knoop, O. Ruthing, B. Steffen
84 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
85
86 What's In a Region? Or Computing Control Dependence Regions in Near-Linear
87 Time for Reducible Flow Control
88 Thomas Ball
89 ACM Letters on Programming Languages and Systems,
90 Vol. 2, Num. 1-4, Mar-Dec 1993
91
92 An Efficient Representation for Sparse Sets
93 Preston Briggs, Linda Torczon
94 ACM Letters on Programming Languages and Systems,
95 Vol. 2, Num. 1-4, Mar-Dec 1993
96
97 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
98 K-H Drechsler, M.P. Stadel
99 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
100
101 Partial Dead Code Elimination
102 J. Knoop, O. Ruthing, B. Steffen
103 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
104
105 Effective Partial Redundancy Elimination
106 P. Briggs, K.D. Cooper
107 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
108
109 The Program Structure Tree: Computing Control Regions in Linear Time
110 R. Johnson, D. Pearson, K. Pingali
111 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
112
113 Optimal Code Motion: Theory and Practice
114 J. Knoop, O. Ruthing, B. Steffen
115 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
116
117 The power of assignment motion
118 J. Knoop, O. Ruthing, B. Steffen
119 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
120
121 Global code motion / global value numbering
122 C. Click
123 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
124
125 Value Driven Redundancy Elimination
126 L.T. Simpson
127 Rice University Ph.D. thesis, Apr. 1996
128
129 Value Numbering
130 L.T. Simpson
131 Massively Scalar Compiler Project, Rice University, Sep. 1996
132
133 High Performance Compilers for Parallel Computing
134 Michael Wolfe
135 Addison-Wesley, 1996
136
137 People wishing to speed up the code here should read xxx, yyy.
138 People wishing to do something different can find various possibilities
139 in the above papers and elsewhere.
140 */
141
142 #include "config.h"
143 /* Must precede rtl.h for FFS. */
144 #include "system.h"
145
146 #include "rtl.h"
147 #include "regs.h"
148 #include "hard-reg-set.h"
149 #include "flags.h"
150 #include "real.h"
151 #include "insn-config.h"
152 #include "recog.h"
153 #include "basic-block.h"
154 #include "output.h"
155 #include "expr.h"
156
157 #include "obstack.h"
158 #define obstack_chunk_alloc gmalloc
159 #define obstack_chunk_free free
160
161 /* Maximum number of passes to perform. */
162 #define MAX_PASSES 1
163
164 /* Propagate flow information through back edges and thus enable PRE's
165 moving loop invariant calculations out of loops.
166
167 Originally this tended to create worse overall code, but several
168 improvements during the development of PRE seem to have made following
169 back edges generally a win.
170
171 Note much of the loop invariant code motion done here would normally
172 be done by loop.c, which has more heuristics for when to move invariants
173 out of loops. At some point we might need to move some of those
174 heuristics into gcse.c. */
175 #define FOLLOW_BACK_EDGES 1
176
177 /* We support two GCSE implementations: Classic GCSE (i.e. Dragon Book)
178 and PRE (Partial Redundancy Elimination) GCSE (based on Fred Chow's thesis).
179 The default is PRE.
180
181 In either case we perform the following steps:
182
183 1) Compute basic block information.
184
185 2) Compute table of places where registers are set.
186
187 3) Perform copy/constant propagation.
188
189 4) Perform global cse.
190
191 5) Perform another pass of copy/constant propagation [only if PRE].
192
193 Two passes of copy/constant propagation are done because the first one
194 enables more GCSE and the second one helps to clean up the copies that
195 GCSE creates. This is needed more for PRE than for Classic because Classic
196 GCSE will try to use an existing register containing the common
197 subexpression rather than create a new one. This is harder to do for PRE
198 because of the code motion (which Classic GCSE doesn't do).
199
200 Expressions we are interested in GCSE-ing are of the form
201 (set (pseudo-reg) (expression)).
202 Function want_to_gcse_p says what these are.
203
204 PRE handles moving invariant expressions out of loops (by treating them as
205 partially redundant). This feature of PRE is disabled here (by not
206 propagating dataflow information along back edges) because loop.c has more
207 involved (and thus typically better) heuristics for what to move out of
208 loops.
209
210 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
211 assignment) based GVN (global value numbering). L. T. Simpson's paper
212 (Rice University) on value numbering is a useful reference for this.
213
214 **********************
215
216 We used to support multiple passes but there are diminishing returns in
217 doing so. The first pass usually makes 90% of the changes that are doable.
218 A second pass can make a few more changes made possible by the first pass.
219 Experiments show any further passes don't make enough changes to justify
220 the expense.
221
222 A study of spec92 using an unlimited number of passes:
223 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
224 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
225 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
226
227 It was found doing copy propagation between each pass enables further
228 substitutions.
229
230 PRE is quite expensive in complicated functions because the DFA can take
231 awhile to converge. Hence we only perform one pass. Macro MAX_PASSES can
232 be modified if one wants to experiment.
233
234 **********************
235
236 The steps for PRE are:
237
238 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
239
240 2) Perform the data flow analysis for PRE.
241
242 3) Delete the redundant instructions
243
244 4) Insert the required copies [if any] that make the partially
245 redundant instructions fully redundant.
246
247 5) For other reaching expressions, insert an instruction to copy the value
248 to a newly created pseudo that will reach the redundant instruction.
249
250 The deletion is done first so that when we do insertions we
251 know which pseudo reg to use.
252
253 Various papers have argued that PRE DFA is expensive (O(n^2)) and others
254 argue it is not. The number of iterations for the algorithm to converge
255 is typically 2-4 so I don't view it as that expensive (relatively speaking).
256
257 PRE GCSE depends heavily on the seconds CSE pass to clean up the copies
258 we create. To make an expression reach the place where it's redundant,
259 the result of the expression is copied to a new register, and the redundant
260 expression is deleted by replacing it with this new register. Classic GCSE
261 doesn't have this problem as much as it computes the reaching defs of
262 each register in each block and thus can try to use an existing register.
263
264 **********************
265
266 When -fclassic-gcse, we perform a classic global CSE pass.
267 It is based on the algorithms in the Dragon book, and is based on code
268 written by Devor Tevi at Intel.
269
270 The steps for Classic GCSE are:
271
272 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
273 Also recorded are reaching definition "gen" statements (rd_gen)
274
275 2) Compute the reaching definitions (reaching_defs).
276 This is a bitmap for each basic block indexed by INSN_CUID that is 1
277 for each statement containing a definition that reaches the block.
278
279 3) Compute the available expressions (ae_in).
280 This is a bitmap for each basic block indexed by expression number
281 that is 1 for each expression that is available at the beginning of
282 the block.
283
284 4) Perform GCSE.
285 This is done by scanning each instruction looking for sets of the form
286 (set (pseudo-reg) (expression)) and checking if `expression' is in the
287 hash table. If it is, and if the expression is available, and if only
288 one computation of the expression reaches the instruction, we substitute
289 the expression for a register containing its value. If there is no
290 such register, but the expression is expensive enough we create an
291 instruction to copy the result of the expression into and use that.
292
293 **********************
294
295 A fair bit of simplicity is created by creating small functions for simple
296 tasks, even when the function is only called in one place. This may
297 measurably slow things down [or may not] by creating more function call
298 overhead than is necessary. The source is laid out so that it's trivial
299 to make the affected functions inline so that one can measure what speed
300 up, if any, can be achieved, and maybe later when things settle things can
301 be rearranged.
302
303 Help stamp out big monolithic functions! */
304 \f
305 /* GCSE global vars. */
306
307 /* -dG dump file. */
308 static FILE *gcse_file;
309
310 /* Bitmaps are normally not included in debugging dumps.
311 However it's useful to be able to print them from GDB.
312 We could create special functions for this, but it's simpler to
313 just allow passing stderr to the dump_foo fns. Since stderr can
314 be a macro, we store a copy here. */
315 static FILE *debug_stderr;
316
317 /* An obstack for our working variables. */
318 static struct obstack gcse_obstack;
319
320 /* Non-zero for each mode that supports (set (reg) (reg)).
321 This is trivially true for integer and floating point values.
322 It may or may not be true for condition codes. */
323 static char can_copy_p[(int) NUM_MACHINE_MODES];
324
325 /* Non-zero if can_copy_p has been initialized. */
326 static int can_copy_init_p;
327
328 /* Element I is a list of I's predecessors/successors. */
329 static int_list_ptr *s_preds;
330 static int_list_ptr *s_succs;
331
332 /* Element I is the number of predecessors/successors of basic block I. */
333 static int *num_preds;
334 static int *num_succs;
335
336 /* Hash table of expressions. */
337
338 struct expr
339 {
340 /* The expression (SET_SRC for expressions, PATTERN for assignments). */
341 rtx expr;
342 /* Index in the available expression bitmaps. */
343 int bitmap_index;
344 /* Next entry with the same hash. */
345 struct expr *next_same_hash;
346 /* List of anticipatable occurrences in basic blocks in the function.
347 An "anticipatable occurrence" is one that is the first occurrence in the
348 basic block and the operands are not modified in the basic block prior
349 to the occurrence. */
350 struct occr *antic_occr;
351 /* List of available occurrence in basic blocks in the function.
352 An "available occurrence" is one that is the last occurrence in the
353 basic block and the operands are not modified by following statements in
354 the basic block [including this insn]. */
355 struct occr *avail_occr;
356 /* Non-null if the computation is PRE redundant.
357 The value is the newly created pseudo-reg to record a copy of the
358 expression in all the places that reach the redundant copy. */
359 rtx reaching_reg;
360 };
361
362 /* Occurrence of an expression.
363 There is one per basic block. If a pattern appears more than once the
364 last appearance is used [or first for anticipatable expressions]. */
365
366 struct occr
367 {
368 /* Next occurrence of this expression. */
369 struct occr *next;
370 /* The insn that computes the expression. */
371 rtx insn;
372 /* Non-zero if this [anticipatable] occurrence has been deleted. */
373 char deleted_p;
374 /* Non-zero if this [available] occurrence has been copied to
375 reaching_reg. */
376 /* ??? This is mutually exclusive with deleted_p, so they could share
377 the same byte. */
378 char copied_p;
379 };
380
381 /* Expression and copy propagation hash tables.
382 Each hash table is an array of buckets.
383 ??? It is known that if it were an array of entries, structure elements
384 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
385 not clear whether in the final analysis a sufficient amount of memory would
386 be saved as the size of the available expression bitmaps would be larger
387 [one could build a mapping table without holes afterwards though].
388 Someday I'll perform the computation and figure it out.
389 */
390
391 /* Total size of the expression hash table, in elements. */
392 static int expr_hash_table_size;
393 /* The table itself.
394 This is an array of `expr_hash_table_size' elements. */
395 static struct expr **expr_hash_table;
396
397 /* Total size of the copy propagation hash table, in elements. */
398 static int set_hash_table_size;
399 /* The table itself.
400 This is an array of `set_hash_table_size' elements. */
401 static struct expr **set_hash_table;
402
403 /* Mapping of uids to cuids.
404 Only real insns get cuids. */
405 static int *uid_cuid;
406
407 /* Highest UID in UID_CUID. */
408 static int max_uid;
409
410 /* Get the cuid of an insn. */
411 #define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
412
413 /* Number of cuids. */
414 static int max_cuid;
415
416 /* Mapping of cuids to insns. */
417 static rtx *cuid_insn;
418
419 /* Get insn from cuid. */
420 #define CUID_INSN(CUID) (cuid_insn[CUID])
421
422 /* Maximum register number in function prior to doing gcse + 1.
423 Registers created during this pass have regno >= max_gcse_regno.
424 This is named with "gcse" to not collide with global of same name. */
425 static int max_gcse_regno;
426
427 /* Maximum number of cse-able expressions found. */
428 static int n_exprs;
429 /* Maximum number of assignments for copy propagation found. */
430 static int n_sets;
431
432 /* Table of registers that are modified.
433 For each register, each element is a list of places where the pseudo-reg
434 is set.
435
436 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only
437 requires knowledge of which blocks kill which regs [and thus could use
438 a bitmap instead of the lists `reg_set_table' uses]. The classic GCSE
439 uses the information in lists.
440
441 If the classic GCSE pass is deleted `reg_set_table' and could be turned
442 into an array of bitmaps (num-bbs x num-regs)
443 [however perhaps it may be useful to keep the data as is].
444 One advantage of recording things this way is that `reg_set_table' is
445 fairly sparse with respect to pseudo regs but for hard regs could be
446 fairly dense [relatively speaking].
447 And recording sets of pseudo-regs in lists speeds
448 up functions like compute_transp since in the case of pseudo-regs we only
449 need to iterate over the number of times a pseudo-reg is set, not over the
450 number of basic blocks [clearly there is a bit of a slow down in the cases
451 where a pseudo is set more than once in a block, however it is believed
452 that the net effect is to speed things up]. This isn't done for hard-regs
453 because recording call-clobbered hard-regs in `reg_set_table' at each
454 function call can consume a fair bit of memory, and iterating over hard-regs
455 stored this way in compute_transp will be more expensive. */
456
457 typedef struct reg_set {
458 /* The next setting of this register. */
459 struct reg_set *next;
460 /* The insn where it was set. */
461 rtx insn;
462 } reg_set;
463 static reg_set **reg_set_table;
464 /* Size of `reg_set_table'.
465 The table starts out at max_gcse_regno + slop, and is enlarged as
466 necessary. */
467 static int reg_set_table_size;
468 /* Amount to grow `reg_set_table' by when it's full. */
469 #define REG_SET_TABLE_SLOP 100
470
471 /* Bitmap containing one bit for each register in the program.
472 Used when performing GCSE to track which registers have been set since
473 the start of the basic block. */
474 static sbitmap reg_set_bitmap;
475
476 /* For each block, a bitmap of registers set in the block.
477 This is used by expr_killed_p and compute_transp.
478 It is computed during hash table computation and not by compute_sets
479 as it includes registers added since the last pass (or between cprop and
480 gcse) and it's currently not easy to realloc sbitmap vectors. */
481 static sbitmap *reg_set_in_block;
482
483 /* For each block, non-zero if memory is set in that block.
484 This is computed during hash table computation and is used by
485 expr_killed_p and compute_transp.
486 ??? Handling of memory is very simple, we don't make any attempt
487 to optimize things (later).
488 ??? This can be computed by compute_sets since the information
489 doesn't change. */
490 static char *mem_set_in_block;
491
492 /* Various variables for statistics gathering. */
493
494 /* Memory used in a pass.
495 This isn't intended to be absolutely precise. Its intent is only
496 to keep an eye on memory usage. */
497 static int bytes_used;
498 /* GCSE substitutions made. */
499 static int gcse_subst_count;
500 /* Number of copy instructions created. */
501 static int gcse_create_count;
502 /* Number of constants propagated. */
503 static int const_prop_count;
504 /* Number of copys propagated. */
505 static int copy_prop_count;
506
507 extern char *current_function_name;
508 extern int current_function_calls_setjmp;
509 extern int current_function_calls_longjmp;
510 \f
511 /* These variables are used by classic GCSE.
512 Normally they'd be defined a bit later, but `rd_gen' needs to
513 be declared sooner. */
514
515 /* A bitmap of all ones for implementing the algorithm for available
516 expressions and reaching definitions. */
517 /* ??? Available expression bitmaps have a different size than reaching
518 definition bitmaps. This should be the larger of the two, however, it
519 is not currently used for reaching definitions. */
520 static sbitmap u_bitmap;
521
522 /* Each block has a bitmap of each type.
523 The length of each blocks bitmap is:
524
525 max_cuid - for reaching definitions
526 n_exprs - for available expressions
527
528 Thus we view the bitmaps as 2 dimensional arrays. i.e.
529 rd_kill[block_num][cuid_num]
530 ae_kill[block_num][expr_num]
531 */
532
533 /* For reaching defs */
534 static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
535
536 /* for available exprs */
537 static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
538 \f
539 static void compute_can_copy PROTO ((void));
540
541 static char *gmalloc PROTO ((unsigned int));
542 static char *grealloc PROTO ((char *, unsigned int));
543 static char *gcse_alloc PROTO ((unsigned long));
544 static void alloc_gcse_mem PROTO ((rtx));
545 static void free_gcse_mem PROTO ((void));
546 extern void dump_cuid_table PROTO ((FILE *));
547
548 static void alloc_reg_set_mem PROTO ((int));
549 static void free_reg_set_mem PROTO ((void));
550 static void record_one_set PROTO ((int, rtx));
551 static void record_set_info PROTO ((rtx, rtx));
552 static void compute_sets PROTO ((rtx));
553
554 static void hash_scan_insn PROTO ((rtx, int, int));
555 static void hash_scan_set PROTO ((rtx, rtx, int));
556 static void hash_scan_clobber PROTO ((rtx, rtx));
557 static void hash_scan_call PROTO ((rtx, rtx));
558 static void maybe_set_rd_gen PROTO ((int, rtx));
559 static int want_to_gcse_p PROTO ((rtx));
560 static int oprs_unchanged_p PROTO ((rtx, rtx, int));
561 static int oprs_anticipatable_p PROTO ((rtx, rtx));
562 static int oprs_available_p PROTO ((rtx, rtx));
563 static void insert_expr_in_table PROTO ((rtx, enum machine_mode, rtx, int, int));
564 static void insert_set_in_table PROTO ((rtx, rtx));
565 static unsigned int hash_expr PROTO ((rtx, enum machine_mode, int *, int));
566 static unsigned int hash_expr_1 PROTO ((rtx, enum machine_mode, int *));
567 static unsigned int hash_set PROTO ((int, int));
568 static int expr_equiv_p PROTO ((rtx, rtx));
569 static void record_last_reg_set_info PROTO ((rtx, int));
570 static void record_last_mem_set_info PROTO ((rtx));
571 static void record_last_set_info PROTO ((rtx, rtx));
572 static void compute_hash_table PROTO ((rtx, int));
573 static void alloc_set_hash_table PROTO ((int));
574 static void free_set_hash_table PROTO ((void));
575 static void compute_set_hash_table PROTO ((rtx));
576 static void alloc_expr_hash_table PROTO ((int));
577 static void free_expr_hash_table PROTO ((void));
578 static void compute_expr_hash_table PROTO ((rtx));
579 static void dump_hash_table PROTO ((FILE *, char *, struct expr **, int, int));
580 static struct expr *lookup_expr PROTO ((rtx));
581 static struct expr *lookup_set PROTO ((int, rtx));
582 static struct expr *next_set PROTO ((int, struct expr *));
583 static void reset_opr_set_tables PROTO ((void));
584 static int oprs_not_set_p PROTO ((rtx, rtx));
585 static void mark_call PROTO ((rtx, rtx));
586 static void mark_set PROTO ((rtx, rtx));
587 static void mark_clobber PROTO ((rtx, rtx));
588 static void mark_oprs_set PROTO ((rtx));
589
590 static void alloc_rd_mem PROTO ((int, int));
591 static void free_rd_mem PROTO ((void));
592 static void compute_kill_rd PROTO ((void));
593 static void handle_rd_kill_set PROTO ((rtx, int, int));
594 static void compute_rd PROTO ((void));
595 extern void dump_rd_table PROTO ((FILE *, char *, sbitmap *));
596
597 static void alloc_avail_expr_mem PROTO ((int, int));
598 static void free_avail_expr_mem PROTO ((void));
599 static void compute_ae_gen PROTO ((void));
600 static void compute_ae_kill PROTO ((void));
601 static int expr_killed_p PROTO ((rtx, int));
602 static void compute_available PROTO ((void));
603
604 static int expr_reaches_here_p PROTO ((struct occr *, struct expr *,
605 int, int, char *));
606 static rtx computing_insn PROTO ((struct expr *, rtx));
607 static int def_reaches_here_p PROTO ((rtx, rtx));
608 static int can_disregard_other_sets PROTO ((struct reg_set **, rtx, int));
609 static int handle_avail_expr PROTO ((rtx, struct expr *));
610 static int classic_gcse PROTO ((void));
611 static int one_classic_gcse_pass PROTO ((rtx, int));
612
613 static void alloc_cprop_mem PROTO ((int, int));
614 static void free_cprop_mem PROTO ((void));
615 extern void dump_cprop_data PROTO ((FILE *));
616 static void compute_transp PROTO ((rtx, int, sbitmap *, int));
617 static void compute_cprop_local_properties PROTO ((void));
618 static void compute_cprop_avinout PROTO ((void));
619 static void compute_cprop_data PROTO ((void));
620 static void find_used_regs PROTO ((rtx));
621 static int try_replace_reg PROTO ((rtx, rtx, rtx));
622 static struct expr *find_avail_set PROTO ((int, rtx));
623 static int cprop_insn PROTO ((rtx));
624 static int cprop PROTO ((void));
625 static int one_cprop_pass PROTO ((rtx, int));
626
627 static void alloc_pre_mem PROTO ((int, int));
628 static void free_pre_mem PROTO ((void));
629 extern void dump_pre_data PROTO ((FILE *));
630 static void compute_pre_local_properties PROTO ((void));
631 static void compute_pre_avinout PROTO ((void));
632 static void compute_pre_antinout PROTO ((void));
633 static void compute_pre_pavinout PROTO ((void));
634 static void compute_pre_ppinout PROTO ((void));
635 static void compute_pre_data PROTO ((void));
636 static int pre_expr_reaches_here_p PROTO ((struct occr *, struct expr *,
637 int, char *));
638 static void pre_insert_insn PROTO ((struct expr *, int));
639 static void pre_insert PROTO ((struct expr **));
640 static void pre_insert_copy_insn PROTO ((struct expr *, rtx));
641 static void pre_insert_copies PROTO ((void));
642 static int pre_delete PROTO ((void));
643 static int pre_gcse PROTO ((void));
644 static int one_pre_gcse_pass PROTO ((rtx, int));
645
646 static void add_label_notes PROTO ((rtx, rtx));
647 \f
648 /* Entry point for global common subexpression elimination.
649 F is the first instruction in the function. */
650
651 void
652 gcse_main (f, file)
653 rtx f;
654 FILE *file;
655 {
656 int changed, pass;
657 /* Bytes used at start of pass. */
658 int initial_bytes_used;
659 /* Maximum number of bytes used by a pass. */
660 int max_pass_bytes;
661 /* Point to release obstack data from for each pass. */
662 char *gcse_obstack_bottom;
663
664 /* It's impossible to construct a correct control flow graph in the
665 presense of setjmp, so just punt to be safe. */
666 if (current_function_calls_setjmp)
667 return;
668
669 /* For calling dump_foo fns from gdb. */
670 debug_stderr = stderr;
671
672 max_gcse_regno = max_reg_num ();
673 find_basic_blocks (f, max_gcse_regno, file);
674
675 /* Return if there's nothing to do. */
676 if (n_basic_blocks <= 1)
677 {
678 /* Free storage allocated by find_basic_blocks. */
679 free_basic_block_vars (0);
680 return;
681 }
682
683 /* See what modes support reg/reg copy operations. */
684 if (! can_copy_init_p)
685 {
686 compute_can_copy ();
687 can_copy_init_p = 1;
688 }
689
690 gcc_obstack_init (&gcse_obstack);
691
692 gcse_file = file;
693
694 /* Allocate and compute predecessors/successors. */
695
696 s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
697 s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr));
698 num_preds = (int *) alloca (n_basic_blocks * sizeof (int));
699 num_succs = (int *) alloca (n_basic_blocks * sizeof (int));
700 bytes_used = 4 * n_basic_blocks * sizeof (int_list_ptr);
701 compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
702
703 if (file)
704 dump_bb_data (file, s_preds, s_succs, 0);
705
706 /* Record where pseudo-registers are set.
707 This data is kept accurate during each pass.
708 ??? We could also record hard-reg and memory information here
709 [since it's unchanging], however it is currently done during
710 hash table computation. */
711
712 alloc_reg_set_mem (max_gcse_regno);
713 compute_sets (f);
714
715 pass = 0;
716 initial_bytes_used = bytes_used;
717 max_pass_bytes = 0;
718 gcse_obstack_bottom = gcse_alloc (1);
719 changed = 1;
720 while (changed && pass < MAX_PASSES)
721 {
722 changed = 0;
723 if (file)
724 fprintf (file, "GCSE pass %d\n\n", pass + 1);
725
726 /* Initialize bytes_used to the space for the pred/succ lists,
727 and the reg_set_table data. */
728 bytes_used = initial_bytes_used;
729
730 /* Each pass may create new registers, so recalculate each time. */
731 max_gcse_regno = max_reg_num ();
732
733 alloc_gcse_mem (f);
734
735 changed = one_cprop_pass (f, pass + 1);
736
737 if (optimize_size)
738 changed |= one_classic_gcse_pass (f, pass + 1);
739 else
740 changed |= one_pre_gcse_pass (f, pass + 1);
741
742 if (max_pass_bytes < bytes_used)
743 max_pass_bytes = bytes_used;
744
745 free_gcse_mem ();
746
747 if (file)
748 {
749 fprintf (file, "\n");
750 fflush (file);
751 }
752 obstack_free (&gcse_obstack, gcse_obstack_bottom);
753 pass++;
754 }
755
756 /* If we're doing PRE, do one last pass of copy propagation. */
757 if (! optimize_size)
758 {
759 max_gcse_regno = max_reg_num ();
760 alloc_gcse_mem (f);
761 one_cprop_pass (f, pass + 1);
762 free_gcse_mem ();
763 }
764
765 if (file)
766 {
767 fprintf (file, "GCSE of %s: %d basic blocks, ",
768 current_function_name, n_basic_blocks);
769 fprintf (file, "%d pass%s, %d bytes\n\n",
770 pass, pass > 1 ? "es" : "", max_pass_bytes);
771 }
772
773 /* Free our obstack. */
774 obstack_free (&gcse_obstack, NULL_PTR);
775 /* Free reg_set_table. */
776 free_reg_set_mem ();
777 /* Free storage used to record predecessor/successor data. */
778 free_bb_mem ();
779 /* Free storage allocated by find_basic_blocks. */
780 free_basic_block_vars (0);
781 }
782 \f
783 /* Misc. utilities. */
784
785 /* Compute which modes support reg/reg copy operations. */
786
787 static void
788 compute_can_copy ()
789 {
790 int i;
791 #ifndef AVOID_CCMODE_COPIES
792 rtx reg,insn;
793 #endif
794 char *free_point = (char *) oballoc (1);
795
796 bzero (can_copy_p, NUM_MACHINE_MODES);
797
798 start_sequence ();
799 for (i = 0; i < NUM_MACHINE_MODES; i++)
800 {
801 switch (GET_MODE_CLASS (i))
802 {
803 case MODE_CC :
804 #ifdef AVOID_CCMODE_COPIES
805 can_copy_p[i] = 0;
806 #else
807 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
808 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
809 if (recog (PATTERN (insn), insn, NULL_PTR) >= 0)
810 can_copy_p[i] = 1;
811 #endif
812 break;
813 default :
814 can_copy_p[i] = 1;
815 break;
816 }
817 }
818 end_sequence ();
819
820 /* Free the objects we just allocated. */
821 obfree (free_point);
822 }
823 \f
824 /* Cover function to xmalloc to record bytes allocated. */
825
826 static char *
827 gmalloc (size)
828 unsigned int size;
829 {
830 bytes_used += size;
831 return xmalloc (size);
832 }
833
834 /* Cover function to xrealloc.
835 We don't record the additional size since we don't know it.
836 It won't affect memory usage stats much anyway. */
837
838 static char *
839 grealloc (ptr, size)
840 char *ptr;
841 unsigned int size;
842 {
843 return xrealloc (ptr, size);
844 }
845
846 /* Cover function to obstack_alloc.
847 We don't need to record the bytes allocated here since
848 obstack_chunk_alloc is set to gmalloc. */
849
850 static char *
851 gcse_alloc (size)
852 unsigned long size;
853 {
854 return (char *) obstack_alloc (&gcse_obstack, size);
855 }
856
857 /* Allocate memory for the cuid mapping array,
858 and reg/memory set tracking tables.
859
860 This is called at the start of each pass. */
861
862 static void
863 alloc_gcse_mem (f)
864 rtx f;
865 {
866 int i,n;
867 rtx insn;
868
869 /* Find the largest UID and create a mapping from UIDs to CUIDs.
870 CUIDs are like UIDs except they increase monotonically, have no gaps,
871 and only apply to real insns. */
872
873 max_uid = get_max_uid ();
874 n = (max_uid + 1) * sizeof (int);
875 uid_cuid = (int *) gmalloc (n);
876 bzero ((char *) uid_cuid, n);
877 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
878 {
879 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
880 INSN_CUID (insn) = i++;
881 else
882 INSN_CUID (insn) = i;
883 }
884
885 /* Create a table mapping cuids to insns. */
886
887 max_cuid = i;
888 n = (max_cuid + 1) * sizeof (rtx);
889 cuid_insn = (rtx *) gmalloc (n);
890 bzero ((char *) cuid_insn, n);
891 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
892 {
893 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
894 {
895 CUID_INSN (i) = insn;
896 i++;
897 }
898 }
899
900 /* Allocate vars to track sets of regs. */
901
902 reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno);
903
904 /* Allocate vars to track sets of regs, memory per block. */
905
906 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
907 max_gcse_regno);
908 mem_set_in_block = (char *) gmalloc (n_basic_blocks);
909 }
910
911 /* Free memory allocated by alloc_gcse_mem. */
912
913 static void
914 free_gcse_mem ()
915 {
916 free (uid_cuid);
917 free (cuid_insn);
918
919 free (reg_set_bitmap);
920
921 free (reg_set_in_block);
922 free (mem_set_in_block);
923 }
924
925 void
926 dump_cuid_table (file)
927 FILE *file;
928 {
929 int i;
930
931 fprintf (file, "CUID table\n");
932 for (i = 0; i < max_cuid; i++)
933 {
934 rtx insn = CUID_INSN (i);
935 if (i != 0 && i % 10 == 0)
936 fprintf (file, "\n");
937 if (insn != NULL)
938 fprintf (file, " %d", INSN_UID (insn));
939 }
940 fprintf (file, "\n\n");
941 }
942 \f
943 /* Register set information.
944
945 `reg_set_table' records where each register is set or otherwise
946 modified. */
947
948 static struct obstack reg_set_obstack;
949
950 static void
951 alloc_reg_set_mem (n_regs)
952 int n_regs;
953 {
954 int n;
955
956 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
957 n = reg_set_table_size * sizeof (struct reg_set *);
958 reg_set_table = (struct reg_set **) gmalloc (n);
959 bzero ((char *) reg_set_table, n);
960
961 gcc_obstack_init (&reg_set_obstack);
962 }
963
964 static void
965 free_reg_set_mem ()
966 {
967 free (reg_set_table);
968 obstack_free (&reg_set_obstack, NULL_PTR);
969 }
970
971 /* Record REGNO in the reg_set table. */
972
973 static void
974 record_one_set (regno, insn)
975 int regno;
976 rtx insn;
977 {
978 /* allocate a new reg_set element and link it onto the list */
979 struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2;
980
981 /* If the table isn't big enough, enlarge it. */
982 if (regno >= reg_set_table_size)
983 {
984 int new_size = regno + REG_SET_TABLE_SLOP;
985 reg_set_table = (struct reg_set **)
986 grealloc ((char *) reg_set_table,
987 new_size * sizeof (struct reg_set *));
988 bzero ((char *) (reg_set_table + reg_set_table_size),
989 (new_size - reg_set_table_size) * sizeof (struct reg_set *));
990 reg_set_table_size = new_size;
991 }
992
993 new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
994 sizeof (struct reg_set));
995 bytes_used += sizeof (struct reg_set);
996 new_reg_info->insn = insn;
997 new_reg_info->next = NULL;
998 if (reg_set_table[regno] == NULL)
999 reg_set_table[regno] = new_reg_info;
1000 else
1001 {
1002 reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
1003 /* ??? One could keep a "last" pointer to speed this up. */
1004 while (reg_info_ptr1 != NULL)
1005 {
1006 reg_info_ptr2 = reg_info_ptr1;
1007 reg_info_ptr1 = reg_info_ptr1->next;
1008 }
1009 reg_info_ptr2->next = new_reg_info;
1010 }
1011 }
1012
1013 /* For communication between next two functions (via note_stores). */
1014 static rtx record_set_insn;
1015
1016 /* Called from compute_sets via note_stores to handle one
1017 SET or CLOBBER in an insn. */
1018
1019 static void
1020 record_set_info (dest, setter)
1021 rtx dest, setter ATTRIBUTE_UNUSED;
1022 {
1023 if (GET_CODE (dest) == SUBREG)
1024 dest = SUBREG_REG (dest);
1025
1026 if (GET_CODE (dest) == REG)
1027 {
1028 if (REGNO (dest) >= FIRST_PSEUDO_REGISTER)
1029 record_one_set (REGNO (dest), record_set_insn);
1030 }
1031 }
1032
1033 /* Scan the function and record each set of each pseudo-register.
1034
1035 This is called once, at the start of the gcse pass.
1036 See the comments for `reg_set_table' for further docs. */
1037
1038 static void
1039 compute_sets (f)
1040 rtx f;
1041 {
1042 rtx insn = f;
1043
1044 while (insn)
1045 {
1046 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
1047 {
1048 record_set_insn = insn;
1049 note_stores (PATTERN (insn), record_set_info);
1050 }
1051 insn = NEXT_INSN (insn);
1052 }
1053 }
1054 \f
1055 /* Hash table support. */
1056
1057 #define NEVER_SET -1
1058
1059 /* For each register, the cuid of the first/last insn in the block to set it,
1060 or -1 if not set. */
1061 static int *reg_first_set;
1062 static int *reg_last_set;
1063
1064 /* While computing "first/last set" info, this is the CUID of first/last insn
1065 to set memory or -1 if not set. `mem_last_set' is also used when
1066 performing GCSE to record whether memory has been set since the beginning
1067 of the block.
1068 Note that handling of memory is very simple, we don't make any attempt
1069 to optimize things (later). */
1070 static int mem_first_set;
1071 static int mem_last_set;
1072
1073 /* Set the appropriate bit in `rd_gen' [the gen for reaching defs] if the
1074 register set in this insn is not set after this insn in this block. */
1075
1076 static void
1077 maybe_set_rd_gen (regno, insn)
1078 int regno;
1079 rtx insn;
1080 {
1081 if (reg_last_set[regno] <= INSN_CUID (insn))
1082 SET_BIT (rd_gen[BLOCK_NUM (insn)], INSN_CUID (insn));
1083 }
1084
1085 /* Perform a quick check whether X, the source of a set, is something
1086 we want to consider for GCSE. */
1087
1088 static int
1089 want_to_gcse_p (x)
1090 rtx x;
1091 {
1092 enum rtx_code code = GET_CODE (x);
1093
1094 switch (code)
1095 {
1096 case REG:
1097 case SUBREG:
1098 case CONST_INT:
1099 case CONST_DOUBLE:
1100 case CALL:
1101 return 0;
1102
1103 default:
1104 break;
1105 }
1106
1107 return 1;
1108 }
1109
1110 /* Return non-zero if the operands of expression X are unchanged from the
1111 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
1112 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
1113
1114 static int
1115 oprs_unchanged_p (x, insn, avail_p)
1116 rtx x, insn;
1117 int avail_p;
1118 {
1119 int i;
1120 enum rtx_code code;
1121 char *fmt;
1122
1123 /* repeat is used to turn tail-recursion into iteration. */
1124 repeat:
1125
1126 if (x == 0)
1127 return 1;
1128
1129 code = GET_CODE (x);
1130 switch (code)
1131 {
1132 case REG:
1133 if (avail_p)
1134 return (reg_last_set[REGNO (x)] == NEVER_SET
1135 || reg_last_set[REGNO (x)] < INSN_CUID (insn));
1136 else
1137 return (reg_first_set[REGNO (x)] == NEVER_SET
1138 || reg_first_set[REGNO (x)] >= INSN_CUID (insn));
1139
1140 case MEM:
1141 if (avail_p)
1142 {
1143 if (mem_last_set != NEVER_SET
1144 && mem_last_set >= INSN_CUID (insn))
1145 return 0;
1146 }
1147 else
1148 {
1149 if (mem_first_set != NEVER_SET
1150 && mem_first_set < INSN_CUID (insn))
1151 return 0;
1152 }
1153 x = XEXP (x, 0);
1154 goto repeat;
1155
1156 case PRE_DEC:
1157 case PRE_INC:
1158 case POST_DEC:
1159 case POST_INC:
1160 return 0;
1161
1162 case PC:
1163 case CC0: /*FIXME*/
1164 case CONST:
1165 case CONST_INT:
1166 case CONST_DOUBLE:
1167 case SYMBOL_REF:
1168 case LABEL_REF:
1169 case ADDR_VEC:
1170 case ADDR_DIFF_VEC:
1171 return 1;
1172
1173 default:
1174 break;
1175 }
1176
1177 i = GET_RTX_LENGTH (code) - 1;
1178 fmt = GET_RTX_FORMAT (code);
1179 for (; i >= 0; i--)
1180 {
1181 if (fmt[i] == 'e')
1182 {
1183 rtx tem = XEXP (x, i);
1184
1185 /* If we are about to do the last recursive call
1186 needed at this level, change it into iteration.
1187 This function is called enough to be worth it. */
1188 if (i == 0)
1189 {
1190 x = tem;
1191 goto repeat;
1192 }
1193 if (! oprs_unchanged_p (tem, insn, avail_p))
1194 return 0;
1195 }
1196 else if (fmt[i] == 'E')
1197 {
1198 int j;
1199 for (j = 0; j < XVECLEN (x, i); j++)
1200 {
1201 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
1202 return 0;
1203 }
1204 }
1205 }
1206
1207 return 1;
1208 }
1209
1210 /* Return non-zero if the operands of expression X are unchanged from
1211 the start of INSN's basic block up to but not including INSN. */
1212
1213 static int
1214 oprs_anticipatable_p (x, insn)
1215 rtx x, insn;
1216 {
1217 return oprs_unchanged_p (x, insn, 0);
1218 }
1219
1220 /* Return non-zero if the operands of expression X are unchanged from
1221 INSN to the end of INSN's basic block. */
1222
1223 static int
1224 oprs_available_p (x, insn)
1225 rtx x, insn;
1226 {
1227 return oprs_unchanged_p (x, insn, 1);
1228 }
1229
1230 /* Hash expression X.
1231 MODE is only used if X is a CONST_INT.
1232 A boolean indicating if a volatile operand is found or if the expression
1233 contains something we don't want to insert in the table is stored in
1234 DO_NOT_RECORD_P.
1235
1236 ??? One might want to merge this with canon_hash. Later. */
1237
1238 static unsigned int
1239 hash_expr (x, mode, do_not_record_p, hash_table_size)
1240 rtx x;
1241 enum machine_mode mode;
1242 int *do_not_record_p;
1243 int hash_table_size;
1244 {
1245 unsigned int hash;
1246
1247 *do_not_record_p = 0;
1248
1249 hash = hash_expr_1 (x, mode, do_not_record_p);
1250 return hash % hash_table_size;
1251 }
1252
1253 /* Subroutine of hash_expr to do the actual work. */
1254
1255 static unsigned int
1256 hash_expr_1 (x, mode, do_not_record_p)
1257 rtx x;
1258 enum machine_mode mode;
1259 int *do_not_record_p;
1260 {
1261 int i, j;
1262 unsigned hash = 0;
1263 enum rtx_code code;
1264 char *fmt;
1265
1266 /* repeat is used to turn tail-recursion into iteration. */
1267 repeat:
1268
1269 if (x == 0)
1270 return hash;
1271
1272 code = GET_CODE (x);
1273 switch (code)
1274 {
1275 case REG:
1276 {
1277 register int regno = REGNO (x);
1278 hash += ((unsigned) REG << 7) + regno;
1279 return hash;
1280 }
1281
1282 case CONST_INT:
1283 {
1284 unsigned HOST_WIDE_INT tem = INTVAL (x);
1285 hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem;
1286 return hash;
1287 }
1288
1289 case CONST_DOUBLE:
1290 /* This is like the general case, except that it only counts
1291 the integers representing the constant. */
1292 hash += (unsigned) code + (unsigned) GET_MODE (x);
1293 if (GET_MODE (x) != VOIDmode)
1294 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
1295 {
1296 unsigned tem = XINT (x, i);
1297 hash += tem;
1298 }
1299 else
1300 hash += ((unsigned) CONST_DOUBLE_LOW (x)
1301 + (unsigned) CONST_DOUBLE_HIGH (x));
1302 return hash;
1303
1304 /* Assume there is only one rtx object for any given label. */
1305 case LABEL_REF:
1306 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
1307 differences and differences between each stage's debugging dumps. */
1308 hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0));
1309 return hash;
1310
1311 case SYMBOL_REF:
1312 {
1313 /* Don't hash on the symbol's address to avoid bootstrap differences.
1314 Different hash values may cause expressions to be recorded in
1315 different orders and thus different registers to be used in the
1316 final assembler. This also avoids differences in the dump files
1317 between various stages. */
1318 unsigned int h = 0;
1319 unsigned char *p = (unsigned char *) XSTR (x, 0);
1320 while (*p)
1321 h += (h << 7) + *p++; /* ??? revisit */
1322 hash += ((unsigned) SYMBOL_REF << 7) + h;
1323 return hash;
1324 }
1325
1326 case MEM:
1327 if (MEM_VOLATILE_P (x))
1328 {
1329 *do_not_record_p = 1;
1330 return 0;
1331 }
1332 hash += (unsigned) MEM;
1333 x = XEXP (x, 0);
1334 goto repeat;
1335
1336 case PRE_DEC:
1337 case PRE_INC:
1338 case POST_DEC:
1339 case POST_INC:
1340 case PC:
1341 case CC0:
1342 case CALL:
1343 case UNSPEC_VOLATILE:
1344 *do_not_record_p = 1;
1345 return 0;
1346
1347 case ASM_OPERANDS:
1348 if (MEM_VOLATILE_P (x))
1349 {
1350 *do_not_record_p = 1;
1351 return 0;
1352 }
1353
1354 default:
1355 break;
1356 }
1357
1358 i = GET_RTX_LENGTH (code) - 1;
1359 hash += (unsigned) code + (unsigned) GET_MODE (x);
1360 fmt = GET_RTX_FORMAT (code);
1361 for (; i >= 0; i--)
1362 {
1363 if (fmt[i] == 'e')
1364 {
1365 rtx tem = XEXP (x, i);
1366
1367 /* If we are about to do the last recursive call
1368 needed at this level, change it into iteration.
1369 This function is called enough to be worth it. */
1370 if (i == 0)
1371 {
1372 x = tem;
1373 goto repeat;
1374 }
1375 hash += hash_expr_1 (tem, 0, do_not_record_p);
1376 if (*do_not_record_p)
1377 return 0;
1378 }
1379 else if (fmt[i] == 'E')
1380 for (j = 0; j < XVECLEN (x, i); j++)
1381 {
1382 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p);
1383 if (*do_not_record_p)
1384 return 0;
1385 }
1386 else if (fmt[i] == 's')
1387 {
1388 register unsigned char *p = (unsigned char *) XSTR (x, i);
1389 if (p)
1390 while (*p)
1391 hash += *p++;
1392 }
1393 else if (fmt[i] == 'i')
1394 {
1395 register unsigned tem = XINT (x, i);
1396 hash += tem;
1397 }
1398 else
1399 abort ();
1400 }
1401
1402 return hash;
1403 }
1404
1405 /* Hash a set of register REGNO.
1406
1407 Sets are hashed on the register that is set.
1408 This simplifies the PRE copy propagation code.
1409
1410 ??? May need to make things more elaborate. Later, as necessary. */
1411
1412 static unsigned int
1413 hash_set (regno, hash_table_size)
1414 int regno;
1415 int hash_table_size;
1416 {
1417 unsigned int hash;
1418
1419 hash = regno;
1420 return hash % hash_table_size;
1421 }
1422
1423 /* Return non-zero if exp1 is equivalent to exp2.
1424 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */
1425
1426 static int
1427 expr_equiv_p (x, y)
1428 rtx x, y;
1429 {
1430 register int i, j;
1431 register enum rtx_code code;
1432 register char *fmt;
1433
1434 if (x == y)
1435 return 1;
1436 if (x == 0 || y == 0)
1437 return x == y;
1438
1439 code = GET_CODE (x);
1440 if (code != GET_CODE (y))
1441 return 0;
1442
1443 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */
1444 if (GET_MODE (x) != GET_MODE (y))
1445 return 0;
1446
1447 switch (code)
1448 {
1449 case PC:
1450 case CC0:
1451 return x == y;
1452
1453 case CONST_INT:
1454 return INTVAL (x) == INTVAL (y);
1455
1456 case LABEL_REF:
1457 return XEXP (x, 0) == XEXP (y, 0);
1458
1459 case SYMBOL_REF:
1460 return XSTR (x, 0) == XSTR (y, 0);
1461
1462 case REG:
1463 return REGNO (x) == REGNO (y);
1464
1465 /* For commutative operations, check both orders. */
1466 case PLUS:
1467 case MULT:
1468 case AND:
1469 case IOR:
1470 case XOR:
1471 case NE:
1472 case EQ:
1473 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0))
1474 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1)))
1475 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1))
1476 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0))));
1477
1478 default:
1479 break;
1480 }
1481
1482 /* Compare the elements. If any pair of corresponding elements
1483 fail to match, return 0 for the whole thing. */
1484
1485 fmt = GET_RTX_FORMAT (code);
1486 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1487 {
1488 switch (fmt[i])
1489 {
1490 case 'e':
1491 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i)))
1492 return 0;
1493 break;
1494
1495 case 'E':
1496 if (XVECLEN (x, i) != XVECLEN (y, i))
1497 return 0;
1498 for (j = 0; j < XVECLEN (x, i); j++)
1499 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j)))
1500 return 0;
1501 break;
1502
1503 case 's':
1504 if (strcmp (XSTR (x, i), XSTR (y, i)))
1505 return 0;
1506 break;
1507
1508 case 'i':
1509 if (XINT (x, i) != XINT (y, i))
1510 return 0;
1511 break;
1512
1513 case 'w':
1514 if (XWINT (x, i) != XWINT (y, i))
1515 return 0;
1516 break;
1517
1518 case '0':
1519 break;
1520
1521 default:
1522 abort ();
1523 }
1524 }
1525
1526 return 1;
1527 }
1528
1529 /* Insert expression X in INSN in the hash table.
1530 If it is already present, record it as the last occurrence in INSN's
1531 basic block.
1532
1533 MODE is the mode of the value X is being stored into.
1534 It is only used if X is a CONST_INT.
1535
1536 ANTIC_P is non-zero if X is an anticipatable expression.
1537 AVAIL_P is non-zero if X is an available expression. */
1538
1539 static void
1540 insert_expr_in_table (x, mode, insn, antic_p, avail_p)
1541 rtx x;
1542 enum machine_mode mode;
1543 rtx insn;
1544 int antic_p, avail_p;
1545 {
1546 int found, do_not_record_p;
1547 unsigned int hash;
1548 struct expr *cur_expr, *last_expr = NULL;
1549 struct occr *antic_occr, *avail_occr;
1550 struct occr *last_occr = NULL;
1551
1552 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size);
1553
1554 /* Do not insert expression in table if it contains volatile operands,
1555 or if hash_expr determines the expression is something we don't want
1556 to or can't handle. */
1557 if (do_not_record_p)
1558 return;
1559
1560 cur_expr = expr_hash_table[hash];
1561 found = 0;
1562
1563 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1564 {
1565 /* If the expression isn't found, save a pointer to the end of
1566 the list. */
1567 last_expr = cur_expr;
1568 cur_expr = cur_expr->next_same_hash;
1569 }
1570
1571 if (! found)
1572 {
1573 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1574 bytes_used += sizeof (struct expr);
1575 if (expr_hash_table[hash] == NULL)
1576 {
1577 /* This is the first pattern that hashed to this index. */
1578 expr_hash_table[hash] = cur_expr;
1579 }
1580 else
1581 {
1582 /* Add EXPR to end of this hash chain. */
1583 last_expr->next_same_hash = cur_expr;
1584 }
1585 /* Set the fields of the expr element. */
1586 cur_expr->expr = x;
1587 cur_expr->bitmap_index = n_exprs++;
1588 cur_expr->next_same_hash = NULL;
1589 cur_expr->antic_occr = NULL;
1590 cur_expr->avail_occr = NULL;
1591 }
1592
1593 /* Now record the occurrence(s). */
1594
1595 if (antic_p)
1596 {
1597 antic_occr = cur_expr->antic_occr;
1598
1599 /* Search for another occurrence in the same basic block. */
1600 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn))
1601 {
1602 /* If an occurrence isn't found, save a pointer to the end of
1603 the list. */
1604 last_occr = antic_occr;
1605 antic_occr = antic_occr->next;
1606 }
1607
1608 if (antic_occr)
1609 {
1610 /* Found another instance of the expression in the same basic block.
1611 Prefer the currently recorded one. We want the first one in the
1612 block and the block is scanned from start to end. */
1613 ; /* nothing to do */
1614 }
1615 else
1616 {
1617 /* First occurrence of this expression in this basic block. */
1618 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1619 bytes_used += sizeof (struct occr);
1620 /* First occurrence of this expression in any block? */
1621 if (cur_expr->antic_occr == NULL)
1622 cur_expr->antic_occr = antic_occr;
1623 else
1624 last_occr->next = antic_occr;
1625 antic_occr->insn = insn;
1626 antic_occr->next = NULL;
1627 }
1628 }
1629
1630 if (avail_p)
1631 {
1632 avail_occr = cur_expr->avail_occr;
1633
1634 /* Search for another occurrence in the same basic block. */
1635 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn))
1636 {
1637 /* If an occurrence isn't found, save a pointer to the end of
1638 the list. */
1639 last_occr = avail_occr;
1640 avail_occr = avail_occr->next;
1641 }
1642
1643 if (avail_occr)
1644 {
1645 /* Found another instance of the expression in the same basic block.
1646 Prefer this occurrence to the currently recorded one. We want
1647 the last one in the block and the block is scanned from start
1648 to end. */
1649 avail_occr->insn = insn;
1650 }
1651 else
1652 {
1653 /* First occurrence of this expression in this basic block. */
1654 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1655 bytes_used += sizeof (struct occr);
1656 /* First occurrence of this expression in any block? */
1657 if (cur_expr->avail_occr == NULL)
1658 cur_expr->avail_occr = avail_occr;
1659 else
1660 last_occr->next = avail_occr;
1661 avail_occr->insn = insn;
1662 avail_occr->next = NULL;
1663 }
1664 }
1665 }
1666
1667 /* Insert pattern X in INSN in the hash table.
1668 X is a SET of a reg to either another reg or a constant.
1669 If it is already present, record it as the last occurrence in INSN's
1670 basic block. */
1671
1672 static void
1673 insert_set_in_table (x, insn)
1674 rtx x;
1675 rtx insn;
1676 {
1677 int found;
1678 unsigned int hash;
1679 struct expr *cur_expr, *last_expr = NULL;
1680 struct occr *cur_occr, *last_occr = NULL;
1681
1682 if (GET_CODE (x) != SET
1683 || GET_CODE (SET_DEST (x)) != REG)
1684 abort ();
1685
1686 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size);
1687
1688 cur_expr = set_hash_table[hash];
1689 found = 0;
1690
1691 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x)))
1692 {
1693 /* If the expression isn't found, save a pointer to the end of
1694 the list. */
1695 last_expr = cur_expr;
1696 cur_expr = cur_expr->next_same_hash;
1697 }
1698
1699 if (! found)
1700 {
1701 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr));
1702 bytes_used += sizeof (struct expr);
1703 if (set_hash_table[hash] == NULL)
1704 {
1705 /* This is the first pattern that hashed to this index. */
1706 set_hash_table[hash] = cur_expr;
1707 }
1708 else
1709 {
1710 /* Add EXPR to end of this hash chain. */
1711 last_expr->next_same_hash = cur_expr;
1712 }
1713 /* Set the fields of the expr element.
1714 We must copy X because it can be modified when copy propagation is
1715 performed on its operands. */
1716 /* ??? Should this go in a different obstack? */
1717 cur_expr->expr = copy_rtx (x);
1718 cur_expr->bitmap_index = n_sets++;
1719 cur_expr->next_same_hash = NULL;
1720 cur_expr->antic_occr = NULL;
1721 cur_expr->avail_occr = NULL;
1722 }
1723
1724 /* Now record the occurrence. */
1725
1726 cur_occr = cur_expr->avail_occr;
1727
1728 /* Search for another occurrence in the same basic block. */
1729 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn))
1730 {
1731 /* If an occurrence isn't found, save a pointer to the end of
1732 the list. */
1733 last_occr = cur_occr;
1734 cur_occr = cur_occr->next;
1735 }
1736
1737 if (cur_occr)
1738 {
1739 /* Found another instance of the expression in the same basic block.
1740 Prefer this occurrence to the currently recorded one. We want
1741 the last one in the block and the block is scanned from start
1742 to end. */
1743 cur_occr->insn = insn;
1744 }
1745 else
1746 {
1747 /* First occurrence of this expression in this basic block. */
1748 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr));
1749 bytes_used += sizeof (struct occr);
1750 /* First occurrence of this expression in any block? */
1751 if (cur_expr->avail_occr == NULL)
1752 cur_expr->avail_occr = cur_occr;
1753 else
1754 last_occr->next = cur_occr;
1755 cur_occr->insn = insn;
1756 cur_occr->next = NULL;
1757 }
1758 }
1759
1760 /* Scan pattern PAT of INSN and add an entry to the hash table.
1761 If SET_P is non-zero, this is for the assignment hash table,
1762 otherwise it is for the expression hash table. */
1763
1764 static void
1765 hash_scan_set (pat, insn, set_p)
1766 rtx pat, insn;
1767 int set_p;
1768 {
1769 rtx src = SET_SRC (pat);
1770 rtx dest = SET_DEST (pat);
1771
1772 if (GET_CODE (src) == CALL)
1773 hash_scan_call (src, insn);
1774
1775 if (GET_CODE (dest) == REG)
1776 {
1777 int regno = REGNO (dest);
1778 rtx tmp;
1779
1780 /* Only record sets of pseudo-regs in the hash table. */
1781 if (! set_p
1782 && regno >= FIRST_PSEUDO_REGISTER
1783 /* Don't GCSE something if we can't do a reg/reg copy. */
1784 && can_copy_p [GET_MODE (dest)]
1785 /* Is SET_SRC something we want to gcse? */
1786 && want_to_gcse_p (src))
1787 {
1788 /* An expression is not anticipatable if its operands are
1789 modified before this insn. */
1790 int antic_p = ! optimize_size && oprs_anticipatable_p (src, insn);
1791 /* An expression is not available if its operands are
1792 subsequently modified, including this insn. */
1793 int avail_p = oprs_available_p (src, insn);
1794 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p);
1795 }
1796 /* Record sets for constant/copy propagation. */
1797 else if (set_p
1798 && regno >= FIRST_PSEUDO_REGISTER
1799 && ((GET_CODE (src) == REG
1800 && REGNO (src) >= FIRST_PSEUDO_REGISTER
1801 && can_copy_p [GET_MODE (dest)])
1802 /* ??? CONST_INT:wip */
1803 || GET_CODE (src) == CONST_INT)
1804 /* A copy is not available if its src or dest is subsequently
1805 modified. Here we want to search from INSN+1 on, but
1806 oprs_available_p searches from INSN on. */
1807 && (insn == BLOCK_END (BLOCK_NUM (insn))
1808 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX
1809 && oprs_available_p (pat, tmp))))
1810 insert_set_in_table (pat, insn);
1811 }
1812
1813 /* Check if first/last set in this block for classic gcse,
1814 but not for copy/constant propagation. */
1815 if (optimize_size && !set_p)
1816
1817 {
1818 rtx dest = SET_DEST (pat);
1819
1820 while (GET_CODE (dest) == SUBREG
1821 || GET_CODE (dest) == ZERO_EXTRACT
1822 || GET_CODE (dest) == SIGN_EXTRACT
1823 || GET_CODE (dest) == STRICT_LOW_PART)
1824 dest = XEXP (dest, 0);
1825 if (GET_CODE (dest) == REG)
1826 maybe_set_rd_gen (REGNO (dest), insn);
1827 }
1828 }
1829
1830 static void
1831 hash_scan_clobber (x, insn)
1832 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1833 {
1834 /* Currently nothing to do. */
1835 }
1836
1837 static void
1838 hash_scan_call (x, insn)
1839 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED;
1840 {
1841 /* Currently nothing to do. */
1842 }
1843
1844 /* Process INSN and add hash table entries as appropriate.
1845
1846 Only available expressions that set a single pseudo-reg are recorded.
1847
1848 Single sets in a PARALLEL could be handled, but it's an extra complication
1849 that isn't dealt with right now. The trick is handling the CLOBBERs that
1850 are also in the PARALLEL. Later.
1851
1852 If SET_P is non-zero, this is for the assignment hash table,
1853 otherwise it is for the expression hash table.
1854 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should
1855 not record any expressions. */
1856
1857 static void
1858 hash_scan_insn (insn, set_p, in_libcall_block)
1859 rtx insn;
1860 int set_p;
1861 int in_libcall_block;
1862 {
1863 rtx pat = PATTERN (insn);
1864
1865 /* Pick out the sets of INSN and for other forms of instructions record
1866 what's been modified. */
1867
1868 if (GET_CODE (pat) == SET && ! in_libcall_block)
1869 hash_scan_set (pat, insn, set_p);
1870 else if (GET_CODE (pat) == PARALLEL)
1871 {
1872 int i;
1873
1874 for (i = 0; i < XVECLEN (pat, 0); i++)
1875 {
1876 rtx x = XVECEXP (pat, 0, i);
1877
1878 if (GET_CODE (x) == SET)
1879 {
1880 if (GET_CODE (SET_SRC (x)) == CALL)
1881 hash_scan_call (SET_SRC (x), insn);
1882
1883 /* Check if first/last set in this block for classic
1884 gcse, but not for constant/copy propagation. */
1885 if (optimize_size && !set_p)
1886 {
1887 rtx dest = SET_DEST (x);
1888
1889 while (GET_CODE (dest) == SUBREG
1890 || GET_CODE (dest) == ZERO_EXTRACT
1891 || GET_CODE (dest) == SIGN_EXTRACT
1892 || GET_CODE (dest) == STRICT_LOW_PART)
1893 dest = XEXP (dest, 0);
1894 if (GET_CODE (dest) == REG)
1895 maybe_set_rd_gen (REGNO (dest), insn);
1896 }
1897 }
1898 else if (GET_CODE (x) == CLOBBER)
1899 hash_scan_clobber (x, insn);
1900 else if (GET_CODE (x) == CALL)
1901 hash_scan_call (x, insn);
1902 }
1903 }
1904 else if (GET_CODE (pat) == CLOBBER)
1905 hash_scan_clobber (pat, insn);
1906 else if (GET_CODE (pat) == CALL)
1907 hash_scan_call (pat, insn);
1908 }
1909
1910 static void
1911 dump_hash_table (file, name, table, table_size, total_size)
1912 FILE *file;
1913 char *name;
1914 struct expr **table;
1915 int table_size, total_size;
1916 {
1917 int i;
1918 /* Flattened out table, so it's printed in proper order. */
1919 struct expr **flat_table = (struct expr **) alloca (total_size * sizeof (struct expr *));
1920 unsigned int *hash_val = (unsigned int *) alloca (total_size * sizeof (unsigned int));
1921
1922 bzero ((char *) flat_table, total_size * sizeof (struct expr *));
1923 for (i = 0; i < table_size; i++)
1924 {
1925 struct expr *expr;
1926
1927 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash)
1928 {
1929 flat_table[expr->bitmap_index] = expr;
1930 hash_val[expr->bitmap_index] = i;
1931 }
1932 }
1933
1934 fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1935 name, table_size, total_size);
1936
1937 for (i = 0; i < total_size; i++)
1938 {
1939 struct expr *expr = flat_table[i];
1940
1941 fprintf (file, "Index %d (hash value %d)\n ",
1942 expr->bitmap_index, hash_val[i]);
1943 print_rtl (file, expr->expr);
1944 fprintf (file, "\n");
1945 }
1946
1947 fprintf (file, "\n");
1948 }
1949
1950 /* Record register first/last/block set information for REGNO in INSN.
1951 reg_first_set records the first place in the block where the register
1952 is set and is used to compute "anticipatability".
1953 reg_last_set records the last place in the block where the register
1954 is set and is used to compute "availability".
1955 reg_set_in_block records whether the register is set in the block
1956 and is used to compute "transparency". */
1957
1958 static void
1959 record_last_reg_set_info (insn, regno)
1960 rtx insn;
1961 int regno;
1962 {
1963 if (reg_first_set[regno] == NEVER_SET)
1964 reg_first_set[regno] = INSN_CUID (insn);
1965 reg_last_set[regno] = INSN_CUID (insn);
1966 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno);
1967 }
1968
1969 /* Record memory first/last/block set information for INSN. */
1970
1971 static void
1972 record_last_mem_set_info (insn)
1973 rtx insn;
1974 {
1975 if (mem_first_set == NEVER_SET)
1976 mem_first_set = INSN_CUID (insn);
1977 mem_last_set = INSN_CUID (insn);
1978 mem_set_in_block[BLOCK_NUM (insn)] = 1;
1979 }
1980
1981 /* Used for communicating between next two routines. */
1982 static rtx last_set_insn;
1983
1984 /* Called from compute_hash_table via note_stores to handle one
1985 SET or CLOBBER in an insn. */
1986
1987 static void
1988 record_last_set_info (dest, setter)
1989 rtx dest, setter ATTRIBUTE_UNUSED;
1990 {
1991 if (GET_CODE (dest) == SUBREG)
1992 dest = SUBREG_REG (dest);
1993
1994 if (GET_CODE (dest) == REG)
1995 record_last_reg_set_info (last_set_insn, REGNO (dest));
1996 else if (GET_CODE (dest) == MEM
1997 /* Ignore pushes, they clobber nothing. */
1998 && ! push_operand (dest, GET_MODE (dest)))
1999 record_last_mem_set_info (last_set_insn);
2000 }
2001
2002 /* Top level function to create an expression or assignment hash table.
2003
2004 Expression entries are placed in the hash table if
2005 - they are of the form (set (pseudo-reg) src),
2006 - src is something we want to perform GCSE on,
2007 - none of the operands are subsequently modified in the block
2008
2009 Assignment entries are placed in the hash table if
2010 - they are of the form (set (pseudo-reg) src),
2011 - src is something we want to perform const/copy propagation on,
2012 - none of the operands or target are subsequently modified in the block
2013 Currently src must be a pseudo-reg or a const_int.
2014
2015 F is the first insn.
2016 SET_P is non-zero for computing the assignment hash table. */
2017
2018 static void
2019 compute_hash_table (f, set_p)
2020 rtx f ATTRIBUTE_UNUSED;
2021 int set_p;
2022 {
2023 int bb;
2024
2025 /* While we compute the hash table we also compute a bit array of which
2026 registers are set in which blocks.
2027 We also compute which blocks set memory, in the absence of aliasing
2028 support [which is TODO].
2029 ??? This isn't needed during const/copy propagation, but it's cheap to
2030 compute. Later. */
2031 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
2032 bzero ((char *) mem_set_in_block, n_basic_blocks);
2033
2034 /* Some working arrays used to track first and last set in each block. */
2035 /* ??? One could use alloca here, but at some size a threshold is crossed
2036 beyond which one should use malloc. Are we at that threshold here? */
2037 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2038 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int));
2039
2040 for (bb = 0; bb < n_basic_blocks; bb++)
2041 {
2042 rtx insn;
2043 int regno;
2044 int in_libcall_block;
2045 int i;
2046
2047 /* First pass over the instructions records information used to
2048 determine when registers and memory are first and last set.
2049 ??? The mem_set_in_block and hard-reg reg_set_in_block computation
2050 could be moved to compute_sets since they currently don't change. */
2051
2052 for (i = 0; i < max_gcse_regno; i++)
2053 reg_first_set[i] = reg_last_set[i] = NEVER_SET;
2054 mem_first_set = NEVER_SET;
2055 mem_last_set = NEVER_SET;
2056
2057 for (insn = BLOCK_HEAD (bb);
2058 insn && insn != NEXT_INSN (BLOCK_END (bb));
2059 insn = NEXT_INSN (insn))
2060 {
2061 #ifdef NON_SAVING_SETJMP
2062 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE
2063 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP)
2064 {
2065 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2066 record_last_reg_set_info (insn, regno);
2067 continue;
2068 }
2069 #endif
2070
2071 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
2072 continue;
2073
2074 if (GET_CODE (insn) == CALL_INSN)
2075 {
2076 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2077 if ((call_used_regs[regno]
2078 && regno != STACK_POINTER_REGNUM
2079 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2080 && regno != HARD_FRAME_POINTER_REGNUM
2081 #endif
2082 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2083 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
2084 #endif
2085 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2086 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2087 #endif
2088
2089 && regno != FRAME_POINTER_REGNUM)
2090 || global_regs[regno])
2091 record_last_reg_set_info (insn, regno);
2092 if (! CONST_CALL_P (insn))
2093 record_last_mem_set_info (insn);
2094 }
2095
2096 last_set_insn = insn;
2097 note_stores (PATTERN (insn), record_last_set_info);
2098 }
2099
2100 /* The next pass builds the hash table. */
2101
2102 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0;
2103 insn && insn != NEXT_INSN (BLOCK_END (bb));
2104 insn = NEXT_INSN (insn))
2105 {
2106 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2107 {
2108 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
2109 in_libcall_block = 1;
2110 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
2111 in_libcall_block = 0;
2112 hash_scan_insn (insn, set_p, in_libcall_block);
2113 }
2114 }
2115 }
2116
2117 free (reg_first_set);
2118 free (reg_last_set);
2119 /* Catch bugs early. */
2120 reg_first_set = reg_last_set = 0;
2121 }
2122
2123 /* Allocate space for the set hash table.
2124 N_INSNS is the number of instructions in the function.
2125 It is used to determine the number of buckets to use. */
2126
2127 static void
2128 alloc_set_hash_table (n_insns)
2129 int n_insns;
2130 {
2131 int n;
2132
2133 set_hash_table_size = n_insns / 4;
2134 if (set_hash_table_size < 11)
2135 set_hash_table_size = 11;
2136 /* Attempt to maintain efficient use of hash table.
2137 Making it an odd number is simplest for now.
2138 ??? Later take some measurements. */
2139 set_hash_table_size |= 1;
2140 n = set_hash_table_size * sizeof (struct expr *);
2141 set_hash_table = (struct expr **) gmalloc (n);
2142 }
2143
2144 /* Free things allocated by alloc_set_hash_table. */
2145
2146 static void
2147 free_set_hash_table ()
2148 {
2149 free (set_hash_table);
2150 }
2151
2152 /* Compute the hash table for doing copy/const propagation. */
2153
2154 static void
2155 compute_set_hash_table (f)
2156 rtx f;
2157 {
2158 /* Initialize count of number of entries in hash table. */
2159 n_sets = 0;
2160 bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *));
2161
2162 compute_hash_table (f, 1);
2163 }
2164
2165 /* Allocate space for the expression hash table.
2166 N_INSNS is the number of instructions in the function.
2167 It is used to determine the number of buckets to use. */
2168
2169 static void
2170 alloc_expr_hash_table (n_insns)
2171 int n_insns;
2172 {
2173 int n;
2174
2175 expr_hash_table_size = n_insns / 2;
2176 /* Make sure the amount is usable. */
2177 if (expr_hash_table_size < 11)
2178 expr_hash_table_size = 11;
2179 /* Attempt to maintain efficient use of hash table.
2180 Making it an odd number is simplest for now.
2181 ??? Later take some measurements. */
2182 expr_hash_table_size |= 1;
2183 n = expr_hash_table_size * sizeof (struct expr *);
2184 expr_hash_table = (struct expr **) gmalloc (n);
2185 }
2186
2187 /* Free things allocated by alloc_expr_hash_table. */
2188
2189 static void
2190 free_expr_hash_table ()
2191 {
2192 free (expr_hash_table);
2193 }
2194
2195 /* Compute the hash table for doing GCSE. */
2196
2197 static void
2198 compute_expr_hash_table (f)
2199 rtx f;
2200 {
2201 /* Initialize count of number of entries in hash table. */
2202 n_exprs = 0;
2203 bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *));
2204
2205 compute_hash_table (f, 0);
2206 }
2207 \f
2208 /* Expression tracking support. */
2209
2210 /* Lookup pattern PAT in the expression table.
2211 The result is a pointer to the table entry, or NULL if not found. */
2212
2213 static struct expr *
2214 lookup_expr (pat)
2215 rtx pat;
2216 {
2217 int do_not_record_p;
2218 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p,
2219 expr_hash_table_size);
2220 struct expr *expr;
2221
2222 if (do_not_record_p)
2223 return NULL;
2224
2225 expr = expr_hash_table[hash];
2226
2227 while (expr && ! expr_equiv_p (expr->expr, pat))
2228 expr = expr->next_same_hash;
2229
2230 return expr;
2231 }
2232
2233 /* Lookup REGNO in the set table.
2234 If PAT is non-NULL look for the entry that matches it, otherwise return
2235 the first entry for REGNO.
2236 The result is a pointer to the table entry, or NULL if not found. */
2237
2238 static struct expr *
2239 lookup_set (regno, pat)
2240 int regno;
2241 rtx pat;
2242 {
2243 unsigned int hash = hash_set (regno, set_hash_table_size);
2244 struct expr *expr;
2245
2246 expr = set_hash_table[hash];
2247
2248 if (pat)
2249 {
2250 while (expr && ! expr_equiv_p (expr->expr, pat))
2251 expr = expr->next_same_hash;
2252 }
2253 else
2254 {
2255 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
2256 expr = expr->next_same_hash;
2257 }
2258
2259 return expr;
2260 }
2261
2262 /* Return the next entry for REGNO in list EXPR. */
2263
2264 static struct expr *
2265 next_set (regno, expr)
2266 int regno;
2267 struct expr *expr;
2268 {
2269 do
2270 expr = expr->next_same_hash;
2271 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
2272 return expr;
2273 }
2274
2275 /* Reset tables used to keep track of what's still available [since the
2276 start of the block]. */
2277
2278 static void
2279 reset_opr_set_tables ()
2280 {
2281 /* Maintain a bitmap of which regs have been set since beginning of
2282 the block. */
2283 sbitmap_zero (reg_set_bitmap);
2284 /* Also keep a record of the last instruction to modify memory.
2285 For now this is very trivial, we only record whether any memory
2286 location has been modified. */
2287 mem_last_set = 0;
2288 }
2289
2290 /* Return non-zero if the operands of X are not set before INSN in
2291 INSN's basic block. */
2292
2293 static int
2294 oprs_not_set_p (x, insn)
2295 rtx x, insn;
2296 {
2297 int i;
2298 enum rtx_code code;
2299 char *fmt;
2300
2301 /* repeat is used to turn tail-recursion into iteration. */
2302 repeat:
2303
2304 if (x == 0)
2305 return 1;
2306
2307 code = GET_CODE (x);
2308 switch (code)
2309 {
2310 case PC:
2311 case CC0:
2312 case CONST:
2313 case CONST_INT:
2314 case CONST_DOUBLE:
2315 case SYMBOL_REF:
2316 case LABEL_REF:
2317 case ADDR_VEC:
2318 case ADDR_DIFF_VEC:
2319 return 1;
2320
2321 case MEM:
2322 if (mem_last_set != 0)
2323 return 0;
2324 x = XEXP (x, 0);
2325 goto repeat;
2326
2327 case REG:
2328 return ! TEST_BIT (reg_set_bitmap, REGNO (x));
2329
2330 default:
2331 break;
2332 }
2333
2334 fmt = GET_RTX_FORMAT (code);
2335 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2336 {
2337 if (fmt[i] == 'e')
2338 {
2339 int not_set_p;
2340 /* If we are about to do the last recursive call
2341 needed at this level, change it into iteration.
2342 This function is called enough to be worth it. */
2343 if (i == 0)
2344 {
2345 x = XEXP (x, 0);
2346 goto repeat;
2347 }
2348 not_set_p = oprs_not_set_p (XEXP (x, i), insn);
2349 if (! not_set_p)
2350 return 0;
2351 }
2352 else if (fmt[i] == 'E')
2353 {
2354 int j;
2355 for (j = 0; j < XVECLEN (x, i); j++)
2356 {
2357 int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn);
2358 if (! not_set_p)
2359 return 0;
2360 }
2361 }
2362 }
2363
2364 return 1;
2365 }
2366
2367 /* Mark things set by a CALL. */
2368
2369 static void
2370 mark_call (pat, insn)
2371 rtx pat ATTRIBUTE_UNUSED, insn;
2372 {
2373 mem_last_set = INSN_CUID (insn);
2374 }
2375
2376 /* Mark things set by a SET. */
2377
2378 static void
2379 mark_set (pat, insn)
2380 rtx pat, insn;
2381 {
2382 rtx dest = SET_DEST (pat);
2383
2384 while (GET_CODE (dest) == SUBREG
2385 || GET_CODE (dest) == ZERO_EXTRACT
2386 || GET_CODE (dest) == SIGN_EXTRACT
2387 || GET_CODE (dest) == STRICT_LOW_PART)
2388 dest = XEXP (dest, 0);
2389
2390 if (GET_CODE (dest) == REG)
2391 SET_BIT (reg_set_bitmap, REGNO (dest));
2392 else if (GET_CODE (dest) == MEM)
2393 mem_last_set = INSN_CUID (insn);
2394
2395 if (GET_CODE (SET_SRC (pat)) == CALL)
2396 mark_call (SET_SRC (pat), insn);
2397 }
2398
2399 /* Record things set by a CLOBBER. */
2400
2401 static void
2402 mark_clobber (pat, insn)
2403 rtx pat, insn;
2404 {
2405 rtx clob = XEXP (pat, 0);
2406
2407 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2408 clob = XEXP (clob, 0);
2409
2410 if (GET_CODE (clob) == REG)
2411 SET_BIT (reg_set_bitmap, REGNO (clob));
2412 else
2413 mem_last_set = INSN_CUID (insn);
2414 }
2415
2416 /* Record things set by INSN.
2417 This data is used by oprs_not_set_p. */
2418
2419 static void
2420 mark_oprs_set (insn)
2421 rtx insn;
2422 {
2423 rtx pat = PATTERN (insn);
2424
2425 if (GET_CODE (pat) == SET)
2426 mark_set (pat, insn);
2427 else if (GET_CODE (pat) == PARALLEL)
2428 {
2429 int i;
2430
2431 for (i = 0; i < XVECLEN (pat, 0); i++)
2432 {
2433 rtx x = XVECEXP (pat, 0, i);
2434
2435 if (GET_CODE (x) == SET)
2436 mark_set (x, insn);
2437 else if (GET_CODE (x) == CLOBBER)
2438 mark_clobber (x, insn);
2439 else if (GET_CODE (x) == CALL)
2440 mark_call (x, insn);
2441 }
2442 }
2443 else if (GET_CODE (pat) == CLOBBER)
2444 mark_clobber (pat, insn);
2445 else if (GET_CODE (pat) == CALL)
2446 mark_call (pat, insn);
2447 }
2448 \f
2449 /* Classic GCSE reaching definition support. */
2450
2451 /* Allocate reaching def variables. */
2452
2453 static void
2454 alloc_rd_mem (n_blocks, n_insns)
2455 int n_blocks, n_insns;
2456 {
2457 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2458 sbitmap_vector_zero (rd_kill, n_basic_blocks);
2459
2460 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2461 sbitmap_vector_zero (rd_gen, n_basic_blocks);
2462
2463 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2464 sbitmap_vector_zero (reaching_defs, n_basic_blocks);
2465
2466 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns);
2467 sbitmap_vector_zero (rd_out, n_basic_blocks);
2468 }
2469
2470 /* Free reaching def variables. */
2471
2472 static void
2473 free_rd_mem ()
2474 {
2475 free (rd_kill);
2476 free (rd_gen);
2477 free (reaching_defs);
2478 free (rd_out);
2479 }
2480
2481 /* Add INSN to the kills of BB.
2482 REGNO, set in BB, is killed by INSN. */
2483
2484 static void
2485 handle_rd_kill_set (insn, regno, bb)
2486 rtx insn;
2487 int regno, bb;
2488 {
2489 struct reg_set *this_reg = reg_set_table[regno];
2490
2491 while (this_reg)
2492 {
2493 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn))
2494 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn));
2495 this_reg = this_reg->next;
2496 }
2497 }
2498
2499 void
2500 dump_rd_table (file, title, bmap)
2501 FILE *file;
2502 char *title;
2503 sbitmap *bmap;
2504 {
2505 int bb,cuid,i,j,n;
2506
2507 fprintf (file, "%s\n", title);
2508 for (bb = 0; bb < n_basic_blocks; bb++)
2509 {
2510 fprintf (file, "BB %d\n", bb);
2511 dump_sbitmap (file, bmap[bb]);
2512 for (i = n = cuid = 0; i < bmap[bb]->size; i++)
2513 {
2514 for (j = 0; j < SBITMAP_ELT_BITS; j++, cuid++)
2515 {
2516 if ((bmap[bb]->elms[i] & (1 << j)) != 0)
2517 {
2518 if (n % 10 == 0)
2519 fprintf (file, " ");
2520 fprintf (file, " %d", INSN_UID (CUID_INSN (cuid)));
2521 n++;
2522 }
2523 }
2524 }
2525 if (n != 0)
2526 fprintf (file, "\n");
2527 }
2528 fprintf (file, "\n");
2529 }
2530
2531 /* Compute the set of kill's for reaching definitions. */
2532
2533 static void
2534 compute_kill_rd ()
2535 {
2536 int bb,cuid;
2537
2538 /* For each block
2539 For each set bit in `gen' of the block (i.e each insn which
2540 generates a definition in the block)
2541 Call the reg set by the insn corresponding to that bit regx
2542 Look at the linked list starting at reg_set_table[regx]
2543 For each setting of regx in the linked list, which is not in
2544 this block
2545 Set the bit in `kill' corresponding to that insn
2546 */
2547
2548 for (bb = 0; bb < n_basic_blocks; bb++)
2549 {
2550 for (cuid = 0; cuid < max_cuid; cuid++)
2551 {
2552 if (TEST_BIT (rd_gen[bb], cuid))
2553 {
2554 rtx insn = CUID_INSN (cuid);
2555 rtx pat = PATTERN (insn);
2556
2557 if (GET_CODE (insn) == CALL_INSN)
2558 {
2559 int regno;
2560
2561 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2562 {
2563 if ((call_used_regs[regno]
2564 && regno != STACK_POINTER_REGNUM
2565 #if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
2566 && regno != HARD_FRAME_POINTER_REGNUM
2567 #endif
2568 #if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
2569 && ! (regno == ARG_POINTER_REGNUM
2570 && fixed_regs[regno])
2571 #endif
2572 #if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED)
2573 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic)
2574 #endif
2575 && regno != FRAME_POINTER_REGNUM)
2576 || global_regs[regno])
2577 handle_rd_kill_set (insn, regno, bb);
2578 }
2579 }
2580
2581 if (GET_CODE (pat) == PARALLEL)
2582 {
2583 int i;
2584
2585 /* We work backwards because ... */
2586 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
2587 {
2588 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i));
2589 if ((code == SET || code == CLOBBER)
2590 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG)
2591 handle_rd_kill_set (insn,
2592 REGNO (XEXP (XVECEXP (pat, 0, i), 0)),
2593 bb);
2594 }
2595 }
2596 else if (GET_CODE (pat) == SET)
2597 {
2598 if (GET_CODE (SET_DEST (pat)) == REG)
2599 {
2600 /* Each setting of this register outside of this block
2601 must be marked in the set of kills in this block. */
2602 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb);
2603 }
2604 }
2605 /* FIXME: CLOBBER? */
2606 }
2607 }
2608 }
2609 }
2610
2611 /* Compute the reaching definitions as in
2612 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman,
2613 Chapter 10. It is the same algorithm as used for computing available
2614 expressions but applied to the gens and kills of reaching definitions. */
2615
2616 static void
2617 compute_rd ()
2618 {
2619 int bb, changed, passes;
2620
2621 for (bb = 0; bb < n_basic_blocks; bb++)
2622 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/);
2623
2624 passes = 0;
2625 changed = 1;
2626 while (changed)
2627 {
2628 changed = 0;
2629 for (bb = 0; bb < n_basic_blocks; bb++)
2630 {
2631 sbitmap_union_of_predecessors (reaching_defs[bb], rd_out,
2632 bb, s_preds);
2633 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb],
2634 reaching_defs[bb], rd_kill[bb]);
2635 }
2636 passes++;
2637 }
2638
2639 if (gcse_file)
2640 fprintf (gcse_file, "reaching def computation: %d passes\n", passes);
2641 }
2642 \f
2643 /* Classic GCSE available expression support. */
2644
2645 /* Allocate memory for available expression computation. */
2646
2647 static void
2648 alloc_avail_expr_mem (n_blocks, n_exprs)
2649 int n_blocks, n_exprs;
2650 {
2651 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2652 sbitmap_vector_zero (ae_kill, n_basic_blocks);
2653
2654 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2655 sbitmap_vector_zero (ae_gen, n_basic_blocks);
2656
2657 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2658 sbitmap_vector_zero (ae_in, n_basic_blocks);
2659
2660 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs);
2661 sbitmap_vector_zero (ae_out, n_basic_blocks);
2662
2663 u_bitmap = (sbitmap) sbitmap_alloc (n_exprs);
2664 sbitmap_ones (u_bitmap);
2665 }
2666
2667 static void
2668 free_avail_expr_mem ()
2669 {
2670 free (ae_kill);
2671 free (ae_gen);
2672 free (ae_in);
2673 free (ae_out);
2674 free (u_bitmap);
2675 }
2676
2677 /* Compute the set of available expressions generated in each basic block. */
2678
2679 static void
2680 compute_ae_gen ()
2681 {
2682 int i;
2683
2684 /* For each recorded occurrence of each expression, set ae_gen[bb][expr].
2685 This is all we have to do because an expression is not recorded if it
2686 is not available, and the only expressions we want to work with are the
2687 ones that are recorded. */
2688
2689 for (i = 0; i < expr_hash_table_size; i++)
2690 {
2691 struct expr *expr = expr_hash_table[i];
2692 while (expr != NULL)
2693 {
2694 struct occr *occr = expr->avail_occr;
2695 while (occr != NULL)
2696 {
2697 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index);
2698 occr = occr->next;
2699 }
2700 expr = expr->next_same_hash;
2701 }
2702 }
2703 }
2704
2705 /* Return non-zero if expression X is killed in BB. */
2706
2707 static int
2708 expr_killed_p (x, bb)
2709 rtx x;
2710 int bb;
2711 {
2712 int i;
2713 enum rtx_code code;
2714 char *fmt;
2715
2716 /* repeat is used to turn tail-recursion into iteration. */
2717 repeat:
2718
2719 if (x == 0)
2720 return 1;
2721
2722 code = GET_CODE (x);
2723 switch (code)
2724 {
2725 case REG:
2726 return TEST_BIT (reg_set_in_block[bb], REGNO (x));
2727
2728 case MEM:
2729 if (mem_set_in_block[bb])
2730 return 1;
2731 x = XEXP (x, 0);
2732 goto repeat;
2733
2734 case PC:
2735 case CC0: /*FIXME*/
2736 case CONST:
2737 case CONST_INT:
2738 case CONST_DOUBLE:
2739 case SYMBOL_REF:
2740 case LABEL_REF:
2741 case ADDR_VEC:
2742 case ADDR_DIFF_VEC:
2743 return 0;
2744
2745 default:
2746 break;
2747 }
2748
2749 i = GET_RTX_LENGTH (code) - 1;
2750 fmt = GET_RTX_FORMAT (code);
2751 for (; i >= 0; i--)
2752 {
2753 if (fmt[i] == 'e')
2754 {
2755 rtx tem = XEXP (x, i);
2756
2757 /* If we are about to do the last recursive call
2758 needed at this level, change it into iteration.
2759 This function is called enough to be worth it. */
2760 if (i == 0)
2761 {
2762 x = tem;
2763 goto repeat;
2764 }
2765 if (expr_killed_p (tem, bb))
2766 return 1;
2767 }
2768 else if (fmt[i] == 'E')
2769 {
2770 int j;
2771 for (j = 0; j < XVECLEN (x, i); j++)
2772 {
2773 if (expr_killed_p (XVECEXP (x, i, j), bb))
2774 return 1;
2775 }
2776 }
2777 }
2778
2779 return 0;
2780 }
2781
2782 /* Compute the set of available expressions killed in each basic block. */
2783
2784 static void
2785 compute_ae_kill ()
2786 {
2787 int bb,i;
2788
2789 for (bb = 0; bb < n_basic_blocks; bb++)
2790 {
2791 for (i = 0; i < expr_hash_table_size; i++)
2792 {
2793 struct expr *expr = expr_hash_table[i];
2794
2795 for ( ; expr != NULL; expr = expr->next_same_hash)
2796 {
2797 /* Skip EXPR if generated in this block. */
2798 if (TEST_BIT (ae_gen[bb], expr->bitmap_index))
2799 continue;
2800
2801 if (expr_killed_p (expr->expr, bb))
2802 SET_BIT (ae_kill[bb], expr->bitmap_index);
2803 }
2804 }
2805 }
2806 }
2807
2808 /* Compute available expressions.
2809
2810 Implement the algorithm to find available expressions
2811 as given in the Aho Sethi Ullman book, pages 627-631. */
2812
2813 static void
2814 compute_available ()
2815 {
2816 int bb, changed, passes;
2817
2818 sbitmap_zero (ae_in[0]);
2819
2820 sbitmap_copy (ae_out[0] /*dst*/, ae_gen[0] /*src*/);
2821
2822 for (bb = 1; bb < n_basic_blocks; bb++)
2823 sbitmap_difference (ae_out[bb], u_bitmap, ae_kill[bb]);
2824
2825 passes = 0;
2826 changed = 1;
2827 while (changed)
2828 {
2829 changed = 0;
2830 for (bb = 1; bb < n_basic_blocks; bb++)
2831 {
2832 sbitmap_intersect_of_predecessors (ae_in[bb], ae_out,
2833 bb, s_preds);
2834 changed |= sbitmap_union_of_diff (ae_out[bb], ae_gen[bb],
2835 ae_in[bb], ae_kill[bb]);
2836 }
2837 passes++;
2838 }
2839
2840 if (gcse_file)
2841 fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
2842 }
2843 \f
2844 /* Actually perform the Classic GCSE optimizations. */
2845
2846 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
2847
2848 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself
2849 as a positive reach. We want to do this when there are two computations
2850 of the expression in the block.
2851
2852 VISITED is a pointer to a working buffer for tracking which BB's have
2853 been visited. It is NULL for the top-level call.
2854
2855 We treat reaching expressions that go through blocks containing the same
2856 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
2857 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
2858 2 as not reaching. The intent is to improve the probability of finding
2859 only one reaching expression and to reduce register lifetimes by picking
2860 the closest such expression. */
2861
2862 static int
2863 expr_reaches_here_p (occr, expr, bb, check_self_loop, visited)
2864 struct occr *occr;
2865 struct expr *expr;
2866 int bb;
2867 int check_self_loop;
2868 char *visited;
2869 {
2870 int_list_ptr pred;
2871
2872 if (visited == NULL)
2873 {
2874 visited = (char *) alloca (n_basic_blocks);
2875 bzero (visited, n_basic_blocks);
2876 }
2877
2878 for (pred = s_preds[bb]; pred != NULL; pred = pred->next)
2879 {
2880 int pred_bb = INT_LIST_VAL (pred);
2881
2882 if (visited[pred_bb])
2883 {
2884 /* This predecessor has already been visited.
2885 Nothing to do. */
2886 ;
2887 }
2888 else if (pred_bb == bb)
2889 {
2890 /* BB loops on itself. */
2891 if (check_self_loop
2892 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)
2893 && BLOCK_NUM (occr->insn) == pred_bb)
2894 return 1;
2895 visited[pred_bb] = 1;
2896 }
2897 /* Ignore this predecessor if it kills the expression. */
2898 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index))
2899 visited[pred_bb] = 1;
2900 /* Does this predecessor generate this expression? */
2901 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index))
2902 {
2903 /* Is this the occurrence we're looking for?
2904 Note that there's only one generating occurrence per block
2905 so we just need to check the block number. */
2906 if (BLOCK_NUM (occr->insn) == pred_bb)
2907 return 1;
2908 visited[pred_bb] = 1;
2909 }
2910 /* Neither gen nor kill. */
2911 else
2912 {
2913 visited[pred_bb] = 1;
2914 if (expr_reaches_here_p (occr, expr, pred_bb, check_self_loop, visited))
2915 return 1;
2916 }
2917 }
2918
2919 /* All paths have been checked. */
2920 return 0;
2921 }
2922
2923 /* Return the instruction that computes EXPR that reaches INSN's basic block.
2924 If there is more than one such instruction, return NULL.
2925
2926 Called only by handle_avail_expr. */
2927
2928 static rtx
2929 computing_insn (expr, insn)
2930 struct expr *expr;
2931 rtx insn;
2932 {
2933 int bb = BLOCK_NUM (insn);
2934
2935 if (expr->avail_occr->next == NULL)
2936 {
2937 if (BLOCK_NUM (expr->avail_occr->insn) == bb)
2938 {
2939 /* The available expression is actually itself
2940 (i.e. a loop in the flow graph) so do nothing. */
2941 return NULL;
2942 }
2943 /* (FIXME) Case that we found a pattern that was created by
2944 a substitution that took place. */
2945 return expr->avail_occr->insn;
2946 }
2947 else
2948 {
2949 /* Pattern is computed more than once.
2950 Search backwards from this insn to see how many of these
2951 computations actually reach this insn. */
2952 struct occr *occr;
2953 rtx insn_computes_expr = NULL;
2954 int can_reach = 0;
2955
2956 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
2957 {
2958 if (BLOCK_NUM (occr->insn) == bb)
2959 {
2960 /* The expression is generated in this block.
2961 The only time we care about this is when the expression
2962 is generated later in the block [and thus there's a loop].
2963 We let the normal cse pass handle the other cases. */
2964 if (INSN_CUID (insn) < INSN_CUID (occr->insn))
2965 {
2966 if (expr_reaches_here_p (occr, expr, bb, 1, NULL))
2967 {
2968 can_reach++;
2969 if (can_reach > 1)
2970 return NULL;
2971 insn_computes_expr = occr->insn;
2972 }
2973 }
2974 }
2975 else /* Computation of the pattern outside this block. */
2976 {
2977 if (expr_reaches_here_p (occr, expr, bb, 0, NULL))
2978 {
2979 can_reach++;
2980 if (can_reach > 1)
2981 return NULL;
2982 insn_computes_expr = occr->insn;
2983 }
2984 }
2985 }
2986
2987 if (insn_computes_expr == NULL)
2988 abort ();
2989 return insn_computes_expr;
2990 }
2991 }
2992
2993 /* Return non-zero if the definition in DEF_INSN can reach INSN.
2994 Only called by can_disregard_other_sets. */
2995
2996 static int
2997 def_reaches_here_p (insn, def_insn)
2998 rtx insn, def_insn;
2999 {
3000 rtx reg;
3001
3002 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn)))
3003 return 1;
3004
3005 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn))
3006 {
3007 if (INSN_CUID (def_insn) < INSN_CUID (insn))
3008 {
3009 if (GET_CODE (PATTERN (def_insn)) == PARALLEL)
3010 return 1;
3011 if (GET_CODE (PATTERN (def_insn)) == CLOBBER)
3012 reg = XEXP (PATTERN (def_insn), 0);
3013 else if (GET_CODE (PATTERN (def_insn)) == SET)
3014 reg = SET_DEST (PATTERN (def_insn));
3015 else
3016 abort ();
3017 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn);
3018 }
3019 else
3020 return 0;
3021 }
3022
3023 return 0;
3024 }
3025
3026 /* Return non-zero if *ADDR_THIS_REG can only have one value at INSN.
3027 The value returned is the number of definitions that reach INSN.
3028 Returning a value of zero means that [maybe] more than one definition
3029 reaches INSN and the caller can't perform whatever optimization it is
3030 trying. i.e. it is always safe to return zero. */
3031
3032 static int
3033 can_disregard_other_sets (addr_this_reg, insn, for_combine)
3034 struct reg_set **addr_this_reg;
3035 rtx insn;
3036 int for_combine;
3037 {
3038 int number_of_reaching_defs = 0;
3039 struct reg_set *this_reg = *addr_this_reg;
3040
3041 while (this_reg)
3042 {
3043 if (def_reaches_here_p (insn, this_reg->insn))
3044 {
3045 number_of_reaching_defs++;
3046 /* Ignore parallels for now. */
3047 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL)
3048 return 0;
3049 if (!for_combine
3050 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER
3051 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3052 SET_SRC (PATTERN (insn)))))
3053 {
3054 /* A setting of the reg to a different value reaches INSN. */
3055 return 0;
3056 }
3057 if (number_of_reaching_defs > 1)
3058 {
3059 /* If in this setting the value the register is being
3060 set to is equal to the previous value the register
3061 was set to and this setting reaches the insn we are
3062 trying to do the substitution on then we are ok. */
3063
3064 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER)
3065 return 0;
3066 if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)),
3067 SET_SRC (PATTERN (insn))))
3068 return 0;
3069 }
3070 *addr_this_reg = this_reg;
3071 }
3072
3073 /* prev_this_reg = this_reg; */
3074 this_reg = this_reg->next;
3075 }
3076
3077 return number_of_reaching_defs;
3078 }
3079
3080 /* Expression computed by insn is available and the substitution is legal,
3081 so try to perform the substitution.
3082
3083 The result is non-zero if any changes were made. */
3084
3085 static int
3086 handle_avail_expr (insn, expr)
3087 rtx insn;
3088 struct expr *expr;
3089 {
3090 rtx pat, insn_computes_expr;
3091 rtx to;
3092 struct reg_set *this_reg;
3093 int found_setting, use_src;
3094 int changed = 0;
3095
3096 /* We only handle the case where one computation of the expression
3097 reaches this instruction. */
3098 insn_computes_expr = computing_insn (expr, insn);
3099 if (insn_computes_expr == NULL)
3100 return 0;
3101
3102 found_setting = 0;
3103 use_src = 0;
3104
3105 /* At this point we know only one computation of EXPR outside of this
3106 block reaches this insn. Now try to find a register that the
3107 expression is computed into. */
3108
3109 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG)
3110 {
3111 /* This is the case when the available expression that reaches
3112 here has already been handled as an available expression. */
3113 int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr)));
3114 /* If the register was created by GCSE we can't use `reg_set_table',
3115 however we know it's set only once. */
3116 if (regnum_for_replacing >= max_gcse_regno
3117 /* If the register the expression is computed into is set only once,
3118 or only one set reaches this insn, we can use it. */
3119 || (((this_reg = reg_set_table[regnum_for_replacing]),
3120 this_reg->next == NULL)
3121 || can_disregard_other_sets (&this_reg, insn, 0)))
3122 {
3123 use_src = 1;
3124 found_setting = 1;
3125 }
3126 }
3127
3128 if (!found_setting)
3129 {
3130 int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr)));
3131 /* This shouldn't happen. */
3132 if (regnum_for_replacing >= max_gcse_regno)
3133 abort ();
3134 this_reg = reg_set_table[regnum_for_replacing];
3135 /* If the register the expression is computed into is set only once,
3136 or only one set reaches this insn, use it. */
3137 if (this_reg->next == NULL
3138 || can_disregard_other_sets (&this_reg, insn, 0))
3139 found_setting = 1;
3140 }
3141
3142 if (found_setting)
3143 {
3144 pat = PATTERN (insn);
3145 if (use_src)
3146 to = SET_SRC (PATTERN (insn_computes_expr));
3147 else
3148 to = SET_DEST (PATTERN (insn_computes_expr));
3149 changed = validate_change (insn, &SET_SRC (pat), to, 0);
3150
3151 /* We should be able to ignore the return code from validate_change but
3152 to play it safe we check. */
3153 if (changed)
3154 {
3155 gcse_subst_count++;
3156 if (gcse_file != NULL)
3157 {
3158 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n",
3159 INSN_UID (insn), REGNO (to),
3160 use_src ? "from" : "set in",
3161 INSN_UID (insn_computes_expr));
3162 }
3163
3164 }
3165 }
3166 /* The register that the expr is computed into is set more than once. */
3167 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/)
3168 {
3169 /* Insert an insn after insnx that copies the reg set in insnx
3170 into a new pseudo register call this new register REGN.
3171 From insnb until end of basic block or until REGB is set
3172 replace all uses of REGB with REGN. */
3173 rtx new_insn;
3174
3175 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr))));
3176
3177 /* Generate the new insn. */
3178 /* ??? If the change fails, we return 0, even though we created
3179 an insn. I think this is ok. */
3180 new_insn
3181 = emit_insn_after (gen_rtx_SET (VOIDmode, to,
3182 SET_DEST (PATTERN (insn_computes_expr))),
3183 insn_computes_expr);
3184 /* Keep block number table up to date. */
3185 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr));
3186 /* Keep register set table up to date. */
3187 record_one_set (REGNO (to), new_insn);
3188
3189 gcse_create_count++;
3190 if (gcse_file != NULL)
3191 {
3192 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n",
3193 INSN_UID (NEXT_INSN (insn_computes_expr)),
3194 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))),
3195 INSN_UID (insn_computes_expr));
3196 fprintf (gcse_file, " into newly allocated reg %d\n", REGNO (to));
3197 }
3198
3199 pat = PATTERN (insn);
3200
3201 /* Do register replacement for INSN. */
3202 changed = validate_change (insn, &SET_SRC (pat),
3203 SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))),
3204 0);
3205
3206 /* We should be able to ignore the return code from validate_change but
3207 to play it safe we check. */
3208 if (changed)
3209 {
3210 gcse_subst_count++;
3211 if (gcse_file != NULL)
3212 {
3213 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n",
3214 INSN_UID (insn),
3215 REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))),
3216 INSN_UID (insn_computes_expr));
3217 }
3218
3219 }
3220 }
3221
3222 return changed;
3223 }
3224
3225 /* Perform classic GCSE.
3226 This is called by one_classic_gcse_pass after all the dataflow analysis
3227 has been done.
3228
3229 The result is non-zero if a change was made. */
3230
3231 static int
3232 classic_gcse ()
3233 {
3234 int bb, changed;
3235 rtx insn;
3236
3237 /* Note we start at block 1. */
3238
3239 changed = 0;
3240 for (bb = 1; bb < n_basic_blocks; bb++)
3241 {
3242 /* Reset tables used to keep track of what's still valid [since the
3243 start of the block]. */
3244 reset_opr_set_tables ();
3245
3246 for (insn = BLOCK_HEAD (bb);
3247 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3248 insn = NEXT_INSN (insn))
3249 {
3250 /* Is insn of form (set (pseudo-reg) ...)? */
3251
3252 if (GET_CODE (insn) == INSN
3253 && GET_CODE (PATTERN (insn)) == SET
3254 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
3255 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER)
3256 {
3257 rtx pat = PATTERN (insn);
3258 rtx src = SET_SRC (pat);
3259 struct expr *expr;
3260
3261 if (want_to_gcse_p (src)
3262 /* Is the expression recorded? */
3263 && ((expr = lookup_expr (src)) != NULL)
3264 /* Is the expression available [at the start of the
3265 block]? */
3266 && TEST_BIT (ae_in[bb], expr->bitmap_index)
3267 /* Are the operands unchanged since the start of the
3268 block? */
3269 && oprs_not_set_p (src, insn))
3270 changed |= handle_avail_expr (insn, expr);
3271 }
3272
3273 /* Keep track of everything modified by this insn. */
3274 /* ??? Need to be careful w.r.t. mods done to INSN. */
3275 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3276 mark_oprs_set (insn);
3277 }
3278 }
3279
3280 return changed;
3281 }
3282
3283 /* Top level routine to perform one classic GCSE pass.
3284
3285 Return non-zero if a change was made. */
3286
3287 static int
3288 one_classic_gcse_pass (f, pass)
3289 rtx f;
3290 int pass;
3291 {
3292 int changed = 0;
3293
3294 gcse_subst_count = 0;
3295 gcse_create_count = 0;
3296
3297 alloc_expr_hash_table (max_cuid);
3298 alloc_rd_mem (n_basic_blocks, max_cuid);
3299 compute_expr_hash_table (f);
3300 if (gcse_file)
3301 dump_hash_table (gcse_file, "Expression", expr_hash_table,
3302 expr_hash_table_size, n_exprs);
3303 if (n_exprs > 0)
3304 {
3305 compute_kill_rd ();
3306 compute_rd ();
3307 alloc_avail_expr_mem (n_basic_blocks, n_exprs);
3308 compute_ae_gen ();
3309 compute_ae_kill ();
3310 compute_available ();
3311 changed = classic_gcse ();
3312 free_avail_expr_mem ();
3313 }
3314 free_rd_mem ();
3315 free_expr_hash_table ();
3316
3317 if (gcse_file)
3318 {
3319 fprintf (gcse_file, "\n");
3320 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
3321 current_function_name, pass,
3322 bytes_used, gcse_subst_count, gcse_create_count);
3323 }
3324
3325 return changed;
3326 }
3327 \f
3328 /* Compute copy/constant propagation working variables. */
3329
3330 /* Local properties of assignments. */
3331
3332 static sbitmap *cprop_pavloc;
3333 static sbitmap *cprop_absaltered;
3334
3335 /* Global properties of assignments (computed from the local properties). */
3336
3337 static sbitmap *cprop_avin;
3338 static sbitmap *cprop_avout;
3339
3340 /* Allocate vars used for copy/const propagation.
3341 N_BLOCKS is the number of basic blocks.
3342 N_SETS is the number of sets. */
3343
3344 static void
3345 alloc_cprop_mem (n_blocks, n_sets)
3346 int n_blocks, n_sets;
3347 {
3348 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
3349 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
3350
3351 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
3352 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
3353 }
3354
3355 /* Free vars used by copy/const propagation. */
3356
3357 static void
3358 free_cprop_mem ()
3359 {
3360 free (cprop_pavloc);
3361 free (cprop_absaltered);
3362 free (cprop_avin);
3363 free (cprop_avout);
3364 }
3365
3366 /* Dump copy/const propagation data. */
3367
3368 void
3369 dump_cprop_data (file)
3370 FILE *file;
3371 {
3372 dump_sbitmap_vector (file, "CPROP partially locally available sets", "BB",
3373 cprop_pavloc, n_basic_blocks);
3374 dump_sbitmap_vector (file, "CPROP absolutely altered sets", "BB",
3375 cprop_absaltered, n_basic_blocks);
3376
3377 dump_sbitmap_vector (file, "CPROP available incoming sets", "BB",
3378 cprop_avin, n_basic_blocks);
3379 dump_sbitmap_vector (file, "CPROP available outgoing sets", "BB",
3380 cprop_avout, n_basic_blocks);
3381 }
3382
3383 /* For each block, compute whether X is transparent.
3384 X is either an expression or an assignment [though we don't care which,
3385 for this context an assignment is treated as an expression].
3386 For each block where an element of X is modified, set (SET_P == 1) or reset
3387 (SET_P == 0) the INDX bit in BMAP. */
3388
3389 static void
3390 compute_transp (x, indx, bmap, set_p)
3391 rtx x;
3392 int indx;
3393 sbitmap *bmap;
3394 int set_p;
3395 {
3396 int bb,i;
3397 enum rtx_code code;
3398 char *fmt;
3399
3400 /* repeat is used to turn tail-recursion into iteration. */
3401 repeat:
3402
3403 if (x == 0)
3404 return;
3405
3406 code = GET_CODE (x);
3407 switch (code)
3408 {
3409 case REG:
3410 {
3411 reg_set *r;
3412 int regno = REGNO (x);
3413
3414 if (set_p)
3415 {
3416 if (regno < FIRST_PSEUDO_REGISTER)
3417 {
3418 for (bb = 0; bb < n_basic_blocks; bb++)
3419 if (TEST_BIT (reg_set_in_block[bb], regno))
3420 SET_BIT (bmap[bb], indx);
3421 }
3422 else
3423 {
3424 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3425 {
3426 bb = BLOCK_NUM (r->insn);
3427 SET_BIT (bmap[bb], indx);
3428 }
3429 }
3430 }
3431 else
3432 {
3433 if (regno < FIRST_PSEUDO_REGISTER)
3434 {
3435 for (bb = 0; bb < n_basic_blocks; bb++)
3436 if (TEST_BIT (reg_set_in_block[bb], regno))
3437 RESET_BIT (bmap[bb], indx);
3438 }
3439 else
3440 {
3441 for (r = reg_set_table[regno]; r != NULL; r = r->next)
3442 {
3443 bb = BLOCK_NUM (r->insn);
3444 RESET_BIT (bmap[bb], indx);
3445 }
3446 }
3447 }
3448 return;
3449 }
3450
3451 case MEM:
3452 if (set_p)
3453 {
3454 for (bb = 0; bb < n_basic_blocks; bb++)
3455 if (mem_set_in_block[bb])
3456 SET_BIT (bmap[bb], indx);
3457 }
3458 else
3459 {
3460 for (bb = 0; bb < n_basic_blocks; bb++)
3461 if (mem_set_in_block[bb])
3462 RESET_BIT (bmap[bb], indx);
3463 }
3464 x = XEXP (x, 0);
3465 goto repeat;
3466
3467 case PC:
3468 case CC0: /*FIXME*/
3469 case CONST:
3470 case CONST_INT:
3471 case CONST_DOUBLE:
3472 case SYMBOL_REF:
3473 case LABEL_REF:
3474 case ADDR_VEC:
3475 case ADDR_DIFF_VEC:
3476 return;
3477
3478 default:
3479 break;
3480 }
3481
3482 i = GET_RTX_LENGTH (code) - 1;
3483 fmt = GET_RTX_FORMAT (code);
3484 for (; i >= 0; i--)
3485 {
3486 if (fmt[i] == 'e')
3487 {
3488 rtx tem = XEXP (x, i);
3489
3490 /* If we are about to do the last recursive call
3491 needed at this level, change it into iteration.
3492 This function is called enough to be worth it. */
3493 if (i == 0)
3494 {
3495 x = tem;
3496 goto repeat;
3497 }
3498 compute_transp (tem, indx, bmap, set_p);
3499 }
3500 else if (fmt[i] == 'E')
3501 {
3502 int j;
3503 for (j = 0; j < XVECLEN (x, i); j++)
3504 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
3505 }
3506 }
3507 }
3508
3509 static void
3510 compute_cprop_local_properties ()
3511 {
3512 int i;
3513
3514 sbitmap_vector_zero (cprop_absaltered, n_basic_blocks);
3515 sbitmap_vector_zero (cprop_pavloc, n_basic_blocks);
3516
3517 for (i = 0; i < set_hash_table_size; i++)
3518 {
3519 struct expr *expr;
3520
3521 for (expr = set_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
3522 {
3523 struct occr *occr;
3524 int indx = expr->bitmap_index;
3525
3526 /* The assignment is absolutely altered if any operand is modified
3527 by this block [excluding the assignment itself].
3528 We start by assuming all are transparent [none are killed], and
3529 then setting the bits for those that are. */
3530
3531 compute_transp (expr->expr, indx, cprop_absaltered, 1);
3532
3533 /* The occurrences recorded in avail_occr are exactly those that
3534 we want to set to non-zero in PAVLOC. */
3535
3536 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
3537 {
3538 int bb = BLOCK_NUM (occr->insn);
3539 SET_BIT (cprop_pavloc[bb], indx);
3540 }
3541 }
3542 }
3543 }
3544
3545 static void
3546 compute_cprop_avinout ()
3547 {
3548 int bb, changed, passes;
3549
3550 sbitmap_zero (cprop_avin[0]);
3551 sbitmap_vector_ones (cprop_avout, n_basic_blocks);
3552
3553 passes = 0;
3554 changed = 1;
3555 while (changed)
3556 {
3557 changed = 0;
3558 for (bb = 0; bb < n_basic_blocks; bb++)
3559 {
3560 if (bb != 0)
3561 sbitmap_intersect_of_predecessors (cprop_avin[bb], cprop_avout,
3562 bb, s_preds);
3563 changed |= sbitmap_union_of_diff (cprop_avout[bb],
3564 cprop_pavloc[bb],
3565 cprop_avin[bb],
3566 cprop_absaltered[bb]);
3567 }
3568 passes++;
3569 }
3570
3571 if (gcse_file)
3572 fprintf (gcse_file, "cprop avail expr computation: %d passes\n", passes);
3573 }
3574
3575 /* Top level routine to do the dataflow analysis needed by copy/const
3576 propagation. */
3577
3578 static void
3579 compute_cprop_data ()
3580 {
3581 compute_cprop_local_properties ();
3582 compute_cprop_avinout ();
3583 }
3584 \f
3585 /* Copy/constant propagation. */
3586
3587 struct reg_use {
3588 rtx reg_rtx;
3589 };
3590
3591 /* Maximum number of register uses in an insn that we handle. */
3592 #define MAX_USES 8
3593
3594 /* Table of uses found in an insn.
3595 Allocated statically to avoid alloc/free complexity and overhead. */
3596 static struct reg_use reg_use_table[MAX_USES];
3597
3598 /* Index into `reg_use_table' while building it. */
3599 static int reg_use_count;
3600
3601 /* Set up a list of register numbers used in INSN.
3602 The found uses are stored in `reg_use_table'.
3603 `reg_use_count' is initialized to zero before entry, and
3604 contains the number of uses in the table upon exit.
3605
3606 ??? If a register appears multiple times we will record it multiple
3607 times. This doesn't hurt anything but it will slow things down. */
3608
3609 static void
3610 find_used_regs (x)
3611 rtx x;
3612 {
3613 int i;
3614 enum rtx_code code;
3615 char *fmt;
3616
3617 /* repeat is used to turn tail-recursion into iteration. */
3618 repeat:
3619
3620 if (x == 0)
3621 return;
3622
3623 code = GET_CODE (x);
3624 switch (code)
3625 {
3626 case REG:
3627 if (reg_use_count == MAX_USES)
3628 return;
3629 reg_use_table[reg_use_count].reg_rtx = x;
3630 reg_use_count++;
3631 return;
3632
3633 case MEM:
3634 x = XEXP (x, 0);
3635 goto repeat;
3636
3637 case PC:
3638 case CC0:
3639 case CONST:
3640 case CONST_INT:
3641 case CONST_DOUBLE:
3642 case SYMBOL_REF:
3643 case LABEL_REF:
3644 case CLOBBER:
3645 case ADDR_VEC:
3646 case ADDR_DIFF_VEC:
3647 case ASM_INPUT: /*FIXME*/
3648 return;
3649
3650 case SET:
3651 if (GET_CODE (SET_DEST (x)) == MEM)
3652 find_used_regs (SET_DEST (x));
3653 x = SET_SRC (x);
3654 goto repeat;
3655
3656 default:
3657 break;
3658 }
3659
3660 /* Recursively scan the operands of this expression. */
3661
3662 fmt = GET_RTX_FORMAT (code);
3663 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3664 {
3665 if (fmt[i] == 'e')
3666 {
3667 /* If we are about to do the last recursive call
3668 needed at this level, change it into iteration.
3669 This function is called enough to be worth it. */
3670 if (i == 0)
3671 {
3672 x = XEXP (x, 0);
3673 goto repeat;
3674 }
3675 find_used_regs (XEXP (x, i));
3676 }
3677 else if (fmt[i] == 'E')
3678 {
3679 int j;
3680 for (j = 0; j < XVECLEN (x, i); j++)
3681 find_used_regs (XVECEXP (x, i, j));
3682 }
3683 }
3684 }
3685
3686 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
3687 Returns non-zero is successful. */
3688
3689 static int
3690 try_replace_reg (from, to, insn)
3691 rtx from, to, insn;
3692 {
3693 return validate_replace_src (from, to, insn);
3694 }
3695
3696 /* Find a set of REGNO that is available on entry to INSN's block.
3697 Returns NULL if not found. */
3698
3699 static struct expr *
3700 find_avail_set (regno, insn)
3701 int regno;
3702 rtx insn;
3703 {
3704 struct expr *set = lookup_set (regno, NULL_RTX);
3705
3706 while (set)
3707 {
3708 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
3709 break;
3710 set = next_set (regno, set);
3711 }
3712
3713 return set;
3714 }
3715
3716 /* Perform constant and copy propagation on INSN.
3717 The result is non-zero if a change was made. */
3718
3719 static int
3720 cprop_insn (insn)
3721 rtx insn;
3722 {
3723 struct reg_use *reg_used;
3724 int changed = 0;
3725
3726 /* ??? For now only propagate into SETs. */
3727 if (GET_CODE (insn) != INSN
3728 || GET_CODE (PATTERN (insn)) != SET)
3729 return 0;
3730
3731 reg_use_count = 0;
3732 find_used_regs (PATTERN (insn));
3733
3734 reg_used = &reg_use_table[0];
3735 for ( ; reg_use_count > 0; reg_used++, reg_use_count--)
3736 {
3737 rtx pat, src;
3738 struct expr *set;
3739 int regno = REGNO (reg_used->reg_rtx);
3740
3741 /* Ignore registers created by GCSE.
3742 We do this because ... */
3743 if (regno >= max_gcse_regno)
3744 continue;
3745
3746 /* If the register has already been set in this block, there's
3747 nothing we can do. */
3748 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
3749 continue;
3750
3751 /* Find an assignment that sets reg_used and is available
3752 at the start of the block. */
3753 set = find_avail_set (regno, insn);
3754 if (! set)
3755 continue;
3756
3757 pat = set->expr;
3758 /* ??? We might be able to handle PARALLELs. Later. */
3759 if (GET_CODE (pat) != SET)
3760 abort ();
3761 src = SET_SRC (pat);
3762
3763 if (GET_CODE (src) == CONST_INT)
3764 {
3765 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3766 {
3767 changed = 1;
3768 const_prop_count++;
3769 if (gcse_file != NULL)
3770 {
3771 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ",
3772 regno, INSN_UID (insn));
3773 fprintf (gcse_file, HOST_WIDE_INT_PRINT_DEC, INTVAL (src));
3774 fprintf (gcse_file, "\n");
3775 }
3776
3777 /* The original insn setting reg_used may or may not now be
3778 deletable. We leave the deletion to flow. */
3779 }
3780 }
3781 else if (GET_CODE (src) == REG
3782 && REGNO (src) >= FIRST_PSEUDO_REGISTER
3783 && REGNO (src) != regno)
3784 {
3785 /* We know the set is available.
3786 Now check that SET_SRC is ANTLOC (i.e. none of the source operands
3787 have changed since the start of the block). */
3788 if (oprs_not_set_p (src, insn))
3789 {
3790 if (try_replace_reg (reg_used->reg_rtx, src, insn))
3791 {
3792 changed = 1;
3793 copy_prop_count++;
3794 if (gcse_file != NULL)
3795 {
3796 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n",
3797 regno, INSN_UID (insn), REGNO (src));
3798 }
3799
3800 /* The original insn setting reg_used may or may not now be
3801 deletable. We leave the deletion to flow. */
3802 /* FIXME: If it turns out that the insn isn't deletable,
3803 then we may have unnecessarily extended register lifetimes
3804 and made things worse. */
3805 }
3806 }
3807 }
3808 }
3809
3810 return changed;
3811 }
3812
3813 /* Forward propagate copies.
3814 This includes copies and constants.
3815 Return non-zero if a change was made. */
3816
3817 static int
3818 cprop ()
3819 {
3820 int bb, changed;
3821 rtx insn;
3822
3823 /* Note we start at block 1. */
3824
3825 changed = 0;
3826 for (bb = 1; bb < n_basic_blocks; bb++)
3827 {
3828 /* Reset tables used to keep track of what's still valid [since the
3829 start of the block]. */
3830 reset_opr_set_tables ();
3831
3832 for (insn = BLOCK_HEAD (bb);
3833 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
3834 insn = NEXT_INSN (insn))
3835 {
3836 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
3837 {
3838 changed |= cprop_insn (insn);
3839
3840 /* Keep track of everything modified by this insn. */
3841 /* ??? Need to be careful w.r.t. mods done to INSN. */
3842 mark_oprs_set (insn);
3843 }
3844 }
3845 }
3846
3847 if (gcse_file != NULL)
3848 fprintf (gcse_file, "\n");
3849
3850 return changed;
3851 }
3852
3853 /* Perform one copy/constant propagation pass.
3854 F is the first insn in the function.
3855 PASS is the pass count. */
3856
3857 static int
3858 one_cprop_pass (f, pass)
3859 rtx f;
3860 int pass;
3861 {
3862 int changed = 0;
3863
3864 const_prop_count = 0;
3865 copy_prop_count = 0;
3866
3867 alloc_set_hash_table (max_cuid);
3868 compute_set_hash_table (f);
3869 if (gcse_file)
3870 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
3871 n_sets);
3872 if (n_sets > 0)
3873 {
3874 alloc_cprop_mem (n_basic_blocks, n_sets);
3875 compute_cprop_data ();
3876 changed = cprop ();
3877 free_cprop_mem ();
3878 }
3879 free_set_hash_table ();
3880
3881 if (gcse_file)
3882 {
3883 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n",
3884 current_function_name, pass,
3885 bytes_used, const_prop_count, copy_prop_count);
3886 fprintf (gcse_file, "\n");
3887 }
3888
3889 return changed;
3890 }
3891 \f
3892 /* Compute PRE working variables. */
3893
3894 /* Local properties of expressions. */
3895 /* Nonzero for expressions that are transparent in the block. */
3896 static sbitmap *pre_transp;
3897 /* Nonzero for expressions that are computed (available) in the block. */
3898 static sbitmap *pre_comp;
3899 /* Nonzero for expressions that are locally anticipatable in the block. */
3900 static sbitmap *pre_antloc;
3901
3902 /* Global properties (computed from the expression local properties). */
3903 /* Nonzero for expressions that are available on block entry/exit. */
3904 static sbitmap *pre_avin;
3905 static sbitmap *pre_avout;
3906 /* Nonzero for expressions that are anticipatable on block entry/exit. */
3907 static sbitmap *pre_antin;
3908 static sbitmap *pre_antout;
3909 /* Nonzero for expressions that are partially available on block entry/exit. */
3910 static sbitmap *pre_pavin;
3911 static sbitmap *pre_pavout;
3912 /* Nonzero for expressions that are "placement possible" on block entry/exit. */
3913 static sbitmap *pre_ppin;
3914 static sbitmap *pre_ppout;
3915
3916 /* Nonzero for expressions that are transparent at the end of the block.
3917 This is only zero for expressions killed by abnormal critical edge
3918 created by a calls. */
3919 static sbitmap *pre_transpout;
3920
3921 /* Used while performing PRE to denote which insns are redundant. */
3922 static sbitmap pre_redundant;
3923
3924 /* Allocate vars used for PRE analysis. */
3925
3926 static void
3927 alloc_pre_mem (n_blocks, n_exprs)
3928 int n_blocks, n_exprs;
3929 {
3930 pre_transp = sbitmap_vector_alloc (n_blocks, n_exprs);
3931 pre_comp = sbitmap_vector_alloc (n_blocks, n_exprs);
3932 pre_antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
3933
3934 pre_avin = sbitmap_vector_alloc (n_blocks, n_exprs);
3935 pre_avout = sbitmap_vector_alloc (n_blocks, n_exprs);
3936 pre_antin = sbitmap_vector_alloc (n_blocks, n_exprs);
3937 pre_antout = sbitmap_vector_alloc (n_blocks, n_exprs);
3938
3939 pre_pavin = sbitmap_vector_alloc (n_blocks, n_exprs);
3940 pre_pavout = sbitmap_vector_alloc (n_blocks, n_exprs);
3941 pre_ppin = sbitmap_vector_alloc (n_blocks, n_exprs);
3942 pre_ppout = sbitmap_vector_alloc (n_blocks, n_exprs);
3943
3944 pre_transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
3945 }
3946
3947 /* Free vars used for PRE analysis. */
3948
3949 static void
3950 free_pre_mem ()
3951 {
3952 free (pre_transp);
3953 free (pre_comp);
3954 free (pre_antloc);
3955 free (pre_avin);
3956 free (pre_avout);
3957 free (pre_antin);
3958 free (pre_antout);
3959
3960 free (pre_pavin);
3961 free (pre_pavout);
3962 free (pre_ppin);
3963 free (pre_ppout);
3964 free (pre_transpout);
3965 }
3966
3967 /* Dump PRE data. */
3968
3969 void
3970 dump_pre_data (file)
3971 FILE *file;
3972 {
3973 dump_sbitmap_vector (file, "PRE locally transparent expressions", "BB",
3974 pre_transp, n_basic_blocks);
3975 dump_sbitmap_vector (file, "PRE locally available expressions", "BB",
3976 pre_comp, n_basic_blocks);
3977 dump_sbitmap_vector (file, "PRE locally anticipatable expressions", "BB",
3978 pre_antloc, n_basic_blocks);
3979
3980 dump_sbitmap_vector (file, "PRE available incoming expressions", "BB",
3981 pre_avin, n_basic_blocks);
3982 dump_sbitmap_vector (file, "PRE available outgoing expressions", "BB",
3983 pre_avout, n_basic_blocks);
3984 dump_sbitmap_vector (file, "PRE anticipatable incoming expressions", "BB",
3985 pre_antin, n_basic_blocks);
3986 dump_sbitmap_vector (file, "PRE anticipatable outgoing expressions", "BB",
3987 pre_antout, n_basic_blocks);
3988
3989 dump_sbitmap_vector (file, "PRE partially available incoming expressions", "BB",
3990 pre_pavin, n_basic_blocks);
3991 dump_sbitmap_vector (file, "PRE partially available outgoing expressions", "BB",
3992 pre_pavout, n_basic_blocks);
3993 dump_sbitmap_vector (file, "PRE placement possible on incoming", "BB",
3994 pre_ppin, n_basic_blocks);
3995 dump_sbitmap_vector (file, "PRE placement possible on outgoing", "BB",
3996 pre_ppout, n_basic_blocks);
3997
3998 dump_sbitmap_vector (file, "PRE transparent on outgoing", "BB",
3999 pre_transpout, n_basic_blocks);
4000 }
4001
4002 /* Compute the local properties of each recorded expression.
4003 Local properties are those that are defined by the block, irrespective
4004 of other blocks.
4005
4006 An expression is transparent in a block if its operands are not modified
4007 in the block.
4008
4009 An expression is computed (locally available) in a block if it is computed
4010 at least once and expression would contain the same value if the
4011 computation was moved to the end of the block.
4012
4013 An expression is locally anticipatable in a block if it is computed at
4014 least once and expression would contain the same value if the computation
4015 was moved to the beginning of the block. */
4016
4017 static void
4018 compute_pre_local_properties ()
4019 {
4020 int i;
4021
4022 sbitmap_vector_ones (pre_transp, n_basic_blocks);
4023 sbitmap_vector_zero (pre_comp, n_basic_blocks);
4024 sbitmap_vector_zero (pre_antloc, n_basic_blocks);
4025
4026 for (i = 0; i < expr_hash_table_size; i++)
4027 {
4028 struct expr *expr;
4029
4030 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4031 {
4032 struct occr *occr;
4033 int indx = expr->bitmap_index;
4034
4035 /* The expression is transparent in this block if it is not killed.
4036 We start by assuming all are transparent [none are killed], and then
4037 reset the bits for those that are. */
4038
4039 compute_transp (expr->expr, indx, pre_transp, 0);
4040
4041 /* The occurrences recorded in antic_occr are exactly those that
4042 we want to set to non-zero in ANTLOC. */
4043
4044 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4045 {
4046 int bb = BLOCK_NUM (occr->insn);
4047 SET_BIT (pre_antloc[bb], indx);
4048
4049 /* While we're scanning the table, this is a good place to
4050 initialize this. */
4051 occr->deleted_p = 0;
4052 }
4053
4054 /* The occurrences recorded in avail_occr are exactly those that
4055 we want to set to non-zero in COMP. */
4056
4057 for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
4058 {
4059 int bb = BLOCK_NUM (occr->insn);
4060 SET_BIT (pre_comp[bb], indx);
4061
4062 /* While we're scanning the table, this is a good place to
4063 initialize this. */
4064 occr->copied_p = 0;
4065 }
4066
4067 /* While we're scanning the table, this is a good place to
4068 initialize this. */
4069 expr->reaching_reg = 0;
4070 }
4071 }
4072 }
4073
4074 /* Compute expression availability at entrance and exit of each block. */
4075
4076 static void
4077 compute_pre_avinout ()
4078 {
4079 int bb, changed, passes;
4080
4081 sbitmap_zero (pre_avin[0]);
4082 sbitmap_vector_ones (pre_avout, n_basic_blocks);
4083
4084 passes = 0;
4085 changed = 1;
4086 while (changed)
4087 {
4088 changed = 0;
4089 for (bb = 0; bb < n_basic_blocks; bb++)
4090 {
4091 if (bb != 0)
4092 sbitmap_intersect_of_predecessors (pre_avin[bb], pre_avout,
4093 bb, s_preds);
4094 changed |= sbitmap_a_or_b_and_c (pre_avout[bb], pre_comp[bb],
4095 pre_transp[bb], pre_avin[bb]);
4096 }
4097 passes++;
4098 }
4099
4100 if (gcse_file)
4101 fprintf (gcse_file, "avail expr computation: %d passes\n", passes);
4102 }
4103
4104 /* Compute expression anticipatability at entrance and exit of each block. */
4105
4106 static void
4107 compute_pre_antinout ()
4108 {
4109 int bb, changed, passes;
4110
4111 sbitmap_zero (pre_antout[n_basic_blocks - 1]);
4112 sbitmap_vector_ones (pre_antin, n_basic_blocks);
4113
4114 passes = 0;
4115 changed = 1;
4116 while (changed)
4117 {
4118 changed = 0;
4119 /* We scan the blocks in the reverse order to speed up
4120 the convergence. */
4121 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
4122 {
4123 if (bb != n_basic_blocks - 1)
4124 sbitmap_intersect_of_successors (pre_antout[bb], pre_antin,
4125 bb, s_succs);
4126 changed |= sbitmap_a_or_b_and_c (pre_antin[bb], pre_antloc[bb],
4127 pre_transp[bb], pre_antout[bb]);
4128 }
4129 passes++;
4130 }
4131
4132 if (gcse_file)
4133 fprintf (gcse_file, "antic expr computation: %d passes\n", passes);
4134 }
4135
4136 /* Compute expression partial availability at entrance and exit of
4137 each block. */
4138
4139 static void
4140 compute_pre_pavinout ()
4141 {
4142 int bb, changed, passes;
4143
4144 sbitmap_zero (pre_pavin[0]);
4145 sbitmap_vector_zero (pre_pavout, n_basic_blocks);
4146
4147 passes = 0;
4148 changed = 1;
4149 while (changed)
4150 {
4151 changed = 0;
4152 for (bb = 0; bb < n_basic_blocks; bb++)
4153 {
4154 if (bb != 0)
4155 sbitmap_union_of_predecessors (pre_pavin[bb], pre_pavout,
4156 bb, s_preds);
4157 changed |= sbitmap_a_or_b_and_c (pre_pavout[bb], pre_comp[bb],
4158 pre_transp[bb], pre_pavin[bb]);
4159 }
4160 passes++;
4161 }
4162
4163 if (gcse_file)
4164 fprintf (gcse_file, "partially avail expr computation: %d passes\n", passes);
4165 }
4166
4167 /* Compute transparent outgoing information for each block.
4168
4169 An expression is transparent to an edge unless it is killed by
4170 the edge itself. This can only happen with abnormal control flow,
4171 when the edge is traversed through a call. This happens with
4172 non-local labels and exceptions.
4173
4174 This would not be necessary if we split the edge. While this is
4175 normally impossible for abnormal critical edges, with some effort
4176 it should be possible with exception handling, since we still have
4177 control over which handler should be invoked. But due to increased
4178 EH table sizes, this may not be worthwhile. */
4179
4180 static void
4181 compute_pre_transpout ()
4182 {
4183 int bb;
4184
4185 sbitmap_vector_ones (pre_transpout, n_basic_blocks);
4186
4187 for (bb = 0; bb < n_basic_blocks; ++bb)
4188 {
4189 int i;
4190
4191 /* Note that flow inserted a nop a the end of basic blocks that
4192 end in call instructions for reasons other than abnormal
4193 control flow. */
4194 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
4195 continue;
4196
4197 for (i = 0; i < expr_hash_table_size; i++)
4198 {
4199 struct expr *expr;
4200 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
4201 if (GET_CODE (expr->expr) == MEM)
4202 {
4203 rtx addr = XEXP (expr->expr, 0);
4204
4205 if (GET_CODE (addr) == SYMBOL_REF
4206 && CONSTANT_POOL_ADDRESS_P (addr))
4207 continue;
4208
4209 /* ??? Optimally, we would use interprocedural alias
4210 analysis to determine if this mem is actually killed
4211 by this call. */
4212 RESET_BIT (pre_transpout[bb], expr->bitmap_index);
4213 }
4214 }
4215 }
4216 }
4217
4218 /* Compute "placement possible" information on entrance and exit of
4219 each block.
4220
4221 From Fred Chow's Thesis:
4222 A computation `e' is PP at a point `p' if it is anticipated at `p' and
4223 all the anticipated e's can be rendered redundant by zero or more
4224 insertions at that point and some other points in the procedure, and
4225 these insertions satisfy the conditions that the insertions are always
4226 at points that `e' is anticipated and the first anticipated e's after the
4227 insertions are rendered redundant. */
4228
4229 static void
4230 compute_pre_ppinout ()
4231 {
4232 int bb, i, changed, size, passes;
4233
4234 sbitmap_vector_ones (pre_ppin, n_basic_blocks);
4235 /* ??? Inefficient as we set pre_ppin[0] twice, but simple. */
4236 sbitmap_zero (pre_ppin[0]);
4237
4238 sbitmap_vector_ones (pre_ppout, n_basic_blocks);
4239 /* ??? Inefficient as we set pre_ppout[n_basic_blocks-1] twice, but simple. */
4240 sbitmap_zero (pre_ppout[n_basic_blocks - 1]);
4241
4242 size = pre_ppin[0]->size;
4243 passes = 0;
4244 changed = 1;
4245 while (changed)
4246 {
4247 changed = 0;
4248 for (bb = 1; bb < n_basic_blocks; bb++)
4249 {
4250 sbitmap_ptr antin = pre_antin[bb]->elms;
4251 sbitmap_ptr pavin = pre_pavin[bb]->elms;
4252 sbitmap_ptr antloc = pre_antloc[bb]->elms;
4253 sbitmap_ptr transp = pre_transp[bb]->elms;
4254 sbitmap_ptr ppout = pre_ppout[bb]->elms;
4255 sbitmap_ptr ppin = pre_ppin[bb]->elms;
4256
4257 for (i = 0; i < size; i++)
4258 {
4259 int_list_ptr pred;
4260 SBITMAP_ELT_TYPE tmp = *antin & *pavin & (*antloc | (*transp & *ppout));
4261 SBITMAP_ELT_TYPE pred_val = (SBITMAP_ELT_TYPE) -1;
4262
4263 for (pred = s_preds[bb]; pred != NULL; pred = pred->next)
4264 {
4265 int pred_bb = INT_LIST_VAL (pred);
4266 sbitmap_ptr ppout_j,avout_j;
4267
4268 if (pred_bb == ENTRY_BLOCK)
4269 continue;
4270
4271 /* If this is a back edge, propagate info along the back
4272 edge to allow for loop invariant code motion.
4273
4274 See FOLLOW_BACK_EDGES at the top of this file for a longer
4275 discussion about loop invariant code motion in pre. */
4276 if (! FOLLOW_BACK_EDGES
4277 && (INSN_CUID (BLOCK_HEAD (bb))
4278 < INSN_CUID (BLOCK_END (pred_bb))))
4279 {
4280 pred_val = 0;
4281 }
4282 else
4283 {
4284 ppout_j = pre_ppout[pred_bb]->elms + i;
4285 avout_j = pre_avout[pred_bb]->elms + i;
4286 pred_val &= *ppout_j | *avout_j;
4287 }
4288 }
4289 tmp &= pred_val;
4290 *ppin = tmp;
4291 antin++; pavin++; antloc++; transp++; ppout++; ppin++;
4292 }
4293 }
4294
4295 for (bb = 0; bb < n_basic_blocks - 1; bb++)
4296 {
4297 sbitmap_ptr ppout = pre_ppout[bb]->elms;
4298 sbitmap_ptr transpout = pre_transpout[bb]->elms;
4299
4300 for (i = 0; i < size; i++)
4301 {
4302 int_list_ptr succ;
4303 SBITMAP_ELT_TYPE tmp = *transpout;
4304
4305 for (succ = s_succs[bb]; succ != NULL; succ = succ->next)
4306 {
4307 int succ_bb = INT_LIST_VAL (succ);
4308 sbitmap_ptr ppin;
4309
4310 if (succ_bb == EXIT_BLOCK)
4311 continue;
4312
4313 ppin = pre_ppin[succ_bb]->elms + i;
4314 tmp &= *ppin;
4315 }
4316
4317 if (*ppout != tmp)
4318 {
4319 changed = 1;
4320 *ppout = tmp;
4321 }
4322
4323 ppout++; transpout++;
4324 }
4325 }
4326
4327 passes++;
4328 }
4329
4330 if (gcse_file)
4331 fprintf (gcse_file, "placement possible computation: %d passes\n", passes);
4332 }
4333
4334 /* Top level routine to do the dataflow analysis needed by PRE. */
4335
4336 static void
4337 compute_pre_data ()
4338 {
4339 compute_pre_local_properties ();
4340 compute_pre_avinout ();
4341 compute_pre_antinout ();
4342 compute_pre_pavinout ();
4343 compute_pre_transpout ();
4344 compute_pre_ppinout ();
4345 if (gcse_file)
4346 fprintf (gcse_file, "\n");
4347 }
4348 \f
4349 /* PRE utilities */
4350
4351 /* Return non-zero if occurrence OCCR of expression EXPR reaches block BB.
4352
4353 VISITED is a pointer to a working buffer for tracking which BB's have
4354 been visited. It is NULL for the top-level call.
4355
4356 We treat reaching expressions that go through blocks containing the same
4357 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
4358 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
4359 2 as not reaching. The intent is to improve the probability of finding
4360 only one reaching expression and to reduce register lifetimes by picking
4361 the closest such expression. */
4362
4363 static int
4364 pre_expr_reaches_here_p (occr, expr, bb, visited)
4365 struct occr *occr;
4366 struct expr *expr;
4367 int bb;
4368 char *visited;
4369 {
4370 int_list_ptr pred;
4371
4372 if (visited == NULL)
4373 {
4374 visited = (char *) alloca (n_basic_blocks);
4375 bzero (visited, n_basic_blocks);
4376 }
4377
4378 for (pred = s_preds[bb]; pred != NULL; pred = pred->next)
4379 {
4380 int pred_bb = INT_LIST_VAL (pred);
4381
4382 if (pred_bb == ENTRY_BLOCK
4383 /* Has predecessor has already been visited? */
4384 || visited[pred_bb])
4385 {
4386 /* Nothing to do. */
4387 }
4388 /* Does this predecessor generate this expression? */
4389 else if (TEST_BIT (pre_comp[pred_bb], expr->bitmap_index))
4390 {
4391 /* Is this the occurrence we're looking for?
4392 Note that there's only one generating occurrence per block
4393 so we just need to check the block number. */
4394 if (BLOCK_NUM (occr->insn) == pred_bb)
4395 return 1;
4396 visited[pred_bb] = 1;
4397 }
4398 /* Ignore this predecessor if it kills the expression. */
4399 else if (! TEST_BIT (pre_transp[pred_bb], expr->bitmap_index))
4400 visited[pred_bb] = 1;
4401 /* Neither gen nor kill. */
4402 else
4403 {
4404 visited[pred_bb] = 1;
4405 if (pre_expr_reaches_here_p (occr, expr, pred_bb, visited))
4406 return 1;
4407 }
4408 }
4409
4410 /* All paths have been checked. */
4411 return 0;
4412 }
4413 \f
4414 /* Add EXPR to the end of basic block BB. */
4415
4416 static void
4417 pre_insert_insn (expr, bb)
4418 struct expr *expr;
4419 int bb;
4420 {
4421 rtx insn = BLOCK_END (bb);
4422 rtx new_insn;
4423 rtx reg = expr->reaching_reg;
4424 int regno = REGNO (reg);
4425 rtx pat;
4426
4427 pat = gen_rtx_SET (VOIDmode, reg, copy_rtx (expr->expr));
4428
4429 /* If the last insn is a jump, insert EXPR in front [taking care to
4430 handle cc0, etc. properly]. */
4431
4432 if (GET_CODE (insn) == JUMP_INSN)
4433 {
4434 #ifdef HAVE_cc0
4435 rtx note;
4436 #endif
4437
4438 /* If this is a jump table, then we can't insert stuff here. Since
4439 we know the previous real insn must be the tablejump, we insert
4440 the new instruction just before the tablejump. */
4441 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
4442 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
4443 insn = prev_real_insn (insn);
4444
4445 #ifdef HAVE_cc0
4446 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
4447 if cc0 isn't set. */
4448 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
4449 if (note)
4450 insn = XEXP (note, 0);
4451 else
4452 {
4453 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
4454 if (maybe_cc0_setter
4455 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i'
4456 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
4457 insn = maybe_cc0_setter;
4458 }
4459 #endif
4460 /* FIXME: What if something in cc0/jump uses value set in new insn? */
4461 new_insn = emit_insn_before (pat, insn);
4462 add_label_notes (SET_SRC (pat), new_insn);
4463 if (BLOCK_HEAD (bb) == insn)
4464 BLOCK_HEAD (bb) = new_insn;
4465 }
4466 /* Likewise if the last insn is a call, as will happen in the presence
4467 of exception handling. */
4468 else if (GET_CODE (insn) == CALL_INSN)
4469 {
4470 HARD_REG_SET parm_regs;
4471 int nparm_regs;
4472 rtx p;
4473
4474 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
4475 we search backward and place the instructions before the first
4476 parameter is loaded. Do this for everyone for consistency and a
4477 presumtion that we'll get better code elsewhere as well. */
4478
4479 /* It should always be the case that we can put these instructions
4480 anywhere in the basic block. Check this. */
4481 /* ??? Well, it would be the case if we'd split all critical edges.
4482 Since we didn't, we may very well abort. */
4483 if (!TEST_BIT (pre_antloc[bb], expr->bitmap_index)
4484 && !TEST_BIT (pre_transp[bb], expr->bitmap_index))
4485 abort ();
4486
4487 /* Since different machines initialize their parameter registers
4488 in different orders, assume nothing. Collect the set of all
4489 parameter registers. */
4490 CLEAR_HARD_REG_SET (parm_regs);
4491 nparm_regs = 0;
4492 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1))
4493 if (GET_CODE (XEXP (p, 0)) == USE
4494 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG)
4495 {
4496 int regno = REGNO (XEXP (XEXP (p, 0), 0));
4497 if (regno >= FIRST_PSEUDO_REGISTER)
4498 abort ();
4499 SET_HARD_REG_BIT (parm_regs, regno);
4500 nparm_regs++;
4501 }
4502
4503 /* Search backward for the first set of a register in this set. */
4504 while (nparm_regs && BLOCK_HEAD (bb) != insn)
4505 {
4506 insn = PREV_INSN (insn);
4507 p = single_set (insn);
4508 if (p && GET_CODE (SET_DEST (p)) == REG
4509 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER
4510 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))))
4511 {
4512 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)));
4513 nparm_regs--;
4514 }
4515 }
4516
4517 new_insn = emit_insn_before (pat, insn);
4518 if (BLOCK_HEAD (bb) == insn)
4519 BLOCK_HEAD (bb) = new_insn;
4520 }
4521 else
4522 {
4523 new_insn = emit_insn_after (pat, insn);
4524 add_label_notes (SET_SRC (pat), new_insn);
4525 BLOCK_END (bb) = new_insn;
4526 }
4527
4528 /* Keep block number table up to date. */
4529 set_block_num (new_insn, bb);
4530 /* Keep register set table up to date. */
4531 record_one_set (regno, new_insn);
4532
4533 gcse_create_count++;
4534
4535 if (gcse_file)
4536 {
4537 fprintf (gcse_file, "PRE: end of bb %d, insn %d, copying expression %d to reg %d\n",
4538 bb, INSN_UID (new_insn), expr->bitmap_index, regno);
4539 }
4540 }
4541
4542 /* Insert partially redundant expressions at the ends of appropriate basic
4543 blocks making them now redundant. */
4544
4545 static void
4546 pre_insert (index_map)
4547 struct expr **index_map;
4548 {
4549 int bb, i, size;
4550
4551 /* Compute INSERT = PPOUT & (~AVOUT) & (~PPIN | ~TRANSP) for each
4552 expression. Where INSERT == TRUE, add the expression at the end of
4553 the basic block. */
4554
4555 size = pre_ppout[0]->size;
4556 for (bb = 0; bb < n_basic_blocks; bb++)
4557 {
4558 int indx;
4559 sbitmap_ptr ppout = pre_ppout[bb]->elms;
4560 sbitmap_ptr avout = pre_avout[bb]->elms;
4561 sbitmap_ptr ppin = pre_ppin[bb]->elms;
4562 sbitmap_ptr transp = pre_transp[bb]->elms;
4563
4564 for (i = indx = 0;
4565 i < size;
4566 i++, indx += SBITMAP_ELT_BITS, ppout++, avout++, ppin++, transp++)
4567 {
4568 int j;
4569 SBITMAP_ELT_TYPE insert = *ppout & (~*avout) & (~*ppin | ~*transp);
4570
4571 for (j = indx; insert != 0 && j < n_exprs; j++, insert >>= 1)
4572 {
4573 if ((insert & 1) != 0
4574 /* If the basic block isn't reachable, PPOUT will be TRUE.
4575 However, we don't want to insert a copy here because the
4576 expression may not really be redundant. So only insert
4577 an insn if the expression was deleted. */
4578 && index_map[j]->reaching_reg != NULL)
4579 pre_insert_insn (index_map[j], bb);
4580 }
4581 }
4582 }
4583 }
4584
4585 /* Copy the result of INSN to REG.
4586 INDX is the expression number. */
4587
4588 static void
4589 pre_insert_copy_insn (expr, insn)
4590 struct expr *expr;
4591 rtx insn;
4592 {
4593 rtx reg = expr->reaching_reg;
4594 int regno = REGNO (reg);
4595 int indx = expr->bitmap_index;
4596 rtx set = single_set (insn);
4597 rtx new_insn;
4598
4599 if (!set)
4600 abort ();
4601 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)),
4602 insn);
4603 /* Keep block number table up to date. */
4604 set_block_num (new_insn, BLOCK_NUM (insn));
4605 /* Keep register set table up to date. */
4606 record_one_set (regno, new_insn);
4607
4608 gcse_create_count++;
4609
4610 if (gcse_file)
4611 {
4612 fprintf (gcse_file, "PRE: bb %d, insn %d, copying expression %d in insn %d to reg %d\n",
4613 BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno);
4614 }
4615 }
4616
4617 /* Copy available expressions that reach the redundant expression
4618 to `reaching_reg'. */
4619
4620 static void
4621 pre_insert_copies ()
4622 {
4623 int i;
4624
4625 /* For each available expression in the table, copy the result to
4626 `reaching_reg' if the expression reaches a deleted one.
4627
4628 ??? The current algorithm is rather brute force.
4629 Need to do some profiling. */
4630
4631 for (i = 0; i < expr_hash_table_size; i++)
4632 {
4633 struct expr *expr;
4634
4635 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4636 {
4637 struct occr *occr;
4638
4639 /* If the basic block isn't reachable, PPOUT will be TRUE.
4640 However, we don't want to insert a copy here because the
4641 expression may not really be redundant. So only insert
4642 an insn if the expression was deleted.
4643 This test also avoids further processing if the expression
4644 wasn't deleted anywhere. */
4645 if (expr->reaching_reg == NULL)
4646 continue;
4647
4648 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4649 {
4650 struct occr *avail;
4651
4652 if (! occr->deleted_p)
4653 continue;
4654
4655 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
4656 {
4657 rtx insn = avail->insn;
4658
4659 /* No need to handle this one if handled already. */
4660 if (avail->copied_p)
4661 continue;
4662 /* Don't handle this one if it's a redundant one. */
4663 if (TEST_BIT (pre_redundant, INSN_CUID (insn)))
4664 continue;
4665 /* Or if the expression doesn't reach the deleted one. */
4666 if (! pre_expr_reaches_here_p (avail, expr,
4667 BLOCK_NUM (occr->insn),
4668 NULL))
4669 continue;
4670
4671 /* Copy the result of avail to reaching_reg. */
4672 pre_insert_copy_insn (expr, insn);
4673 avail->copied_p = 1;
4674 }
4675 }
4676 }
4677 }
4678 }
4679
4680 /* Delete redundant computations.
4681 These are ones that satisy ANTLOC & PPIN.
4682 Deletion is done by changing the insn to copy the `reaching_reg' of
4683 the expression into the result of the SET. It is left to later passes
4684 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
4685
4686 Returns non-zero if a change is made. */
4687
4688 static int
4689 pre_delete ()
4690 {
4691 int i, changed;
4692
4693 changed = 0;
4694 for (i = 0; i < expr_hash_table_size; i++)
4695 {
4696 struct expr *expr;
4697
4698 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4699 {
4700 struct occr *occr;
4701 int indx = expr->bitmap_index;
4702
4703 /* We only need to search antic_occr since we require
4704 ANTLOC != 0. */
4705
4706 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
4707 {
4708 rtx insn = occr->insn;
4709 rtx set;
4710 int bb = BLOCK_NUM (insn);
4711 sbitmap ppin = pre_ppin[bb];
4712
4713 if (TEST_BIT (ppin, indx))
4714 {
4715 set = single_set (insn);
4716 if (! set)
4717 abort ();
4718
4719 /* Create a pseudo-reg to store the result of reaching
4720 expressions into. Get the mode for the new pseudo
4721 from the mode of the original destination pseudo. */
4722 if (expr->reaching_reg == NULL)
4723 expr->reaching_reg
4724 = gen_reg_rtx (GET_MODE (SET_DEST (set)));
4725
4726 /* In theory this should never fail since we're creating
4727 a reg->reg copy.
4728
4729 However, on the x86 some of the movXX patterns actually
4730 contain clobbers of scratch regs. This may cause the
4731 insn created by validate_change to not patch any pattern
4732 and thus cause validate_change to fail. */
4733 if (validate_change (insn, &SET_SRC (set),
4734 expr->reaching_reg, 0))
4735 {
4736 occr->deleted_p = 1;
4737 SET_BIT (pre_redundant, INSN_CUID (insn));
4738 changed = 1;
4739 gcse_subst_count++;
4740 }
4741
4742 if (gcse_file)
4743 {
4744 fprintf (gcse_file, "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n",
4745 INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg));
4746 }
4747 }
4748 }
4749 }
4750 }
4751
4752 return changed;
4753 }
4754
4755 /* Perform GCSE optimizations using PRE.
4756 This is called by one_pre_gcse_pass after all the dataflow analysis
4757 has been done.
4758
4759 This is based on the original Morel-Renvoise paper and Fred Chow's thesis.
4760
4761 The M-R paper uses "TRANSP" to describe an expression as being transparent
4762 in a block where as Chow's thesis uses "ALTERED". We use TRANSP.
4763
4764 ??? A new pseudo reg is created to hold the reaching expression.
4765 The nice thing about the classical approach is that it would try to
4766 use an existing reg. If the register can't be adequately optimized
4767 [i.e. we introduce reload problems], one could add a pass here to
4768 propagate the new register through the block.
4769
4770 ??? We don't handle single sets in PARALLELs because we're [currently]
4771 not able to copy the rest of the parallel when we insert copies to create
4772 full redundancies from partial redundancies. However, there's no reason
4773 why we can't handle PARALLELs in the cases where there are no partial
4774 redundancies. */
4775
4776 static int
4777 pre_gcse ()
4778 {
4779 int i;
4780 int changed;
4781 struct expr **index_map;
4782
4783 /* Compute a mapping from expression number (`bitmap_index') to
4784 hash table entry. */
4785
4786 index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
4787 bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
4788 for (i = 0; i < expr_hash_table_size; i++)
4789 {
4790 struct expr *expr;
4791
4792 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
4793 index_map[expr->bitmap_index] = expr;
4794 }
4795
4796 /* Reset bitmap used to track which insns are redundant. */
4797 pre_redundant = sbitmap_alloc (max_cuid);
4798 sbitmap_zero (pre_redundant);
4799
4800 /* Delete the redundant insns first so that
4801 - we know what register to use for the new insns and for the other
4802 ones with reaching expressions
4803 - we know which insns are redundant when we go to create copies */
4804 changed = pre_delete ();
4805
4806 /* Insert insns in places that make partially redundant expressions
4807 fully redundant. */
4808 pre_insert (index_map);
4809
4810 /* In other places with reaching expressions, copy the expression to the
4811 specially allocated pseudo-reg that reaches the redundant expression. */
4812 pre_insert_copies ();
4813
4814 free (pre_redundant);
4815
4816 return changed;
4817 }
4818
4819 /* Top level routine to perform one PRE GCSE pass.
4820
4821 Return non-zero if a change was made. */
4822
4823 static int
4824 one_pre_gcse_pass (f, pass)
4825 rtx f;
4826 int pass;
4827 {
4828 int changed = 0;
4829
4830 gcse_subst_count = 0;
4831 gcse_create_count = 0;
4832
4833 alloc_expr_hash_table (max_cuid);
4834 compute_expr_hash_table (f);
4835 if (gcse_file)
4836 dump_hash_table (gcse_file, "Expression", expr_hash_table,
4837 expr_hash_table_size, n_exprs);
4838 if (n_exprs > 0)
4839 {
4840 alloc_pre_mem (n_basic_blocks, n_exprs);
4841 compute_pre_data ();
4842 changed |= pre_gcse ();
4843 free_pre_mem ();
4844 }
4845 free_expr_hash_table ();
4846
4847 if (gcse_file)
4848 {
4849 fprintf (gcse_file, "\n");
4850 fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n",
4851 current_function_name, pass,
4852 bytes_used, gcse_subst_count, gcse_create_count);
4853 }
4854
4855 return changed;
4856 }
4857 \f
4858 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
4859 We have to add REG_LABEL notes, because the following loop optimization
4860 pass requires them. */
4861
4862 /* ??? This is very similar to the loop.c add_label_notes function. We
4863 could probably share code here. */
4864
4865 /* ??? If there was a jump optimization pass after gcse and before loop,
4866 then we would not need to do this here, because jump would add the
4867 necessary REG_LABEL notes. */
4868
4869 static void
4870 add_label_notes (x, insn)
4871 rtx x;
4872 rtx insn;
4873 {
4874 enum rtx_code code = GET_CODE (x);
4875 int i, j;
4876 char *fmt;
4877
4878 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4879 {
4880 /* This code used to ignore labels that referred to dispatch tables to
4881 avoid flow generating (slighly) worse code.
4882
4883 We no longer ignore such label references (see LABEL_REF handling in
4884 mark_jump_label for additional information). */
4885 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0),
4886 REG_NOTES (insn));
4887 return;
4888 }
4889
4890 fmt = GET_RTX_FORMAT (code);
4891 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4892 {
4893 if (fmt[i] == 'e')
4894 add_label_notes (XEXP (x, i), insn);
4895 else if (fmt[i] == 'E')
4896 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4897 add_label_notes (XVECEXP (x, i, j), insn);
4898 }
4899 }