1 /* GIMPLE store merging pass.
2 Copyright (C) 2016-2017 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The purpose of this pass is to combine multiple memory stores of
22 constant values to consecutive memory locations into fewer wider stores.
23 For example, if we have a sequence performing four byte stores to
24 consecutive memory locations:
29 we can transform this into a single 4-byte store if the target supports it:
30 [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness.
32 The algorithm is applied to each basic block in three phases:
34 1) Scan through the basic block recording constant assignments to
35 destinations that can be expressed as a store to memory of a certain size
36 at a certain bit offset. Record store chains to different bases in a
37 hash_map (m_stores) and make sure to terminate such chains when appropriate
38 (for example when the stored values get used subsequently).
39 These stores can be a result of structure element initializers, array stores
40 etc. A store_immediate_info object is recorded for every such store.
41 Record as many such assignments to a single base as possible until a
42 statement that interferes with the store sequence is encountered.
44 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
45 store_immediate_info objects) and coalesce contiguous stores into
46 merged_store_group objects.
48 For example, given the stores:
55 This phase would produce two merged_store_group objects, one recording the
56 two bytes stored in the memory region [p : p + 1] and another
57 recording the four bytes stored in the memory region [p + 3 : p + 6].
59 3) The merged_store_group objects produced in phase 2) are processed
60 to generate the sequence of wider stores that set the contiguous memory
61 regions to the sequence of bytes that correspond to it. This may emit
62 multiple stores per store group to handle contiguous stores that are not
63 of a size that is a power of 2. For example it can try to emit a 40-bit
64 store as a 32-bit store followed by an 8-bit store.
65 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or
66 TARGET_SLOW_UNALIGNED_ACCESS rules.
68 Note on endianness and example:
69 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
75 The memory layout for little-endian (LE) and big-endian (BE) must be:
85 To merge these into a single 48-bit merged value 'val' in phase 2)
86 on little-endian we insert stores to higher (consecutive) bitpositions
87 into the most significant bits of the merged value.
88 The final merged value would be: 0xcdab56781234
90 For big-endian we insert stores to higher bitpositions into the least
91 significant bits of the merged value.
92 The final merged value would be: 0x12345678abcd
94 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
95 followed by a 16-bit store. Again, we must consider endianness when
96 breaking down the 48-bit value 'val' computed above.
97 For little endian we emit:
98 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
99 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
101 Whereas for big-endian we emit:
102 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
103 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
107 #include "coretypes.h"
111 #include "builtins.h"
112 #include "fold-const.h"
113 #include "tree-pass.h"
115 #include "gimple-pretty-print.h"
117 #include "fold-const.h"
119 #include "print-tree.h"
120 #include "tree-hash-traits.h"
121 #include "gimple-iterator.h"
122 #include "gimplify.h"
123 #include "stor-layout.h"
125 #include "tree-cfg.h"
128 #include "gimplify-me.h"
130 #include "expr.h" /* For get_bit_range. */
131 #include "selftest.h"
133 /* The maximum size (in bits) of the stores this pass should generate. */
134 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
135 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
139 /* Struct recording the information about a single store of an immediate
140 to memory. These are created in the first phase and coalesced into
141 merged_store_group objects in the second phase. */
143 struct store_immediate_info
145 unsigned HOST_WIDE_INT bitsize
;
146 unsigned HOST_WIDE_INT bitpos
;
147 unsigned HOST_WIDE_INT bitregion_start
;
148 /* This is one past the last bit of the bit region. */
149 unsigned HOST_WIDE_INT bitregion_end
;
152 store_immediate_info (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
153 unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
154 gimple
*, unsigned int);
157 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs
,
158 unsigned HOST_WIDE_INT bp
,
159 unsigned HOST_WIDE_INT brs
,
160 unsigned HOST_WIDE_INT bre
,
163 : bitsize (bs
), bitpos (bp
), bitregion_start (brs
), bitregion_end (bre
),
164 stmt (st
), order (ord
)
168 /* Struct representing a group of stores to contiguous memory locations.
169 These are produced by the second phase (coalescing) and consumed in the
170 third phase that outputs the widened stores. */
172 struct merged_store_group
174 unsigned HOST_WIDE_INT start
;
175 unsigned HOST_WIDE_INT width
;
176 unsigned HOST_WIDE_INT bitregion_start
;
177 unsigned HOST_WIDE_INT bitregion_end
;
178 /* The size of the allocated memory for val and mask. */
179 unsigned HOST_WIDE_INT buf_size
;
180 unsigned HOST_WIDE_INT align_base
;
183 unsigned int first_order
;
184 unsigned int last_order
;
186 auto_vec
<store_immediate_info
*> stores
;
187 /* We record the first and last original statements in the sequence because
188 we'll need their vuse/vdef and replacement position. It's easier to keep
189 track of them separately as 'stores' is reordered by apply_stores. */
195 merged_store_group (store_immediate_info
*);
196 ~merged_store_group ();
197 void merge_into (store_immediate_info
*);
198 void merge_overlapping (store_immediate_info
*);
199 bool apply_stores ();
201 void do_merge (store_immediate_info
*);
/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex,
   followed by a newline so that consecutive dumps do not run together
   in the dump file.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  /* Be robust against a missing dump stream.  */
  if (!fd)
    return;

  for (unsigned int i = 0; i < len; i++)
    fprintf (fd, "%x ", ptr[i]);
  fprintf (fd, "\n");
}
217 /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
218 bits between adjacent elements. AMNT should be within
221 00011111|11100000 << 2 = 01111111|10000000
222 PTR[1] | PTR[0] PTR[1] | PTR[0]. */
225 shift_bytes_in_array (unsigned char *ptr
, unsigned int sz
, unsigned int amnt
)
230 unsigned char carry_over
= 0U;
231 unsigned char carry_mask
= (~0U) << (unsigned char) (BITS_PER_UNIT
- amnt
);
232 unsigned char clear_mask
= (~0U) << amnt
;
234 for (unsigned int i
= 0; i
< sz
; i
++)
236 unsigned prev_carry_over
= carry_over
;
237 carry_over
= (ptr
[i
] & carry_mask
) >> (BITS_PER_UNIT
- amnt
);
242 ptr
[i
] &= clear_mask
;
243 ptr
[i
] |= prev_carry_over
;
248 /* Like shift_bytes_in_array but for big-endian.
249 Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
250 bits between adjacent elements. AMNT should be within
253 00011111|11100000 >> 2 = 00000111|11111000
254 PTR[0] | PTR[1] PTR[0] | PTR[1]. */
257 shift_bytes_in_array_right (unsigned char *ptr
, unsigned int sz
,
263 unsigned char carry_over
= 0U;
264 unsigned char carry_mask
= ~(~0U << amnt
);
266 for (unsigned int i
= 0; i
< sz
; i
++)
268 unsigned prev_carry_over
= carry_over
;
269 carry_over
= ptr
[i
] & carry_mask
;
271 carry_over
<<= (unsigned char) BITS_PER_UNIT
- amnt
;
273 ptr
[i
] |= prev_carry_over
;
277 /* Clear out LEN bits starting from bit START in the byte array
278 PTR. This clears the bits to the *right* from START.
279 START must be within [0, BITS_PER_UNIT) and counts starting from
280 the least significant bit. */
283 clear_bit_region_be (unsigned char *ptr
, unsigned int start
,
288 /* Clear len bits to the right of start. */
289 else if (len
<= start
+ 1)
291 unsigned char mask
= (~(~0U << len
));
292 mask
= mask
<< (start
+ 1U - len
);
295 else if (start
!= BITS_PER_UNIT
- 1)
297 clear_bit_region_be (ptr
, start
, (start
% BITS_PER_UNIT
) + 1);
298 clear_bit_region_be (ptr
+ 1, BITS_PER_UNIT
- 1,
299 len
- (start
% BITS_PER_UNIT
) - 1);
301 else if (start
== BITS_PER_UNIT
- 1
302 && len
> BITS_PER_UNIT
)
304 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
305 memset (ptr
, 0, nbytes
);
306 if (len
% BITS_PER_UNIT
!= 0)
307 clear_bit_region_be (ptr
+ nbytes
, BITS_PER_UNIT
- 1,
308 len
% BITS_PER_UNIT
);
314 /* In the byte array PTR clear the bit region starting at bit
315 START and is LEN bits wide.
316 For regions spanning multiple bytes do this recursively until we reach
317 zero LEN or a region contained within a single byte. */
320 clear_bit_region (unsigned char *ptr
, unsigned int start
,
323 /* Degenerate base case. */
326 else if (start
>= BITS_PER_UNIT
)
327 clear_bit_region (ptr
+ 1, start
- BITS_PER_UNIT
, len
);
328 /* Second base case. */
329 else if ((start
+ len
) <= BITS_PER_UNIT
)
331 unsigned char mask
= (~0U) << (unsigned char) (BITS_PER_UNIT
- len
);
332 mask
>>= BITS_PER_UNIT
- (start
+ len
);
338 /* Clear most significant bits in a byte and proceed with the next byte. */
341 clear_bit_region (ptr
, start
, BITS_PER_UNIT
- start
);
342 clear_bit_region (ptr
+ 1, 0, len
- (BITS_PER_UNIT
- start
));
344 /* Whole bytes need to be cleared. */
345 else if (start
== 0 && len
> BITS_PER_UNIT
)
347 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
348 /* We could recurse on each byte but we clear whole bytes, so a simple
350 memset (ptr
, '\0', nbytes
);
351 /* Clear the remaining sub-byte region if there is one. */
352 if (len
% BITS_PER_UNIT
!= 0)
353 clear_bit_region (ptr
+ nbytes
, 0, len
% BITS_PER_UNIT
);
359 /* Write BITLEN bits of EXPR to the byte array PTR at
360 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
361 Return true if the operation succeeded. */
364 encode_tree_to_bitpos (tree expr
, unsigned char *ptr
, int bitlen
, int bitpos
,
365 unsigned int total_bytes
)
367 unsigned int first_byte
= bitpos
/ BITS_PER_UNIT
;
369 bool sub_byte_op_p
= ((bitlen
% BITS_PER_UNIT
)
370 || (bitpos
% BITS_PER_UNIT
)
371 || !int_mode_for_size (bitlen
, 0).exists ());
374 return native_encode_expr (tmp_int
, ptr
+ first_byte
, total_bytes
) != 0;
377 We are writing a non byte-sized quantity or at a position that is not
379 |--------|--------|--------| ptr + first_byte
381 xxx xxxxxxxx xxx< bp>
384 First native_encode_expr EXPR into a temporary buffer and shift each
385 byte in the buffer by 'bp' (carrying the bits over as necessary).
386 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
387 <------bitlen---->< bp>
388 Then we clear the destination bits:
389 |---00000|00000000|000-----| ptr + first_byte
390 <-------bitlen--->< bp>
392 Finally we ORR the bytes of the shifted EXPR into the cleared region:
393 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
396 We are writing a non byte-sized quantity or at a position that is not
398 ptr + first_byte |--------|--------|--------|
400 <bp >xxx xxxxxxxx xxx
403 First native_encode_expr EXPR into a temporary buffer and shift each
404 byte in the buffer to the right by (carrying the bits over as necessary).
405 We shift by as much as needed to align the most significant bit of EXPR
407 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
408 <---bitlen----> <bp ><-----bitlen----->
409 Then we clear the destination bits:
410 ptr + first_byte |-----000||00000000||00000---|
411 <bp ><-------bitlen----->
413 Finally we ORR the bytes of the shifted EXPR into the cleared region:
414 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
415 The awkwardness comes from the fact that bitpos is counted from the
416 most significant bit of a byte. */
418 /* Allocate an extra byte so that we have space to shift into. */
419 unsigned int byte_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr
))) + 1;
420 unsigned char *tmpbuf
= XALLOCAVEC (unsigned char, byte_size
);
421 memset (tmpbuf
, '\0', byte_size
);
422 /* The store detection code should only have allowed constants that are
423 accepted by native_encode_expr. */
424 if (native_encode_expr (expr
, tmpbuf
, byte_size
- 1) == 0)
427 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
428 bytes to write. This means it can write more than
429 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
430 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
431 bitlen and zero out the bits that are not relevant as well (that may
432 contain a sign bit due to sign-extension). */
434 = byte_size
- ROUND_UP (bitlen
, BITS_PER_UNIT
) / BITS_PER_UNIT
- 1;
435 /* On big-endian the padding is at the 'front' so just skip the initial
437 if (BYTES_BIG_ENDIAN
)
440 byte_size
-= padding
;
442 if (bitlen
% BITS_PER_UNIT
!= 0)
444 if (BYTES_BIG_ENDIAN
)
445 clear_bit_region_be (tmpbuf
, BITS_PER_UNIT
- 1,
446 BITS_PER_UNIT
- (bitlen
% BITS_PER_UNIT
));
448 clear_bit_region (tmpbuf
, bitlen
,
449 byte_size
* BITS_PER_UNIT
- bitlen
);
451 /* Left shifting relies on the last byte being clear if bitlen is
452 a multiple of BITS_PER_UNIT, which might not be clear if
453 there are padding bytes. */
454 else if (!BYTES_BIG_ENDIAN
)
455 tmpbuf
[byte_size
- 1] = '\0';
457 /* Clear the bit region in PTR where the bits from TMPBUF will be
459 if (BYTES_BIG_ENDIAN
)
460 clear_bit_region_be (ptr
+ first_byte
,
461 BITS_PER_UNIT
- 1 - (bitpos
% BITS_PER_UNIT
), bitlen
);
463 clear_bit_region (ptr
+ first_byte
, bitpos
% BITS_PER_UNIT
, bitlen
);
466 int bitlen_mod
= bitlen
% BITS_PER_UNIT
;
467 int bitpos_mod
= bitpos
% BITS_PER_UNIT
;
469 bool skip_byte
= false;
470 if (BYTES_BIG_ENDIAN
)
472 /* BITPOS and BITLEN are exactly aligned and no shifting
474 if (bitpos_mod
+ bitlen_mod
== BITS_PER_UNIT
475 || (bitpos_mod
== 0 && bitlen_mod
== 0))
479 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
480 of the value until it aligns with 'bp' in the next byte over. */
481 else if (bitpos_mod
+ bitlen_mod
< BITS_PER_UNIT
)
483 shift_amnt
= bitlen_mod
+ bitpos_mod
;
484 skip_byte
= bitlen_mod
!= 0;
489 Shift the value right within the same byte so it aligns with 'bp'. */
491 shift_amnt
= bitlen_mod
+ bitpos_mod
- BITS_PER_UNIT
;
494 shift_amnt
= bitpos
% BITS_PER_UNIT
;
496 /* Create the shifted version of EXPR. */
497 if (!BYTES_BIG_ENDIAN
)
499 shift_bytes_in_array (tmpbuf
, byte_size
, shift_amnt
);
505 gcc_assert (BYTES_BIG_ENDIAN
);
506 shift_bytes_in_array_right (tmpbuf
, byte_size
, shift_amnt
);
507 /* If shifting right forced us to move into the next byte skip the now
516 /* Insert the bits from TMPBUF. */
517 for (unsigned int i
= 0; i
< byte_size
; i
++)
518 ptr
[first_byte
+ i
] |= tmpbuf
[i
];
523 /* Sorting function for store_immediate_info objects.
524 Sorts them by bitposition. */
527 sort_by_bitpos (const void *x
, const void *y
)
529 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
530 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
532 if ((*tmp
)->bitpos
< (*tmp2
)->bitpos
)
534 else if ((*tmp
)->bitpos
> (*tmp2
)->bitpos
)
537 /* If they are the same let's use the order which is guaranteed to
539 return (*tmp
)->order
- (*tmp2
)->order
;
542 /* Sorting function for store_immediate_info objects.
543 Sorts them by the order field. */
546 sort_by_order (const void *x
, const void *y
)
548 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
549 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
551 if ((*tmp
)->order
< (*tmp2
)->order
)
553 else if ((*tmp
)->order
> (*tmp2
)->order
)
559 /* Initialize a merged_store_group object from a store_immediate_info
562 merged_store_group::merged_store_group (store_immediate_info
*info
)
564 start
= info
->bitpos
;
565 width
= info
->bitsize
;
566 bitregion_start
= info
->bitregion_start
;
567 bitregion_end
= info
->bitregion_end
;
568 /* VAL has memory allocated for it in apply_stores once the group
569 width has been finalized. */
572 unsigned HOST_WIDE_INT align_bitpos
= 0;
573 get_object_alignment_1 (gimple_assign_lhs (info
->stmt
),
574 &align
, &align_bitpos
);
575 align_base
= start
- align_bitpos
;
577 stores
.safe_push (info
);
578 last_stmt
= info
->stmt
;
579 last_order
= info
->order
;
580 first_stmt
= last_stmt
;
581 first_order
= last_order
;
585 merged_store_group::~merged_store_group ()
591 /* Helper method for merge_into and merge_overlapping to do
594 merged_store_group::do_merge (store_immediate_info
*info
)
596 bitregion_start
= MIN (bitregion_start
, info
->bitregion_start
);
597 bitregion_end
= MAX (bitregion_end
, info
->bitregion_end
);
599 unsigned int this_align
;
600 unsigned HOST_WIDE_INT align_bitpos
= 0;
601 get_object_alignment_1 (gimple_assign_lhs (info
->stmt
),
602 &this_align
, &align_bitpos
);
603 if (this_align
> align
)
606 align_base
= info
->bitpos
- align_bitpos
;
609 gimple
*stmt
= info
->stmt
;
610 stores
.safe_push (info
);
611 if (info
->order
> last_order
)
613 last_order
= info
->order
;
616 else if (info
->order
< first_order
)
618 first_order
= info
->order
;
623 /* Merge a store recorded by INFO into this merged store.
624 The store is not overlapping with the existing recorded
628 merged_store_group::merge_into (store_immediate_info
*info
)
630 unsigned HOST_WIDE_INT wid
= info
->bitsize
;
631 /* Make sure we're inserting in the position we think we're inserting. */
632 gcc_assert (info
->bitpos
>= start
+ width
633 && info
->bitregion_start
<= bitregion_end
);
639 /* Merge a store described by INFO into this merged store.
640 INFO overlaps in some way with the current store (i.e. it's not contiguous
641 which is handled by merged_store_group::merge_into). */
644 merged_store_group::merge_overlapping (store_immediate_info
*info
)
646 /* If the store extends the size of the group, extend the width. */
647 if (info
->bitpos
+ info
->bitsize
> start
+ width
)
648 width
+= info
->bitpos
+ info
->bitsize
- (start
+ width
);
653 /* Go through all the recorded stores in this group in program order and
654 apply their values to the VAL byte array to create the final merged
655 value. Return true if the operation succeeded. */
658 merged_store_group::apply_stores ()
660 /* Make sure we have more than one store in the group, otherwise we cannot
662 if (bitregion_start
% BITS_PER_UNIT
!= 0
663 || bitregion_end
% BITS_PER_UNIT
!= 0
664 || stores
.length () == 1)
667 stores
.qsort (sort_by_order
);
668 store_immediate_info
*info
;
670 /* Create a buffer of a size that is 2 times the number of bytes we're
671 storing. That way native_encode_expr can write power-of-2-sized
672 chunks without overrunning. */
673 buf_size
= 2 * ((bitregion_end
- bitregion_start
) / BITS_PER_UNIT
);
674 val
= XNEWVEC (unsigned char, 2 * buf_size
);
675 mask
= val
+ buf_size
;
676 memset (val
, 0, buf_size
);
677 memset (mask
, ~0U, buf_size
);
679 FOR_EACH_VEC_ELT (stores
, i
, info
)
681 unsigned int pos_in_buffer
= info
->bitpos
- bitregion_start
;
682 bool ret
= encode_tree_to_bitpos (gimple_assign_rhs1 (info
->stmt
),
684 pos_in_buffer
, buf_size
);
685 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
689 fprintf (dump_file
, "After writing ");
690 print_generic_expr (dump_file
,
691 gimple_assign_rhs1 (info
->stmt
), 0);
692 fprintf (dump_file
, " of size " HOST_WIDE_INT_PRINT_DEC
693 " at position %d the merged region contains:\n",
694 info
->bitsize
, pos_in_buffer
);
695 dump_char_array (dump_file
, val
, buf_size
);
698 fprintf (dump_file
, "Failed to merge stores\n");
702 unsigned char *m
= mask
+ (pos_in_buffer
/ BITS_PER_UNIT
);
703 if (BYTES_BIG_ENDIAN
)
704 clear_bit_region_be (m
, (BITS_PER_UNIT
- 1
705 - (pos_in_buffer
% BITS_PER_UNIT
)),
708 clear_bit_region (m
, pos_in_buffer
% BITS_PER_UNIT
, info
->bitsize
);
713 /* Structure describing the store chain. */
715 struct imm_store_chain_info
717 /* Doubly-linked list that imposes an order on chain processing.
718 PNXP (prev's next pointer) points to the head of a list, or to
719 the next field in the previous chain in the list.
720 See pass_store_merging::m_stores_head for more rationale. */
721 imm_store_chain_info
*next
, **pnxp
;
723 auto_vec
<store_immediate_info
*> m_store_info
;
724 auto_vec
<merged_store_group
*> m_merged_store_groups
;
726 imm_store_chain_info (imm_store_chain_info
*&inspt
, tree b_a
)
727 : next (inspt
), pnxp (&inspt
), base_addr (b_a
)
732 gcc_checking_assert (pnxp
== next
->pnxp
);
736 ~imm_store_chain_info ()
741 gcc_checking_assert (&next
== next
->pnxp
);
745 bool terminate_and_process_chain ();
746 bool coalesce_immediate_stores ();
747 bool output_merged_store (merged_store_group
*);
748 bool output_merged_stores ();
751 const pass_data pass_data_tree_store_merging
= {
752 GIMPLE_PASS
, /* type */
753 "store-merging", /* name */
754 OPTGROUP_NONE
, /* optinfo_flags */
755 TV_GIMPLE_STORE_MERGING
, /* tv_id */
756 PROP_ssa
, /* properties_required */
757 0, /* properties_provided */
758 0, /* properties_destroyed */
759 0, /* todo_flags_start */
760 TODO_update_ssa
, /* todo_flags_finish */
763 class pass_store_merging
: public gimple_opt_pass
766 pass_store_merging (gcc::context
*ctxt
)
767 : gimple_opt_pass (pass_data_tree_store_merging
, ctxt
), m_stores_head ()
771 /* Pass not supported for PDP-endianness, nor for insane hosts
772 or target character sizes where native_{encode,interpret}_expr
773 doesn't work properly. */
777 return flag_store_merging
778 && WORDS_BIG_ENDIAN
== BYTES_BIG_ENDIAN
780 && BITS_PER_UNIT
== 8;
783 virtual unsigned int execute (function
*);
786 hash_map
<tree_operand_hash
, struct imm_store_chain_info
*> m_stores
;
788 /* Form a doubly-linked stack of the elements of m_stores, so that
789 we can iterate over them in a predictable way. Using this order
790 avoids extraneous differences in the compiler output just because
791 of tree pointer variations (e.g. different chains end up in
792 different positions of m_stores, so they are handled in different
793 orders, so they allocate or release SSA names in different
794 orders, and when they get reused, subsequent passes end up
795 getting different SSA names, which may ultimately change
796 decisions when going out of SSA). */
797 imm_store_chain_info
*m_stores_head
;
799 bool terminate_and_process_all_chains ();
800 bool terminate_all_aliasing_chains (imm_store_chain_info
**,
802 bool terminate_and_release_chain (imm_store_chain_info
*);
803 }; // class pass_store_merging
805 /* Terminate and process all recorded chains. Return true if any changes
809 pass_store_merging::terminate_and_process_all_chains ()
812 while (m_stores_head
)
813 ret
|= terminate_and_release_chain (m_stores_head
);
814 gcc_assert (m_stores
.elements () == 0);
815 gcc_assert (m_stores_head
== NULL
);
820 /* Terminate all chains that are affected by the assignment to DEST, appearing
821 in statement STMT and ultimately points to the object BASE. Return true if
822 at least one aliasing chain was terminated. BASE and DEST are allowed to
823 be NULL_TREE. In that case the aliasing checks are performed on the whole
824 statement rather than a particular operand in it. VAR_OFFSET_P signifies
825 whether STMT represents a store to BASE offset by a variable amount.
826 If that is the case we have to terminate any chain anchored at BASE. */
829 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
836 /* If the statement doesn't touch memory it can't alias. */
837 if (!gimple_vuse (stmt
))
840 /* Check if the assignment destination (BASE) is part of a store chain.
841 This is to catch non-constant stores to destinations that may be part
845 /* We have a chain at BASE and we're writing to [BASE + <variable>].
846 This can interfere with any of the stores so terminate
850 terminate_and_release_chain (*chain_info
);
853 /* Otherwise go through every store in the chain to see if it
854 aliases with any of them. */
857 store_immediate_info
*info
;
859 FOR_EACH_VEC_ELT ((*chain_info
)->m_store_info
, i
, info
)
861 if (ref_maybe_used_by_stmt_p (stmt
,
862 gimple_assign_lhs (info
->stmt
))
863 || stmt_may_clobber_ref_p (stmt
,
864 gimple_assign_lhs (info
->stmt
)))
866 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
869 "stmt causes chain termination:\n");
870 print_gimple_stmt (dump_file
, stmt
, 0);
872 terminate_and_release_chain (*chain_info
);
880 /* Check for aliasing with all other store chains. */
881 for (imm_store_chain_info
*next
= m_stores_head
, *cur
= next
; cur
; cur
= next
)
885 /* We already checked all the stores in chain_info and terminated the
886 chain if necessary. Skip it here. */
887 if (chain_info
&& (*chain_info
) == cur
)
890 /* We can't use the base object here as that does not reliably exist.
891 Build a ao_ref from the base object address (if we know the
892 minimum and maximum offset and the maximum size we could improve
895 ao_ref_init_from_ptr_and_size (&chain_ref
, cur
->base_addr
, NULL_TREE
);
896 if (ref_maybe_used_by_stmt_p (stmt
, &chain_ref
)
897 || stmt_may_clobber_ref_p_1 (stmt
, &chain_ref
))
899 terminate_and_release_chain (cur
);
907 /* Helper function. Terminate the recorded chain storing to base object
908 BASE. Return true if the merging and output was successful. The m_stores
909 entry is removed after the processing in any case. */
912 pass_store_merging::terminate_and_release_chain (imm_store_chain_info
*chain_info
)
914 bool ret
= chain_info
->terminate_and_process_chain ();
915 m_stores
.remove (chain_info
->base_addr
);
920 /* Go through the candidate stores recorded in m_store_info and merge them
921 into merged_store_group objects recorded into m_merged_store_groups
922 representing the widened stores. Return true if coalescing was successful
923 and the number of widened stores is fewer than the original number
927 imm_store_chain_info::coalesce_immediate_stores ()
929 /* Anything less can't be processed. */
930 if (m_store_info
.length () < 2)
933 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
934 fprintf (dump_file
, "Attempting to coalesce %u stores in chain.\n",
935 m_store_info
.length ());
937 store_immediate_info
*info
;
940 /* Order the stores by the bitposition they write to. */
941 m_store_info
.qsort (sort_by_bitpos
);
943 info
= m_store_info
[0];
944 merged_store_group
*merged_store
= new merged_store_group (info
);
946 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
948 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
950 fprintf (dump_file
, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
951 " bitpos:" HOST_WIDE_INT_PRINT_DEC
" val:\n",
952 i
, info
->bitsize
, info
->bitpos
);
953 print_generic_expr (dump_file
, gimple_assign_rhs1 (info
->stmt
));
954 fprintf (dump_file
, "\n------------\n");
962 Overlapping stores. */
963 unsigned HOST_WIDE_INT start
= info
->bitpos
;
964 if (IN_RANGE (start
, merged_store
->start
,
965 merged_store
->start
+ merged_store
->width
- 1))
967 merged_store
->merge_overlapping (info
);
971 /* |---store 1---| <gap> |---store 2---|.
972 Gap between stores. Start a new group if there are any gaps
973 between bitregions. */
974 if (info
->bitregion_start
> merged_store
->bitregion_end
)
976 /* Try to apply all the stores recorded for the group to determine
977 the bitpattern they write and discard it if that fails.
978 This will also reject single-store groups. */
979 if (!merged_store
->apply_stores ())
982 m_merged_store_groups
.safe_push (merged_store
);
984 merged_store
= new merged_store_group (info
);
989 /* |---store 1---||---store 2---|
990 This store is consecutive to the previous one.
991 Merge it into the current store group. */
992 merged_store
->merge_into (info
);
995 /* Record or discard the last store group. */
996 if (!merged_store
->apply_stores ())
999 m_merged_store_groups
.safe_push (merged_store
);
1001 gcc_assert (m_merged_store_groups
.length () <= m_store_info
.length ());
1003 = !m_merged_store_groups
.is_empty ()
1004 && m_merged_store_groups
.length () < m_store_info
.length ();
1006 if (success
&& dump_file
)
1007 fprintf (dump_file
, "Coalescing successful!\n"
1008 "Merged into %u stores\n",
1009 m_merged_store_groups
.length ());
1014 /* Return the type to use for the merged stores described by STMTS.
1015 This is needed to get the alias sets right. */
1018 get_alias_type_for_stmts (auto_vec
<gimple
*> &stmts
)
1022 tree lhs
= gimple_assign_lhs (stmts
[0]);
1023 tree type
= reference_alias_ptr_type (lhs
);
1025 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1030 lhs
= gimple_assign_lhs (stmt
);
1031 tree type1
= reference_alias_ptr_type (lhs
);
1032 if (!alias_ptr_types_compatible_p (type
, type1
))
1033 return ptr_type_node
;
1038 /* Return the location_t information we can find among the statements
1042 get_location_for_stmts (auto_vec
<gimple
*> &stmts
)
1047 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
1048 if (gimple_has_location (stmt
))
1049 return gimple_location (stmt
);
1051 return UNKNOWN_LOCATION
;
1054 /* Used to describe a store resulting from splitting a wide store into smaller
1055 regularly-sized stores in split_group. */
1059 unsigned HOST_WIDE_INT bytepos
;
1060 unsigned HOST_WIDE_INT size
;
1061 unsigned HOST_WIDE_INT align
;
1062 auto_vec
<gimple
*> orig_stmts
;
1063 /* True if there is a single orig stmt covering the whole split store. */
1065 split_store (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1066 unsigned HOST_WIDE_INT
);
1069 /* Simple constructor. */
1071 split_store::split_store (unsigned HOST_WIDE_INT bp
,
1072 unsigned HOST_WIDE_INT sz
,
1073 unsigned HOST_WIDE_INT al
)
1074 : bytepos (bp
), size (sz
), align (al
), orig (false)
1076 orig_stmts
.create (0);
1079 /* Record all statements corresponding to stores in GROUP that write to
1080 the region starting at BITPOS and is of size BITSIZE. Record such
1081 statements in STMTS if non-NULL. The stores in GROUP must be sorted by
1082 bitposition. Return INFO if there is exactly one original store
1085 static store_immediate_info
*
1086 find_constituent_stmts (struct merged_store_group
*group
,
1087 vec
<gimple
*> *stmts
,
1088 unsigned int *first
,
1089 unsigned HOST_WIDE_INT bitpos
,
1090 unsigned HOST_WIDE_INT bitsize
)
1092 store_immediate_info
*info
, *ret
= NULL
;
1094 bool second
= false;
1095 bool update_first
= true;
1096 unsigned HOST_WIDE_INT end
= bitpos
+ bitsize
;
1097 for (i
= *first
; group
->stores
.iterate (i
, &info
); ++i
)
1099 unsigned HOST_WIDE_INT stmt_start
= info
->bitpos
;
1100 unsigned HOST_WIDE_INT stmt_end
= stmt_start
+ info
->bitsize
;
1101 if (stmt_end
<= bitpos
)
1103 /* BITPOS passed to this function never decreases from within the
1104 same split_group call, so optimize and don't scan info records
1105 which are known to end before or at BITPOS next time.
1106 Only do it if all stores before this one also pass this. */
1112 update_first
= false;
1114 /* The stores in GROUP are ordered by bitposition so if we're past
1115 the region for this group return early. */
1116 if (stmt_start
>= end
)
1121 stmts
->safe_push (info
->stmt
);
/* NOTE(review): this region was garbled during extraction -- each statement
   is split across several lines, the original file's line numbers are fused
   into the text, and some original lines (braces, the retry path around
   original line 1240-1249, the final return) are missing.  All tokens below
   are kept byte-identical; only comments were added.  Restore the block from
   the upstream gimple-ssa-store-merging.c before compiling.  */
1136 /* Split a merged store described by GROUP by populating the SPLIT_STORES
1137 vector (if non-NULL) with split_store structs describing the byte offset
1138 (from the base), the bit size and alignment of each store as well as the
1139 original statements involved in each such split group.
1140 This is to separate the splitting strategy from the statement
1141 building/emission/linking done in output_merged_store.
1142 Return number of new stores.
1143 If SPLIT_STORES is NULL, it is just a dry run to count number of
1147 split_group (merged_store_group
*group
, bool allow_unaligned
,
1148 vec
<struct split_store
*> *split_stores
)
1150 unsigned HOST_WIDE_INT pos
= group
->bitregion_start
;
1151 unsigned HOST_WIDE_INT size
= group
->bitregion_end
- pos
;
1152 unsigned HOST_WIDE_INT bytepos
= pos
/ BITS_PER_UNIT
;
1153 unsigned HOST_WIDE_INT group_align
= group
->align
;
1154 unsigned HOST_WIDE_INT align_base
= group
->align_base
;
/* The bit region must be byte-aligned at both ends.  */
1156 gcc_assert ((size
% BITS_PER_UNIT
== 0) && (pos
% BITS_PER_UNIT
== 0));
1158 unsigned int ret
= 0, first
= 0;
1159 unsigned HOST_WIDE_INT try_pos
= bytepos
;
/* Sort the recorded stores by ascending bit position so the region can be
   walked front to back.  */
1160 group
->stores
.qsort (sort_by_bitpos
);
/* A mask byte of all-ones marks padding (no store covers it); leading
   padding bytes are skipped here.  */
1164 if ((allow_unaligned
|| group_align
<= BITS_PER_UNIT
)
1165 && group
->mask
[try_pos
- bytepos
] == (unsigned char) ~0U)
1167 /* Skip padding bytes. */
1169 size
-= BITS_PER_UNIT
;
1173 unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
1174 unsigned int try_size
= MAX_STORE_BITSIZE
, nonmasked
;
1175 unsigned HOST_WIDE_INT align_bitpos
1176 = (try_bitpos
- align_base
) & (group_align
- 1);
1177 unsigned HOST_WIDE_INT align
= group_align
;
1179 align
= least_bit_hwi (align_bitpos
);
1180 if (!allow_unaligned
)
1181 try_size
= MIN (try_size
, align
);
1182 store_immediate_info
*info
1183 = find_constituent_stmts (group
, NULL
, &first
, try_bitpos
, try_size
);
1186 /* If there is just one original statement for the range, see if
1187 we can just reuse the original store which could be even larger
1189 unsigned HOST_WIDE_INT stmt_end
1190 = ROUND_UP (info
->bitpos
+ info
->bitsize
, BITS_PER_UNIT
);
1191 info
= find_constituent_stmts (group
, NULL
, &first
, try_bitpos
,
1192 stmt_end
- try_bitpos
);
1193 if (info
&& info
->bitpos
>= try_bitpos
)
1195 try_size
= stmt_end
- try_bitpos
;
1200 /* Approximate store bitsize for the case when there are no padding
1202 while (try_size
> size
)
1204 /* Now look for whole padding bytes at the end of that bitsize. */
1205 for (nonmasked
= try_size
/ BITS_PER_UNIT
; nonmasked
> 0; --nonmasked
)
1206 if (group
->mask
[try_pos
- bytepos
+ nonmasked
- 1]
1207 != (unsigned char) ~0U)
1211 /* If entire try_size range is padding, skip it. */
1212 try_pos
+= try_size
/ BITS_PER_UNIT
;
1216 /* Otherwise try to decrease try_size if second half, last 3 quarters
1217 etc. are padding. */
1218 nonmasked
*= BITS_PER_UNIT
;
1219 while (nonmasked
<= try_size
/ 2)
1221 if (!allow_unaligned
&& group_align
> BITS_PER_UNIT
)
1223 /* Now look for whole padding bytes at the start of that bitsize. */
1224 unsigned int try_bytesize
= try_size
/ BITS_PER_UNIT
, masked
;
1225 for (masked
= 0; masked
< try_bytesize
; ++masked
)
1226 if (group
->mask
[try_pos
- bytepos
+ masked
] != (unsigned char) ~0U)
1228 masked
*= BITS_PER_UNIT
;
1229 gcc_assert (masked
< try_size
);
1230 if (masked
>= try_size
/ 2)
1232 while (masked
>= try_size
/ 2)
1235 try_pos
+= try_size
/ BITS_PER_UNIT
;
1239 /* Need to recompute the alignment, so just retry at the new
/* Record one split store covering [try_pos, try_pos + try_size) and the
   original statements it came from.  */
1250 struct split_store
*store
1251 = new split_store (try_pos
, try_size
, align
);
1252 info
= find_constituent_stmts (group
, &store
->orig_stmts
,
1253 &first
, try_bitpos
, try_size
);
1255 && info
->bitpos
>= try_bitpos
1256 && info
->bitpos
+ info
->bitsize
<= try_bitpos
+ try_size
)
1258 split_stores
->safe_push (store
);
1261 try_pos
+= try_size
/ BITS_PER_UNIT
;
/* NOTE(review): this region was garbled during extraction -- statements are
   split across lines and some original lines are missing (declarations of
   dest/src/loc/mask, several closing braces, the final return).  Tokens are
   kept byte-identical; only comments were added.  Restore the block from the
   upstream gimple-ssa-store-merging.c before compiling.  */
1268 /* Given a merged store group GROUP output the widened version of it.
1269 The store chain is against the base object BASE.
1270 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
1271 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
1272 Make sure that the number of statements output is less than the number of
1273 original statements. If a better sequence is possible emit it and
1277 imm_store_chain_info::output_merged_store (merged_store_group
*group
)
1279 unsigned HOST_WIDE_INT start_byte_pos
1280 = group
->bitregion_start
/ BITS_PER_UNIT
;
1282 unsigned int orig_num_stmts
= group
->stores
.length ();
/* Nothing to merge if the group has fewer than two stores.  */
1283 if (orig_num_stmts
< 2)
1286 auto_vec
<struct split_store
*, 32> split_stores
;
1287 split_stores
.create (0);
1288 bool allow_unaligned
1289 = !STRICT_ALIGNMENT
&& PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED
);
1290 if (allow_unaligned
)
1292 /* If unaligned stores are allowed, see how many stores we'd emit
1293 for unaligned and how many stores we'd emit for aligned stores.
1294 Only use unaligned stores if it allows fewer stores than aligned. */
1295 unsigned aligned_cnt
= split_group (group
, false, NULL
);
1296 unsigned unaligned_cnt
= split_group (group
, true, NULL
);
1297 if (aligned_cnt
<= unaligned_cnt
)
1298 allow_unaligned
= false;
1300 split_group (group
, allow_unaligned
, &split_stores
);
1302 if (split_stores
.length () >= orig_num_stmts
)
1304 /* We didn't manage to reduce the number of statements. Bail out. */
1305 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1307 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
1308 " Not profitable to emit new sequence.\n",
/* The new sequence is inserted after the last original store and inherits
   its virtual operands.  */
1314 gimple_stmt_iterator last_gsi
= gsi_for_stmt (group
->last_stmt
);
1315 gimple_seq seq
= NULL
;
1316 tree last_vdef
, new_vuse
;
1317 last_vdef
= gimple_vdef (group
->last_stmt
);
1318 new_vuse
= gimple_vuse (group
->last_stmt
);
1320 gimple
*stmt
= NULL
;
1321 split_store
*split_store
;
1324 tree addr
= force_gimple_operand_1 (unshare_expr (base_addr
), &seq
,
1325 is_gimple_mem_ref_addr
, NULL_TREE
);
1326 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
1328 unsigned HOST_WIDE_INT try_size
= split_store
->size
;
1329 unsigned HOST_WIDE_INT try_pos
= split_store
->bytepos
;
1330 unsigned HOST_WIDE_INT align
= split_store
->align
;
1333 if (split_store
->orig
)
1335 /* If there is just a single constituent store which covers
1336 the whole area, just reuse the lhs and rhs. */
1337 dest
= gimple_assign_lhs (split_store
->orig_stmts
[0]);
1338 src
= gimple_assign_rhs1 (split_store
->orig_stmts
[0]);
1339 loc
= gimple_location (split_store
->orig_stmts
[0]);
1344 = get_alias_type_for_stmts (split_store
->orig_stmts
);
1345 loc
= get_location_for_stmts (split_store
->orig_stmts
);
1347 tree int_type
= build_nonstandard_integer_type (try_size
, UNSIGNED
);
1348 int_type
= build_aligned_type (int_type
, align
);
1349 dest
= fold_build2 (MEM_REF
, int_type
, addr
,
1350 build_int_cst (offset_type
, try_pos
));
1351 src
= native_interpret_expr (int_type
,
1352 group
->val
+ try_pos
- start_byte_pos
,
1355 = native_interpret_expr (int_type
,
1356 group
->mask
+ try_pos
- start_byte_pos
,
/* A non-zero mask means part of the region is padding: emit a
   read-modify-write -- load, AND with the mask to keep the untouched bits,
   then IOR in the new constant bits.  */
1358 if (!integer_zerop (mask
))
1360 tree tem
= make_ssa_name (int_type
);
1361 tree load_src
= unshare_expr (dest
);
1362 /* The load might load some or all bits uninitialized,
1363 avoid -W*uninitialized warnings in that case.
1364 As optimization, it would be nice if all the bits are
1365 provably uninitialized (no stores at all yet or previous
1366 store a CLOBBER) we'd optimize away the load and replace
1368 TREE_NO_WARNING (load_src
) = 1;
1369 stmt
= gimple_build_assign (tem
, load_src
);
1370 gimple_set_location (stmt
, loc
);
1371 gimple_set_vuse (stmt
, new_vuse
);
1372 gimple_seq_add_stmt_without_update (&seq
, stmt
);
1374 /* FIXME: If there is a single chunk of zero bits in mask,
1375 perhaps use BIT_INSERT_EXPR instead? */
1376 stmt
= gimple_build_assign (make_ssa_name (int_type
),
1377 BIT_AND_EXPR
, tem
, mask
);
1378 gimple_set_location (stmt
, loc
);
1379 gimple_seq_add_stmt_without_update (&seq
, stmt
);
1380 tem
= gimple_assign_lhs (stmt
);
1382 src
= wide_int_to_tree (int_type
,
1383 wi::bit_and_not (wi::to_wide (src
),
1384 wi::to_wide (mask
)));
1385 stmt
= gimple_build_assign (make_ssa_name (int_type
),
1386 BIT_IOR_EXPR
, tem
, src
);
1387 gimple_set_location (stmt
, loc
);
1388 gimple_seq_add_stmt_without_update (&seq
, stmt
);
1389 src
= gimple_assign_lhs (stmt
);
1393 stmt
= gimple_build_assign (dest
, src
);
1394 gimple_set_location (stmt
, loc
);
1395 gimple_set_vuse (stmt
, new_vuse
);
1396 gimple_seq_add_stmt_without_update (&seq
, stmt
);
/* Chain the virtual defs: intermediate stores get fresh vdefs, the last
   store reuses the original final vdef so downstream uses stay valid.  */
1399 if (i
< split_stores
.length () - 1)
1400 new_vdef
= make_ssa_name (gimple_vop (cfun
), stmt
);
1402 new_vdef
= last_vdef
;
1404 gimple_set_vdef (stmt
, new_vdef
);
1405 SSA_NAME_DEF_STMT (new_vdef
) = stmt
;
1406 new_vuse
= new_vdef
;
1409 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
1416 "New sequence of %u stmts to replace old one of %u stmts\n",
1417 split_stores
.length (), orig_num_stmts
);
1418 if (dump_flags
& TDF_DETAILS
)
1419 print_gimple_seq (dump_file
, seq
, 0, TDF_VOPS
| TDF_MEMSYMS
);
1421 gsi_insert_seq_after (&last_gsi
, seq
, GSI_SAME_STMT
);
1426 /* Process the merged_store_group objects created in the coalescing phase.
1427 The stores are all against the base object BASE.
1428 Try to output the widened stores and delete the original statements if
1429 successful. Return true iff any changes were made. */
1432 imm_store_chain_info::output_merged_stores ()
1435 merged_store_group
*merged_store
;
1437 FOR_EACH_VEC_ELT (m_merged_store_groups
, i
, merged_store
)
1439 if (output_merged_store (merged_store
))
1442 store_immediate_info
*store
;
1443 FOR_EACH_VEC_ELT (merged_store
->stores
, j
, store
)
1445 gimple
*stmt
= store
->stmt
;
1446 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1447 gsi_remove (&gsi
, true);
1448 if (stmt
!= merged_store
->last_stmt
)
1450 unlink_stmt_vdef (stmt
);
1451 release_defs (stmt
);
1457 if (ret
&& dump_file
)
1458 fprintf (dump_file
, "Merging successful!\n");
1463 /* Coalesce the store_immediate_info objects recorded against the base object
1464 BASE in the first phase and output them.
1465 Delete the allocated structures.
1466 Return true if any changes were made. */
1469 imm_store_chain_info::terminate_and_process_chain ()
1471 /* Process store chain. */
1473 if (m_store_info
.length () > 1)
1475 ret
= coalesce_immediate_stores ();
1477 ret
= output_merged_stores ();
1480 /* Delete all the entries we allocated ourselves. */
1481 store_immediate_info
*info
;
1483 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
1486 merged_store_group
*merged_info
;
1487 FOR_EACH_VEC_ELT (m_merged_store_groups
, i
, merged_info
)
1493 /* Return true iff LHS is a destination potentially interesting for
1494 store merging. In practice these are the codes that get_inner_reference
1498 lhs_valid_for_store_merging_p (tree lhs
)
1500 tree_code code
= TREE_CODE (lhs
);
1502 if (code
== ARRAY_REF
|| code
== ARRAY_RANGE_REF
|| code
== MEM_REF
1503 || code
== COMPONENT_REF
|| code
== BIT_FIELD_REF
)
1509 /* Return true if the tree RHS is a constant we want to consider
1510 during store merging. In practice accept all codes that
1511 native_encode_expr accepts. */
1514 rhs_valid_for_store_merging_p (tree rhs
)
1516 return native_encode_expr (rhs
, NULL
,
1517 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs
)))) != 0;
/* NOTE(review): this region was garbled during extraction -- statements are
   split across lines and many original lines are missing (braces, `continue`
   statements, several fprintf continuations, the final `return 0;`).  Tokens
   are kept byte-identical; only comments were added.  Restore the block from
   the upstream gimple-ssa-store-merging.c before compiling.  */
1520 /* Entry point for the pass. Go over each basic block recording chains of
1521 immediate stores. Upon encountering a terminating statement (as defined
1522 by stmt_terminates_chain_p) process the recorded stores and emit the widened
1526 pass_store_merging::execute (function
*fun
)
1529 hash_set
<gimple
*> orig_stmts
;
1531 FOR_EACH_BB_FN (bb
, fun
)
1533 gimple_stmt_iterator gsi
;
1534 unsigned HOST_WIDE_INT num_statements
= 0;
1535 /* Record the original statements so that we can keep track of
1536 statements emitted in this pass and not re-process new
/* First pass over the block: count non-debug statements; blocks with
   fewer than two cannot contain a mergeable chain and are skipped.  */
1538 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1540 if (is_gimple_debug (gsi_stmt (gsi
)))
1543 if (++num_statements
>= 2)
1547 if (num_statements
< 2)
1550 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1551 fprintf (dump_file
, "Processing basic block <%d>:\n", bb
->index
);
1553 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1555 gimple
*stmt
= gsi_stmt (gsi
);
1557 if (is_gimple_debug (stmt
))
/* Volatile accesses cannot be reordered with the recorded stores, so all
   pending chains are flushed here.  */
1560 if (gimple_has_volatile_ops (stmt
))
1562 /* Terminate all chains. */
1563 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1564 fprintf (dump_file
, "Volatile access terminates "
1566 terminate_and_process_all_chains ();
/* Candidate stores: single non-throwing assignments with a vdef whose lhs
   get_inner_reference can analyze.  */
1570 if (gimple_assign_single_p (stmt
) && gimple_vdef (stmt
)
1571 && !stmt_can_throw_internal (stmt
)
1572 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt
)))
1574 tree lhs
= gimple_assign_lhs (stmt
);
1575 tree rhs
= gimple_assign_rhs1 (stmt
);
1577 HOST_WIDE_INT bitsize
, bitpos
;
1578 unsigned HOST_WIDE_INT bitregion_start
= 0;
1579 unsigned HOST_WIDE_INT bitregion_end
= 0;
1581 int unsignedp
= 0, reversep
= 0, volatilep
= 0;
1582 tree offset
, base_addr
;
1584 = get_inner_reference (lhs
, &bitsize
, &bitpos
, &offset
, &mode
,
1585 &unsignedp
, &reversep
, &volatilep
);
1586 if (TREE_CODE (lhs
) == COMPONENT_REF
1587 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (lhs
, 1)))
1589 get_bit_range (&bitregion_start
, &bitregion_end
, lhs
,
1597 /* As a future enhancement we could handle stores with the same
1599 bool invalid
= reversep
1600 || ((bitsize
> MAX_BITSIZE_MODE_ANY_INT
)
1601 && (TREE_CODE (rhs
) != INTEGER_CST
))
1602 || !rhs_valid_for_store_merging_p (rhs
);
1604 /* We do not want to rewrite TARGET_MEM_REFs. */
1605 if (TREE_CODE (base_addr
) == TARGET_MEM_REF
)
1607 /* In some cases get_inner_reference may return a
1608 MEM_REF [ptr + byteoffset]. For the purposes of this pass
1609 canonicalize the base_addr to MEM_REF [ptr] and take
1610 byteoffset into account in the bitpos. This occurs in
1611 PR 23684 and this way we can catch more chains. */
1612 else if (TREE_CODE (base_addr
) == MEM_REF
)
1614 offset_int bit_off
, byte_off
= mem_ref_offset (base_addr
);
1615 bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
1617 if (!wi::neg_p (bit_off
) && wi::fits_shwi_p (bit_off
))
1619 bitpos
= bit_off
.to_shwi ();
1622 bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
1623 bit_off
+= bitregion_start
;
1624 if (wi::fits_uhwi_p (bit_off
))
1626 bitregion_start
= bit_off
.to_uhwi ();
1627 bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
1628 bit_off
+= bitregion_end
;
1629 if (wi::fits_uhwi_p (bit_off
))
1630 bitregion_end
= bit_off
.to_uhwi ();
1640 base_addr
= TREE_OPERAND (base_addr
, 0);
1642 /* get_inner_reference returns the base object, get at its
1648 base_addr
= build_fold_addr_expr (base_addr
);
/* Default bit region: the whole bytes the access touches.  */
1653 bitregion_start
= ROUND_DOWN (bitpos
, BITS_PER_UNIT
);
1654 bitregion_end
= ROUND_UP (bitpos
+ bitsize
, BITS_PER_UNIT
);
1658 && offset
!= NULL_TREE
)
1660 /* If the access is variable offset then a base
1661 decl has to be address-taken to be able to
1662 emit pointer-based stores to it.
1663 ??? We might be able to get away with
1664 re-using the original base up to the first
1665 variable part and then wrapping that inside
1667 tree base
= get_base_address (base_addr
);
1670 && ! TREE_ADDRESSABLE (base
)))
1673 base_addr
= build2 (POINTER_PLUS_EXPR
,
1674 TREE_TYPE (base_addr
),
/* Look up an existing chain for this base; extend it if found,
   otherwise start a new one.  */
1678 struct imm_store_chain_info
**chain_info
1679 = m_stores
.get (base_addr
);
1683 store_immediate_info
*info
;
1686 unsigned int ord
= (*chain_info
)->m_store_info
.length ();
1687 info
= new store_immediate_info (bitsize
, bitpos
,
1691 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1694 "Recording immediate store from stmt:\n");
1695 print_gimple_stmt (dump_file
, stmt
, 0);
1697 (*chain_info
)->m_store_info
.safe_push (info
);
1698 /* If we reach the limit of stores to merge in a chain
1699 terminate and process the chain now. */
1700 if ((*chain_info
)->m_store_info
.length ()
1702 PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE
))
1704 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1706 "Reached maximum number of statements"
1708 terminate_and_release_chain (*chain_info
);
1713 /* Store aliases any existing chain? */
1714 terminate_all_aliasing_chains (chain_info
, false, stmt
);
1715 /* Start a new chain. */
1716 struct imm_store_chain_info
*new_chain
1717 = new imm_store_chain_info (m_stores_head
, base_addr
);
1718 info
= new store_immediate_info (bitsize
, bitpos
,
1722 new_chain
->m_store_info
.safe_push (info
);
1723 m_stores
.put (base_addr
, new_chain
);
1724 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1727 "Starting new chain with statement:\n");
1728 print_gimple_stmt (dump_file
, stmt
, 0);
1729 fprintf (dump_file
, "The base object is:\n");
1730 print_generic_expr (dump_file
, base_addr
);
1731 fprintf (dump_file
, "\n");
1735 terminate_all_aliasing_chains (chain_info
,
1736 offset
!= NULL_TREE
, stmt
);
1741 terminate_all_aliasing_chains (NULL
, false, stmt
);
1743 terminate_and_process_all_chains ();
1750 /* Construct and return a store merging pass object. */
1753 make_pass_store_merging (gcc::context
*ctxt
)
1755 return new pass_store_merging (ctxt
);
1760 namespace selftest
{
1762 /* Selftests for store merging helpers. */
1764 /* Assert that all elements of the byte arrays X and Y, both of length N
1768 verify_array_eq (unsigned char *x
, unsigned char *y
, unsigned int n
)
1770 for (unsigned int i
= 0; i
< n
; i
++)
1774 fprintf (stderr
, "Arrays do not match. X:\n");
1775 dump_char_array (stderr
, x
, n
);
1776 fprintf (stderr
, "Y:\n");
1777 dump_char_array (stderr
, y
, n
);
1779 ASSERT_EQ (x
[i
], y
[i
]);
1783 /* Test shift_bytes_in_array and that it carries bits across between
1787 verify_shift_bytes_in_array (void)
1790 00011111 | 11100000. */
1791 unsigned char orig
[2] = { 0xe0, 0x1f };
1792 unsigned char in
[2];
1793 memcpy (in
, orig
, sizeof orig
);
1795 unsigned char expected
[2] = { 0x80, 0x7f };
1796 shift_bytes_in_array (in
, sizeof (in
), 2);
1797 verify_array_eq (in
, expected
, sizeof (in
));
1799 memcpy (in
, orig
, sizeof orig
);
1800 memcpy (expected
, orig
, sizeof orig
);
1801 /* Check that shifting by zero doesn't change anything. */
1802 shift_bytes_in_array (in
, sizeof (in
), 0);
1803 verify_array_eq (in
, expected
, sizeof (in
));
1807 /* Test shift_bytes_in_array_right and that it carries bits across between
1811 verify_shift_bytes_in_array_right (void)
1814 00011111 | 11100000. */
1815 unsigned char orig
[2] = { 0x1f, 0xe0};
1816 unsigned char in
[2];
1817 memcpy (in
, orig
, sizeof orig
);
1818 unsigned char expected
[2] = { 0x07, 0xf8};
1819 shift_bytes_in_array_right (in
, sizeof (in
), 2);
1820 verify_array_eq (in
, expected
, sizeof (in
));
1822 memcpy (in
, orig
, sizeof orig
);
1823 memcpy (expected
, orig
, sizeof orig
);
1824 /* Check that shifting by zero doesn't change anything. */
1825 shift_bytes_in_array_right (in
, sizeof (in
), 0);
1826 verify_array_eq (in
, expected
, sizeof (in
));
1829 /* Test clear_bit_region that it clears exactly the bits asked and
1833 verify_clear_bit_region (void)
1835 /* Start with all bits set and test clearing various patterns in them. */
1836 unsigned char orig
[3] = { 0xff, 0xff, 0xff};
1837 unsigned char in
[3];
1838 unsigned char expected
[3];
1839 memcpy (in
, orig
, sizeof in
);
1841 /* Check zeroing out all the bits. */
1842 clear_bit_region (in
, 0, 3 * BITS_PER_UNIT
);
1843 expected
[0] = expected
[1] = expected
[2] = 0;
1844 verify_array_eq (in
, expected
, sizeof in
);
1846 memcpy (in
, orig
, sizeof in
);
1847 /* Leave the first and last bits intact. */
1848 clear_bit_region (in
, 1, 3 * BITS_PER_UNIT
- 2);
1852 verify_array_eq (in
, expected
, sizeof in
);
1855 /* Test verify_clear_bit_region_be that it clears exactly the bits asked and
1859 verify_clear_bit_region_be (void)
1861 /* Start with all bits set and test clearing various patterns in them. */
1862 unsigned char orig
[3] = { 0xff, 0xff, 0xff};
1863 unsigned char in
[3];
1864 unsigned char expected
[3];
1865 memcpy (in
, orig
, sizeof in
);
1867 /* Check zeroing out all the bits. */
1868 clear_bit_region_be (in
, BITS_PER_UNIT
- 1, 3 * BITS_PER_UNIT
);
1869 expected
[0] = expected
[1] = expected
[2] = 0;
1870 verify_array_eq (in
, expected
, sizeof in
);
1872 memcpy (in
, orig
, sizeof in
);
1873 /* Leave the first and last bits intact. */
1874 clear_bit_region_be (in
, BITS_PER_UNIT
- 2, 3 * BITS_PER_UNIT
- 2);
1878 verify_array_eq (in
, expected
, sizeof in
);
1882 /* Run all of the selftests within this file. */
1885 store_merging_c_tests (void)
1887 verify_shift_bytes_in_array ();
1888 verify_shift_bytes_in_array_right ();
1889 verify_clear_bit_region ();
1890 verify_clear_bit_region_be ();
1893 } // namespace selftest
1894 #endif /* CHECKING_P. */