/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2020 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap this can be transformed into a single 4-byte
   load followed by a single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap this can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in a store_operand_info struct.

   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores, we don't need to
   require the stores to be contiguous, just their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and
   the following one storing a value loaded from memory, or if the loaded
   memory objects are not adjacent, a new merged_store_group is created
   as well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS settings.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] (16-bit) := 0x1234;
   [p + 2B] (16-bit) := 0x5678;
   [p + 4B] ( 8-bit) := 0xab;
   [p + 5B] ( 8-bit) := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bitpositions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bitpositions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little-endian we emit:
   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     // val & 0x00000000ffff;  */
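/* Illustrative sketch (editor's addition, not part of the pass): the kind
   of source code the pass targets.  Assuming a little-endian target that
   allows unaligned 4-byte stores, code such as

     struct S { unsigned char a, b, c, d; };
     void f (struct S *p)
     {
       p->a = 1;   // [p     ] := 1
       p->b = 2;   // [p + 1B] := 2
       p->c = 3;   // [p + 2B] := 3
       p->d = 4;   // [p + 3B] := 4
     }

   is rewritten into a single wider store, conceptually equivalent to
   *(uint32_t *) p = 0x04030201.  */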
#include "coretypes.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "cfgcleanup.h"
#include "tree-cfg.h"
#include "gimplify-me.h"
#include "expr.h"	/* For get_bit_range.  */
#include "optabs-tree.h"
#include "selftest.h"
/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

/* Limit to bound the number of aliasing checks for loads with the same
   vuse as the corresponding store.  */
#define MAX_STORE_ALIAS_CHECKS 64
static struct
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;
/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte of the target value comes from in the source:

   0       - target byte has the value 0
   FF      - target byte has an unknown value (eg. due to sign extension)
   1..size - marker value is the byte index in the source (0 for lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated with:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - the number of expressions N_OPS bitwise ored together to represent the
     approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
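/* Worked example (editor's note): reading a 4-byte source through identity
   operations produces the markers 0x04030201 in N (marker 1 is the lsb of
   the source), which is the CMPNOP pattern below; a manual byte swap of the
   same source produces 0x01020304, the CMPXCHG pattern.  A sign extension
   of that value to 8 bytes would set the four new markers to FF:
   0xffffffff04030201.  */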
struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};
#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)
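/* For instance (editor's note), for a 4-byte symbolic number the patterns
   reduce to CMPNOP & 0xffffffff == 0x04030201 and CMPXCHG >> 32
   == 0x01020304; see find_bswap_or_nop_finalize for the exact masking.  */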
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

inline bool
do_shift_rotate (enum tree_code code,
                 struct symbolic_number *n,
                 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count < 0
      || count >= TYPE_PRECISION (n->type)
      || count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
        for (i = 0; i < count / BITS_PER_MARKER; i++)
          n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
                  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}
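/* Example (editor's note): for a 4-byte unsigned value with markers
   0x04030201, do_shift_rotate (RSHIFT_EXPR, n, 8) scales the count to one
   marker and yields 0x00040302; with a signed type and a non-zero head
   marker the vacated position becomes MARKER_BYTE_UNKNOWN instead:
   0xff040302.  */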
/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = gimple_expr_type (stmt);

  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}
/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
    return false;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
  n->src = src;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;
  n->n_ops = 1;
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}
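/* Example (editor's note): for a 2-byte SRC, init_symbolic_number leaves
   n->n == 0x0201 (CMPNOP masked to two markers), n->range == 2 and
   n->n_ops == 1.  */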
/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */

bool
find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
                                   &unsignedp, &reversep, &volatilep);

  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    /* Do not rewrite TARGET_MEM_REF.  */
    return false;
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      poly_offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
        {
          poly_offset_int boff = mem_ref_offset (base_addr);
          boff <<= LOG2_BITS_PER_UNIT;
          bit_offset += boff;
        }

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (maybe_lt (bit_offset, 0))
        {
          tree byte_offset = wide_int_to_tree
            (sizetype, bits_to_bytes_round_down (bit_offset));
          bit_offset = num_trailing_bits (bit_offset);
          if (offset)
            offset = size_binop (PLUS_EXPR, offset, byte_offset);
          else
            offset = byte_offset;
        }

      bitpos += bit_offset.force_shwi ();
    }
  else
    base_addr = build_fold_addr_expr (base_addr);

  if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return false;
  if (!multiple_p (bitsize, BITS_PER_UNIT))
    return false;
  if (reversep)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bytepos;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}
/* Compute the symbolic number N representing the result of a bitwise OR on 2
   symbolic numbers N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.  */

gimple *
perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
                        gimple *source_stmt2, struct symbolic_number *n2,
                        struct symbolic_number *n)
{
  int i, size;
  uint64_t mask;
  gimple *source_stmt;
  struct symbolic_number *n_start;

  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    rhs1 = TREE_OPERAND (rhs1, 0);
  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
  if (TREE_CODE (rhs2) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
    rhs2 = TREE_OPERAND (rhs2, 0);

  /* Sources are different, cancel bswap if they are not memory locations
     with the same base (array, structure, ...).  */
  if (rhs1 != rhs2)
    {
      uint64_t inc;
      HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;
      basic_block bb1, bb2;

      if (!n1->base_addr || !n2->base_addr
          || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
        return NULL;

      if (!n1->offset != !n2->offset
          || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
        return NULL;

      start1 = 0;
      if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
        return NULL;

      if (start1 < start2)
        {
          n_start = n1;
          start_sub = start2 - start1;
        }
      else
        {
          n_start = n2;
          start_sub = start1 - start2;
        }

      bb1 = gimple_bb (source_stmt1);
      bb2 = gimple_bb (source_stmt2);
      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
        source_stmt = source_stmt1;
      else
        source_stmt = source_stmt2;

      /* Find the highest address at which a load is performed and
         compute related info.  */
      end1 = start1 + (n1->range - 1);
      end2 = start2 + (n2->range - 1);
      if (end1 < end2)
        {
          end = end2;
          end_sub = end2 - end1;
        }
      else
        {
          end = end1;
          end_sub = end1 - end2;
        }
      n_end = (end2 > end1) ? n2 : n1;

      /* Find the symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
        toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
        toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - MIN (start1, start2) + 1;

      /* Check that the range of memory covered can be represented by
         a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
        return NULL;

      /* Reinterpret byte marks in the symbolic number holding the value of
         bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
        {
          uint64_t marker
            = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
          if (marker && marker != MARKER_BYTE_UNKNOWN)
            toinc_n_ptr->n += inc;
        }
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->src = n_start->src;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      if (masked1 && masked2 && masked1 != masked2)
        return NULL;
    }
  n->n = n1->n | n2->n;
  n->n_ops = n1->n_ops + n2->n_ops;

  return source_stmt;
}
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand and NULL
   otherwise.  */

gimple *
find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  /* Handle BIT_FIELD_REF.  */
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    {
      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
      if (bitpos % BITS_PER_UNIT == 0
          && bitsize % BITS_PER_UNIT == 0
          && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
        {
          /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
          if (BYTES_BIG_ENDIAN)
            bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;

          /* Shift.  */
          if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
            return NULL;

          /* Mask.  */
          uint64_t mask = 0;
          uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
          for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
               i++, tmp <<= BITS_PER_UNIT)
            mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
          n->n &= mask;

          /* Convert.  */
          n->type = TREE_TYPE (rhs1);
          if (!n->base_addr)
            n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

          return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
        }

      return NULL;
    }

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
          && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
          && code != LSHIFT_EXPR
          && code != RSHIFT_EXPR
          && code != LROTATE_EXPR
          && code != RROTATE_EXPR
          && !CONVERT_EXPR_CODE_P (code))
        return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
         we have to initialize the symbolic number.  */
      if (!source_stmt1)
        {
          if (gimple_assign_load_p (stmt)
              || !init_symbolic_number (n, rhs1))
            return NULL;
          source_stmt1 = stmt;
        }

      switch (code)
        {
        case BIT_AND_EXPR:
          {
            int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
            uint64_t val = int_cst_value (rhs2), mask = 0;
            uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

            /* Only constants masking full bytes are allowed.  */
            for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
              if ((val & tmp) != 0 && (val & tmp) != tmp)
                return NULL;
              else if (val & tmp)
                mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

            n->n &= mask;
          }
          break;
        case LSHIFT_EXPR:
        case RSHIFT_EXPR:
        case LROTATE_EXPR:
        case RROTATE_EXPR:
          if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
            return NULL;
          break;
        CASE_CONVERT:
          {
            int i, type_size, old_type_size;
            tree type;

            type = gimple_expr_type (stmt);
            type_size = TYPE_PRECISION (type);
            if (type_size % BITS_PER_UNIT != 0)
              return NULL;
            type_size /= BITS_PER_UNIT;
            if (type_size > 64 / BITS_PER_MARKER)
              return NULL;

            /* Sign extension: result is dependent on the value.  */
            old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
            if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
                && HEAD_MARKER (n->n, old_type_size))
              for (i = 0; i < type_size - old_type_size; i++)
                n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
                        << ((type_size - 1 - i) * BITS_PER_MARKER);

            if (type_size < 64 / BITS_PER_MARKER)
              {
                /* If STMT casts to a smaller type mask out the bits not
                   belonging to the target type.  */
                n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
              }
            n->type = type;
            if (!n->base_addr)
              n->range = type_size;
          }
          break;
        default:
          return NULL;
        };
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple *source_stmt, *source_stmt2;

      if (code != BIT_IOR_EXPR)
        return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
        return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
        {
        case BIT_IOR_EXPR:
          source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

          if (!source_stmt1)
            return NULL;

          source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

          if (!source_stmt2)
            return NULL;

          if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
            return NULL;

          if (n1.vuse != n2.vuse)
            return NULL;

          source_stmt
            = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);

          if (!source_stmt)
            return NULL;

          if (!verify_symbolic_number_p (n, stmt))
            return NULL;

          break;
        default:
          return NULL;
        }
      return source_stmt;
    }
  return NULL;
}
/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
   *CMPXCHG, *CMPNOP and adjust *N.  */

void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
                            uint64_t *cmpnop)
{
  unsigned rsize;
  uint64_t tmpn, mask;

  /* The number which the find_bswap_or_nop_1 result should match in
     order to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  *cmpxchg = CMPXCHG;
  *cmpnop = CMPNOP;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
  else
    rsize = n->range;

  /* Zero out the bits corresponding to untouched bytes in the original
     gimple expression.  */
  if (n->range < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      *cmpnop &= mask;
    }

  /* Zero out the bits corresponding to unused bytes in the result of the
     gimple expression.  */
  if (rsize < n->range)
    {
      if (BYTES_BIG_ENDIAN)
        {
          mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
          *cmpxchg &= mask;
          *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
        }
      else
        {
          mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
          *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
          *cmpnop &= mask;
        }
      n->range = rsize;
    }

  n->range *= BITS_PER_UNIT;
}
/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and set *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  At last, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */

gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
{
  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by 2 * (log2(n) + 1) here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for the initial shift/and operation of the src operand.  */
  int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
  limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
  gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!ins_stmt)
    return NULL;

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    return NULL;

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    return NULL;

  return ins_stmt;
}
const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
  }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap
/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and set-alias of the
   load to perform are also given in N while the builtin bswap invocation is
   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one
   of the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE, which can lead to gsi_stmt (GSI) changing basic block.  */
tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
               tree bswap_type, tree load_type, struct symbolic_number *n,
               bool bswap)
{
  tree src, tmp, tgt = NULL_TREE;
  gimple *bswap_stmt;

  gimple *cur_stmt = gsi_stmt (gsi);
  src = n->src;
  if (cur_stmt)
    tgt = gimple_assign_lhs (cur_stmt);

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi;
      if (ins_stmt)
        gsi_ins = gsi_for_stmt (ins_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *load_stmt;
      unsigned align = get_object_alignment (src);
      poly_int64 load_offset = 0;

      if (cur_stmt)
        {
          basic_block ins_bb = gimple_bb (ins_stmt);
          basic_block cur_bb = gimple_bb (cur_stmt);
          if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
            return NULL_TREE;

          /* Move cur_stmt just before one of the loads of the original
             to ensure it has the same VUSE.  See PR61517 for what could
             go wrong.  */
          if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
            reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
          gsi_move_before (&gsi, &gsi_ins);
          gsi = gsi_for_stmt (cur_stmt);
        }
      else
        gsi = gsi_ins;

      /* Compute the address to load from and cast according to the size
         of the load.  */
      addr_expr = build_fold_addr_expr (src);
      if (is_gimple_mem_ref_addr (addr_expr))
        addr_tmp = unshare_expr (addr_expr);
      else
        {
          addr_tmp = unshare_expr (n->base_addr);
          if (!is_gimple_mem_ref_addr (addr_tmp))
            addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
                                                   is_gimple_mem_ref_addr,
                                                   NULL_TREE, true,
                                                   GSI_SAME_STMT);
          load_offset = n->bytepos;
          if (n->offset)
            {
              tree off
                = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
                                            true, NULL_TREE, true,
                                            GSI_SAME_STMT);
              gimple *stmt
                = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
                                       POINTER_PLUS_EXPR, addr_tmp, off);
              gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
              addr_tmp = gimple_assign_lhs (stmt);
            }
        }

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
        aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
                              load_offset_ptr);

      if (!bswap)
        {
          if (n->range == 16)
            nop_stats.found_16bit++;
          else if (n->range == 32)
            nop_stats.found_32bit++;
          else
            {
              gcc_assert (n->range == 64);
              nop_stats.found_64bit++;
            }

          /* Convert the result of load if necessary.  */
          if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
            {
              val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
                                            "load_dst");
              load_stmt = gimple_build_assign (val_tmp, val_expr);
              gimple_set_vuse (load_stmt, n->vuse);
              gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
              gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
              update_stmt (cur_stmt);
            }
          else if (cur_stmt)
            {
              gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
              gimple_set_vuse (cur_stmt, n->vuse);
              update_stmt (cur_stmt);
            }
          else
            {
              tgt = make_ssa_name (load_type);
              cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
              gimple_set_vuse (cur_stmt, n->vuse);
              gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
            }

          if (dump_file)
            {
              fprintf (dump_file,
                       "%d bit load in target endianness found at: ",
                       (int) n->range);
              print_gimple_stmt (dump_file, cur_stmt, 0);
            }
          return tgt;
        }
      else
        {
          val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
          load_stmt = gimple_build_assign (val_tmp, val_expr);
          gimple_set_vuse (load_stmt, n->vuse);
          gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
          src = val_tmp;
        }
    }
  else if (!bswap)
    {
      gimple *g;
      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
        {
          if (!is_gimple_val (src))
            return NULL_TREE;
          g = gimple_build_assign (tgt, NOP_EXPR, src);
        }
      else if (cur_stmt)
        g = gimple_build_assign (tgt, src);
      else
        {
          tgt = make_ssa_name (TREE_TYPE (src));
          g = gimple_build_assign (tgt, src);
        }
      if (n->range == 16)
        nop_stats.found_16bit++;
      else if (n->range == 32)
        nop_stats.found_32bit++;
      else
        {
          gcc_assert (n->range == 64);
          nop_stats.found_64bit++;
        }
      if (dump_file)
        {
          fprintf (dump_file,
                   "%d bit reshuffle in target endianness found at: ",
                   (int) n->range);
          if (cur_stmt)
            print_gimple_stmt (dump_file, cur_stmt, 0);
          else
            {
              print_generic_expr (dump_file, tgt, TDF_NONE);
              fprintf (dump_file, "\n");
            }
        }
      if (cur_stmt)
        gsi_replace (&gsi, g, true);
      else
        gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return tgt;
    }
  else if (TREE_CODE (src) == BIT_FIELD_REF)
    src = TREE_OPERAND (src, 0);

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16-bit
     values are handled this way because a rotation of a 2N-bit value by
     N bits is generally not equivalent to a bswap.  Consider for instance
     0x01020304 r>> 16 which gives 0x03040102 while a bswap for that value
     is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  if (tgt == NULL_TREE)
    tgt = make_ssa_name (bswap_type);
  tmp = tgt;

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
               (int) n->range);
      if (cur_stmt)
        print_gimple_stmt (dump_file, cur_stmt, 0);
      else
        {
          print_generic_expr (dump_file, tgt, TDF_NONE);
          fprintf (dump_file, "\n");
        }
    }

  if (cur_stmt)
    {
      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
      gsi_remove (&gsi, true);
    }
  else
    gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
  return tgt;
}
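/* Illustrative sketch (editor's addition): a hand-written byte swap the
   pass below recognizes; on targets with a bswap optab it is replaced by
   a single __builtin_bswap32 call:

     uint32_t swap32 (uint32_t x)
     {
       return (x << 24)
              | ((x & 0x0000ff00) << 8)
              | ((x & 0x00ff0000) >> 8)
              | (x >> 24);
     }
*/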
/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or a
   simple load according to the target endianness.  */
unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
               && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
                   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
         widest match.  As bswap pattern matching doesn't handle previously
         inserted smaller bswap replacements as sub-patterns, the wider
         variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
        {
          gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
          tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
          enum tree_code code;
          struct symbolic_number n;
          bool bswap;

          /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
             might be moved to a different basic block by bswap_replace and gsi
             must not point to it if that's the case.  Moving the gsi_prev
             here makes sure that gsi points to the statement previous to
             cur_stmt while still making sure that all statements are
             considered in this basic block.  */
          gsi_prev (&gsi);

          if (!is_gimple_assign (cur_stmt))
            continue;

          code = gimple_assign_rhs_code (cur_stmt);
          switch (code)
            {
            case LROTATE_EXPR:
            case RROTATE_EXPR:
              if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
                  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
                     % BITS_PER_UNIT)
                continue;
              /* Fall through.  */
            case BIT_XOR_EXPR:
            case BIT_AND_EXPR:
            case BIT_IOR_EXPR:
              break;
            default:
              continue;
            }

          ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);

          if (!ins_stmt)
            continue;

          switch (n.range)
            {
            case 16:
              /* Already in canonical form, nothing to do.  */
              if (code == LROTATE_EXPR || code == RROTATE_EXPR)
                continue;
              load_type = bswap_type = uint16_type_node;
              break;
            case 32:
              load_type = uint32_type_node;
              if (bswap32_p)
                {
                  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
                  bswap_type = bswap32_type;
                }
              break;
            case 64:
              load_type = uint64_type_node;
              if (bswap64_p)
                {
                  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
                  bswap_type = bswap64_type;
                }
              break;
            default:
              continue;
            }

          if (bswap && !fndecl && n.range != 16)
            continue;

          if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
                             bswap_type, load_type, &n, bswap))
            changed = true;
        }
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
                            nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
                            nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
                            nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
                            bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
                            bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
                            bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}
gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}
/* Struct recording one operand for the store, which is either a constant,
   then VAL represents the constant and all the other fields are zero, or
   a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
   and the other fields also reflect the memory load, or an SSA name, then
   VAL represents the SSA name and all the other fields are zero.  */

class store_operand_info
{
public:
  tree val;
  tree base_addr;
  poly_uint64 bitsize;
  poly_uint64 bitpos;
  poly_uint64 bitregion_start;
  poly_uint64 bitregion_end;
  gimple *stmt;
  bool bit_not_p;
  store_operand_info ();
};

store_operand_info::store_operand_info ()
  : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
    bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
{
}
/* Struct recording the information about a single store of an immediate
   to memory.  These are created in the first phase and coalesced into
   merged_store_group objects in the second phase.  */

class store_immediate_info
{
public:
  unsigned HOST_WIDE_INT bitsize;
  unsigned HOST_WIDE_INT bitpos;
  unsigned HOST_WIDE_INT bitregion_start;
  /* This is one past the last bit of the bit region.  */
  unsigned HOST_WIDE_INT bitregion_end;
  gimple *stmt;
  unsigned int order;
  /* INTEGER_CST for constant stores, MEM_REF for memory copy,
     BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
     for bit insertion.
     LROTATE_EXPR if it can be only bswap optimized and
     ops are not really meaningful.
     NOP_EXPR if bswap optimization detected identity, ops
     are not meaningful.  */
  enum tree_code rhs_code;
  /* Two fields for bswap optimization purposes.  */
  struct symbolic_number n;
  gimple *ins_stmt;
  /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
  bool bit_not_p;
  /* True if ops have been swapped and thus ops[1] represents
     rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
  bool ops_swapped_p;
  /* The index number of the landing pad, or 0 if there is none.  */
  int lp_nr;
  /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
     just the first one.  */
  store_operand_info ops[2];
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
                        unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
                        gimple *, unsigned int, enum tree_code,
                        struct symbolic_number &, gimple *, bool, int,
                        const store_operand_info &,
                        const store_operand_info &);
};

store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
                                            unsigned HOST_WIDE_INT bp,
                                            unsigned HOST_WIDE_INT brs,
                                            unsigned HOST_WIDE_INT bre,
                                            gimple *st,
                                            unsigned int ord,
                                            enum tree_code rhscode,
                                            struct symbolic_number &nr,
                                            gimple *ins_stmtp,
                                            bool bitnotp,
                                            int nr2,
                                            const store_operand_info &op0r,
                                            const store_operand_info &op1r)
  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
    stmt (st), order (ord), rhs_code (rhscode), n (nr),
    ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
    lp_nr (nr2)
#if __cplusplus >= 201103L
    , ops { op0r, op1r }
{
}
#else
{
  ops[0] = op0r;
  ops[1] = op1r;
}
#endif
/* Struct representing a group of stores to contiguous memory locations.
   These are produced by the second phase (coalescing) and consumed in the
   third phase that outputs the widened stores.  */

class merged_store_group
{
public:
  unsigned HOST_WIDE_INT start;
  unsigned HOST_WIDE_INT width;
  unsigned HOST_WIDE_INT bitregion_start;
  unsigned HOST_WIDE_INT bitregion_end;
  /* The size of the allocated memory for val and mask.  */
  unsigned HOST_WIDE_INT buf_size;
  unsigned HOST_WIDE_INT align_base;
  poly_uint64 load_align_base[2];

  unsigned int align;
  unsigned int load_align[2];
  unsigned int first_order;
  unsigned int last_order;
  bool bit_insertion;
  bool only_constants;
  unsigned int first_nonmergeable_order;
  int lp_nr;

  auto_vec<store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  unsigned char *val;
  unsigned char *mask;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  bool can_be_merged_into (store_immediate_info *);
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
private:
  void do_merge (store_immediate_info *);
};
/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (!fd)
    return;

  for (unsigned int i = 0; i < len; i++)
    fprintf (fd, "%02x ", ptr[i]);
  fprintf (fd, "\n");
}
/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 << 2 = 01111111|10000000
   PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */

static void
shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
  unsigned char clear_mask = (~0U) << amnt;

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);

      ptr[i] <<= amnt;
      ptr[i] &= clear_mask;
      ptr[i] |= prev_carry_over;
    }
}
/* Like shift_bytes_in_array but for big-endian.
   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 >> 2 = 00000111|11111000
   PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */

static void
shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
                            unsigned int amnt)
{
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  unsigned char carry_mask = ~(~0U << amnt);

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      carry_over = ptr[i] & carry_mask;

      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
      ptr[i] >>= amnt;
      ptr[i] |= prev_carry_over;
    }
}
/* Clear out LEN bits starting from bit START in the byte array
   PTR.  This clears the bits to the *right* from START.
   START must be within [0, BITS_PER_UNIT) and counts starting from
   the least significant bit.  */

static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
                     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
                           len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
           && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      memset (ptr, 0, nbytes);
      if (len % BITS_PER_UNIT != 0)
        clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
                             len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
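/* Example (editor's note): clear_bit_region_be (ptr, 5, 3) takes the
   len <= start + 1 branch above: mask = 0b111 << (5 + 1 - 3) = 0b00111000,
   so bits 5, 4 and 3 of ptr[0] are cleared.  */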
/* In the byte array PTR clear the bit region starting at bit
   START and LEN bits wide.
   For regions spanning multiple bytes do this recursively until we reach
   zero LEN or a region contained within a single byte.  */

static void
clear_bit_region (unsigned char *ptr, unsigned int start,
                  unsigned int len)
{
  /* Degenerate base case.  */
  if (len == 0)
    return;
  else if (start >= BITS_PER_UNIT)
    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
  /* Second base case.  */
  else if ((start + len) <= BITS_PER_UNIT)
    {
      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
      mask >>= BITS_PER_UNIT - (start + len);

      ptr[0] &= ~mask;

      return;
    }
  /* Clear most significant bits in a byte and proceed with the next byte.  */
  else if (start != 0)
    {
      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    }
  /* Whole bytes need to be cleared.  */
  else if (start == 0 && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      /* We could recurse on each byte but we clear whole bytes, so a simple
         memset will do.  */
      memset (ptr, '\0', nbytes);
      /* Clear the remaining sub-byte region if there is one.  */
      if (len % BITS_PER_UNIT != 0)
        clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
/* Write BITLEN bits of EXPR to the byte array PTR at
   bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
   Return true if the operation succeeded.  */

static bool
encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
                       unsigned int total_bytes)
{
  unsigned int first_byte = bitpos / BITS_PER_UNIT;
  bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
                        || (bitpos % BITS_PER_UNIT)
                        || !int_mode_for_size (bitlen, 0).exists ());
  bool empty_ctor_p
    = (TREE_CODE (expr) == CONSTRUCTOR
       && CONSTRUCTOR_NELTS (expr) == 0
       && TYPE_SIZE_UNIT (TREE_TYPE (expr))
       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));

  if (!sub_byte_op_p)
    {
      if (first_byte >= total_bytes)
        return false;
      total_bytes -= first_byte;
      if (empty_ctor_p)
        {
          unsigned HOST_WIDE_INT rhs_bytes
            = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
          if (rhs_bytes > total_bytes)
            return false;
          memset (ptr + first_byte, '\0', rhs_bytes);
          return true;
        }
      return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
    }

  /* LITTLE-ENDIAN
     We are writing a non-byte-sized quantity or at a position that is not
     at a byte boundary.
     |--------|--------|--------| ptr + first_byte
           ^              ^
           xxx xxxxxxxx xxx< bp>
           |______EXPR____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer by 'bp' (carrying the bits over as necessary).
     |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
                                          <------bitlen---->< bp>
     Then we clear the destination bits:
     |---00000|00000000|000-----| ptr + first_byte
        <-------bitlen--->< bp>

     Finally we ORR the bytes of the shifted EXPR into the cleared region:
     |---xxxxx|xxxxxxxx|xxx-----| ptr + first_byte.

     BIG-ENDIAN
     We are writing a non-byte-sized quantity or at a position that is not
     at a byte boundary.
     ptr + first_byte |--------|--------|--------|
                            ^              ^
                       <bp >xxx xxxxxxxx xxx
                            |_____EXPR_____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer to the right (carrying the bits over as necessary).
     We shift by as much as needed to align the most significant bit of EXPR
     with bitpos:
     |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
      <---bitlen---->           <bp ><-----bitlen----->
     Then we clear the destination bits:
     ptr + first_byte |-----000|00000000|00000---|
                      <bp ><-------bitlen----->

     Finally we ORR the bytes of the shifted EXPR into the cleared region:
     ptr + first_byte |---xxxxx|xxxxxxxx|xxx-----|.
     The awkwardness comes from the fact that bitpos is counted from the
     most significant bit of a byte.  */

  /* We must be dealing with fixed-size data at this point, since the
     total size is also fixed.  */
  unsigned int byte_size;
  if (empty_ctor_p)
    {
      unsigned HOST_WIDE_INT rhs_bytes
        = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
      if (rhs_bytes > total_bytes)
        return false;
      byte_size = rhs_bytes;
    }
  else
    {
      fixed_size_mode mode
        = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
      byte_size = GET_MODE_SIZE (mode);
    }
  /* Allocate an extra byte so that we have space to shift into.  */
  byte_size++;
  unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
  memset (tmpbuf, '\0', byte_size);
  /* The store detection code should only have allowed constants that are
     accepted by native_encode_expr or empty ctors.  */
  if (!empty_ctor_p
      && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
    gcc_unreachable ();

  /* The native_encode_expr machinery uses TYPE_MODE to determine how many
     bytes to write.  This means it can write more than
     ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
     write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
     bitlen and zero out the bits that are not relevant as well (that may
     contain a sign bit due to sign-extension).  */
  unsigned int padding
    = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
  /* On big-endian the padding is at the 'front' so just skip the initial
     bytes.  */
  if (BYTES_BIG_ENDIAN)
    tmpbuf += padding;

  byte_size -= padding;

  if (bitlen % BITS_PER_UNIT != 0)
    {
      if (BYTES_BIG_ENDIAN)
        clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
                             BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
      else
        clear_bit_region (tmpbuf, bitlen,
                          byte_size * BITS_PER_UNIT - bitlen);
    }
  /* Left shifting relies on the last byte being clear if bitlen is
     a multiple of BITS_PER_UNIT, which might not be clear if
     there are padding bytes.  */
  else if (!BYTES_BIG_ENDIAN)
    tmpbuf[byte_size - 1] = '\0';

  /* Clear the bit region in PTR where the bits from TMPBUF will be
     inserted into.  */
  if (BYTES_BIG_ENDIAN)
    clear_bit_region_be (ptr + first_byte,
                         BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
  else
    clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);

  int shift_amnt;
  int bitlen_mod = bitlen % BITS_PER_UNIT;
  int bitpos_mod = bitpos % BITS_PER_UNIT;

  bool skip_byte = false;
  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS and BITLEN are exactly aligned and no shifting
         is necessary.  */
      if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
          || (bitpos_mod == 0 && bitlen_mod == 0))
        shift_amnt = 0;
      /* |. . . . . . . .|
          <bp >   <blen >.
         We always shift right for BYTES_BIG_ENDIAN so shift the beginning
         of the value until it aligns with 'bp' in the next byte over.  */
      else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
        {
          shift_amnt = bitlen_mod + bitpos_mod;
          skip_byte = bitlen_mod != 0;
        }
      /* |. . . . . . . .|
          <----bp--->
           <---blen---->.
         Shift the value right within the same byte so it aligns
         with 'bp'.  */
      else
        shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
    }
  else
    shift_amnt = bitpos % BITS_PER_UNIT;

  /* Create the shifted version of EXPR.  */
  if (!BYTES_BIG_ENDIAN)
    {
      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
      if (shift_amnt == 0)
        byte_size--;
    }
  else
    {
      gcc_assert (BYTES_BIG_ENDIAN);
      shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
      /* If shifting right forced us to move into the next byte skip the now
         empty byte.  */
      if (skip_byte)
        {
          tmpbuf++;
          byte_size--;
        }
    }

  /* Insert the bits from TMPBUF.  */
  for (unsigned int i = 0; i < byte_size; i++)
    ptr[first_byte + i] |= tmpbuf[i];

  return true;
}
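/* Worked example (editor's note, little-endian): encoding an 8-bit
   constant 0xab at bitpos 4 into a zeroed 2-byte buffer takes the sub-byte
   path: tmpbuf = { 0xab, 0x00 } is shifted left by 4 into { 0xb0, 0x0a },
   bits 4..11 of PTR are cleared, and the OR above leaves
   ptr = { 0xb0, 0x0a }.  */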
/* Sorting function for store_immediate_info objects.
   Sorts them by bit position.  */

static int
sort_by_bitpos (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->bitpos < (*tmp2)->bitpos)
    return -1;
  else if ((*tmp)->bitpos > (*tmp2)->bitpos)
    return 1;
  else
    /* If they are the same let's use the order which is guaranteed to
       be different.  */
    return (*tmp)->order - (*tmp2)->order;
}
/* Sorting function for store_immediate_info objects.
   Sorts them by the order field.  */

static int
sort_by_order (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->order < (*tmp2)->order)
    return -1;
  else if ((*tmp)->order > (*tmp2)->order)
    return 1;

  gcc_unreachable ();
}
/* Initialize a merged_store_group object from a store_immediate_info
   object.  */

merged_store_group::merged_store_group (store_immediate_info *info)
{
  start = info->bitpos;
  width = info->bitsize;
  bitregion_start = info->bitregion_start;
  bitregion_end = info->bitregion_end;
  /* VAL has memory allocated for it in apply_stores once the group
     width has been finalized.  */
  val = NULL;
  mask = NULL;
  bit_insertion = false;
  only_constants = info->rhs_code == INTEGER_CST;
  first_nonmergeable_order = ~0U;
  lp_nr = info->lp_nr;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
                          &align, &align_bitpos);
  align_base = start - align_bitpos;
  for (int i = 0; i < 2; ++i)
    {
      store_operand_info &op = info->ops[i];
      if (op.base_addr == NULL_TREE)
        {
          load_align[i] = 0;
          load_align_base[i] = 0;
        }
      else
        {
          get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
          load_align_base[i] = op.bitpos - align_bitpos;
        }
    }
  stores.create (1);
  stores.safe_push (info);
  last_stmt = info->stmt;
  last_order = info->order;
  first_stmt = last_stmt;
  first_order = last_order;
  buf_size = 0;
}

/* Free the allocated memory of a merged_store_group struct.  */

merged_store_group::~merged_store_group ()
{
  if (val)
    XDELETEVEC (val);
}
/* Return true if the store described by INFO can be merged into the group.  */

bool
merged_store_group::can_be_merged_into (store_immediate_info *info)
{
  /* Do not merge bswap patterns.  */
  if (info->rhs_code == LROTATE_EXPR)
    return false;

  if (info->lp_nr != lp_nr)
    return false;

  /* The canonical case.  */
  if (info->rhs_code == stores[0]->rhs_code)
    return true;

  /* BIT_INSERT_EXPR is compatible with INTEGER_CST.  */
  if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
    return true;

  if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
    return true;

  /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
  if (info->rhs_code == MEM_REF
      && (stores[0]->rhs_code == INTEGER_CST
          || stores[0]->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end)
    return true;

  if (stores[0]->rhs_code == MEM_REF
      && (info->rhs_code == INTEGER_CST
          || info->rhs_code == BIT_INSERT_EXPR)
      && info->bitregion_start == stores[0]->bitregion_start
      && info->bitregion_end == stores[0]->bitregion_end)
    return true;

  return false;
}
1951 merged_store_group::do_merge (store_immediate_info
*info
)
1953 bitregion_start
= MIN (bitregion_start
, info
->bitregion_start
);
1954 bitregion_end
= MAX (bitregion_end
, info
->bitregion_end
);
1956 unsigned int this_align
;
1957 unsigned HOST_WIDE_INT align_bitpos
= 0;
1958 get_object_alignment_1 (gimple_assign_lhs (info
->stmt
),
1959 &this_align
, &align_bitpos
);
1960 if (this_align
> align
)
1963 align_base
= info
->bitpos
- align_bitpos
;
1965 for (int i
= 0; i
< 2; ++i
)
1967 store_operand_info
&op
= info
->ops
[i
];
1971 get_object_alignment_1 (op
.val
, &this_align
, &align_bitpos
);
1972 if (this_align
> load_align
[i
])
1974 load_align
[i
] = this_align
;
1975 load_align_base
[i
] = op
.bitpos
- align_bitpos
;
1979 gimple
*stmt
= info
->stmt
;
1980 stores
.safe_push (info
);
1981 if (info
->order
> last_order
)
1983 last_order
= info
->order
;
1986 else if (info
->order
< first_order
)
1988 first_order
= info
->order
;
1991 if (info
->rhs_code
!= INTEGER_CST
)
1992 only_constants
= false;
/* Merge a store recorded by INFO into this merged store.
   The store is not overlapping with the existing recorded
   stores.  */

void
merged_store_group::merge_into (store_immediate_info *info)
{
  /* Make sure we're inserting in the position we think we're inserting.  */
  gcc_assert (info->bitpos >= start + width
              && info->bitregion_start <= bitregion_end);

  width = info->bitpos + info->bitsize - start;
  do_merge (info);
}
/* Merge a store described by INFO into this merged store.
   INFO overlaps in some way with the current store (i.e. it's not contiguous,
   which is handled by merged_store_group::merge_into).  */

void
merged_store_group::merge_overlapping (store_immediate_info *info)
{
  /* If the store extends the size of the group, extend the width.  */
  if (info->bitpos + info->bitsize > start + width)
    width = info->bitpos + info->bitsize - start;

  do_merge (info);
}
/* Go through all the recorded stores in this group in program order and
   apply their values to the VAL byte array to create the final merged
   value.  Return true if the operation succeeded.  */

bool
merged_store_group::apply_stores ()
{
  /* Make sure we have more than one store in the group, otherwise we cannot
     merge anything.  */
  if (bitregion_start % BITS_PER_UNIT != 0
      || bitregion_end % BITS_PER_UNIT != 0
      || stores.length () == 1)
    return false;

  stores.qsort (sort_by_order);
  store_immediate_info *info;
  unsigned int i;
  /* Create a power-of-2-sized buffer for native_encode_expr.  */
  buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
  val = XNEWVEC (unsigned char, 2 * buf_size);
  mask = val + buf_size;
  memset (val, 0, buf_size);
  memset (mask, ~0U, buf_size);

  FOR_EACH_VEC_ELT (stores, i, info)
    {
      unsigned int pos_in_buffer = info->bitpos - bitregion_start;
      tree cst;
      if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
	cst = info->ops[0].val;
      else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
	cst = info->ops[1].val;
      else
	cst = NULL_TREE;
      bool ret = true;
      if (cst)
	{
	  if (info->rhs_code == BIT_INSERT_EXPR)
	    bit_insertion = true;
	  else
	    ret = encode_tree_to_bitpos (cst, val, info->bitsize,
					 pos_in_buffer, buf_size);
	}
      unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (m, (BITS_PER_UNIT - 1
				 - (pos_in_buffer % BITS_PER_UNIT)),
			     info->bitsize);
      else
	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
      if (cst && dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (ret)
	    {
	      fputs ("After writing ", dump_file);
	      print_generic_expr (dump_file, cst, TDF_NONE);
	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
		       " at position %d\n", info->bitsize, pos_in_buffer);
	      fputs ("  the merged value contains ", dump_file);
	      dump_char_array (dump_file, val, buf_size);
	      fputs ("  the merged mask contains  ", dump_file);
	      dump_char_array (dump_file, mask, buf_size);
	      if (bit_insertion)
		fputs ("  bit insertion is required\n", dump_file);
	    }
	  else
	    fprintf (dump_file, "Failed to merge stores\n");
	}
      if (!ret)
	return false;
    }
  stores.qsort (sort_by_bitpos);
  return true;
}
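/* Worked example (a sketch, not produced by the sources): for the two
   constant byte stores
     MEM[(char *)p] = 0x12;  MEM[(char *)p + 1B] = 0x34;
   apply_stores computes buf_size == 2 and, on a little-endian target,
     val:  12 34
     mask: 00 00
   i.e. each stored bit clears the corresponding mask bit, so a fully
   covered region ends up with an all-zero mask while padding bytes keep
   mask 0xff.  */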
/* Structure describing the store chain.  */

class imm_store_chain_info
{
public:
  /* Doubly-linked list that imposes an order on chain processing.
     PNXP (prev's next pointer) points to the head of a list, or to
     the next field in the previous chain in the list.
     See pass_store_merging::m_stores_head for more rationale.  */
  imm_store_chain_info *next, **pnxp;
  tree base_addr;
  auto_vec<store_immediate_info *> m_store_info;
  auto_vec<merged_store_group *> m_merged_store_groups;

  imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
  : next (inspt), pnxp (&inspt), base_addr (b_a)
  {
    inspt = this;
    if (next)
      {
	gcc_checking_assert (pnxp == next->pnxp);
	next->pnxp = &next;
      }
  }
  ~imm_store_chain_info ()
  {
    *pnxp = next;
    if (next)
      {
	gcc_checking_assert (&next == next->pnxp);
	next->pnxp = pnxp;
      }
  }
  bool terminate_and_process_chain ();
  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
  bool coalesce_immediate_stores ();
  bool output_merged_store (merged_store_group *);
  bool output_merged_stores ();
};
const pass_data pass_data_tree_store_merging = {
  GIMPLE_PASS,     /* type */
  "store-merging", /* name */
  OPTGROUP_NONE,   /* optinfo_flags */
  TV_GIMPLE_STORE_MERGING, /* tv_id */
  PROP_ssa,	/* properties_required */
  0,		/* properties_provided */
  0,		/* properties_destroyed */
  0,		/* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
class pass_store_merging : public gimple_opt_pass
{
public:
  pass_store_merging (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
  {
  }

  /* Pass not supported for PDP-endian, nor for insane hosts or
     target character sizes where native_{encode,interpret}_expr
     doesn't work properly.  */
  virtual bool
  gate (function *)
  {
    return flag_store_merging
	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
	   && CHAR_BIT == 8
	   && BITS_PER_UNIT == 8;
  }

  virtual unsigned int execute (function *);

private:
  hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;

  /* Form a doubly-linked stack of the elements of m_stores, so that
     we can iterate over them in a predictable way.  Using this order
     avoids extraneous differences in the compiler output just because
     of tree pointer variations (e.g. different chains end up in
     different positions of m_stores, so they are handled in different
     orders, so they allocate or release SSA names in different
     orders, and when they get reused, subsequent passes end up
     getting different SSA names, which may ultimately change
     decisions when going out of SSA).  */
  imm_store_chain_info *m_stores_head;

  bool process_store (gimple *);
  bool terminate_and_process_chain (imm_store_chain_info *);
  bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
  bool terminate_and_process_all_chains ();
}; // class pass_store_merging
/* Terminate and process all recorded chains.  Return true if any changes
   were made.  */

bool
pass_store_merging::terminate_and_process_all_chains ()
{
  bool ret = false;
  while (m_stores_head)
    ret |= terminate_and_process_chain (m_stores_head);
  gcc_assert (m_stores.is_empty ());
  return ret;
}
/* Terminate all chains that are affected by the statement STMT.
   CHAIN_INFO is the chain we should ignore from the checks if
   non-NULL.  Return true if any changes were made.  */

bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
						     **chain_info,
						   gimple *stmt)
{
  bool ret = false;

  /* If the statement doesn't touch memory it can't alias.  */
  if (!gimple_vuse (stmt))
    return false;

  tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
  ao_ref store_lhs_ref;
  ao_ref_init (&store_lhs_ref, store_lhs);
  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
    {
      next = cur->next;

      /* We already checked all the stores in chain_info and terminated the
	 chain if necessary.  Skip it here.  */
      if (chain_info && *chain_info == cur)
	continue;

      store_immediate_info *info;
      unsigned int i;
      FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
	{
	  tree lhs = gimple_assign_lhs (info->stmt);
	  ao_ref lhs_ref;
	  ao_ref_init (&lhs_ref, lhs);
	  if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
	      || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
	      || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
						   &lhs_ref, false)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "stmt causes chain termination:\n");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      ret |= terminate_and_process_chain (cur);
	      break;
	    }
	}
    }

  return ret;
}
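/* E.g. (illustrative): a call such as foo (p) that carries a vuse
   terminates every chain containing a recorded store that could be read
   or clobbered through *p; the chain passed in as CHAIN_INFO has already
   been vetted by the caller and is skipped.  */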
/* Helper function.  Terminate the recorded chain storing to base object
   BASE.  Return true if the merging and output was successful.  The m_stores
   entry is removed after the processing in any case.  */

bool
pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
{
  bool ret = chain_info->terminate_and_process_chain ();
  m_stores.remove (chain_info->base_addr);
  delete chain_info;
  return ret;
}
/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
   may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
   be able to sink load of REF across stores between FIRST and LAST, up
   to right before LAST.  */

static bool
stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
{
  ao_ref r;
  ao_ref_init (&r, ref);
  unsigned int count = 0;
  tree vop = gimple_vdef (last);
  gimple *stmt;

  /* Return true conservatively if the basic blocks are different.  */
  if (gimple_bb (first) != gimple_bb (last))
    return true;

  do
    {
      stmt = SSA_NAME_DEF_STMT (vop);
      if (stmt_may_clobber_ref_p_1 (stmt, &r))
	return true;
      if (gimple_store_p (stmt)
	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
	return true;
      /* Avoid quadratic compile time by bounding the number of checks
	 we perform.  */
      if (++count > MAX_STORE_ALIAS_CHECKS)
	return true;
      vop = gimple_vuse (stmt);
    }
  while (stmt != first);

  return false;
}
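/* Illustrative walk (a sketch): given
     s.a = 1;   // FIRST, defines a virtual operand
     t.b = 2;
     u.c = 3;   // LAST
   the loop starts at SSA_NAME_DEF_STMT (gimple_vdef (LAST)) and follows
   gimple_vuse backwards through the virtual operand chain, testing each
   statement against REF until FIRST is reached.  */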
/* Return true if INFO->ops[IDX] is mergeable with the
   corresponding loads already in MERGED_STORE group.
   BASE_ADDR is the base address of the whole store group.  */

static bool
compatible_load_p (merged_store_group *merged_store,
		   store_immediate_info *info,
		   tree base_addr, int idx)
{
  store_immediate_info *infof = merged_store->stores[0];
  if (!info->ops[idx].base_addr
      || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
		   info->bitpos - infof->bitpos)
      || !operand_equal_p (info->ops[idx].base_addr,
			   infof->ops[idx].base_addr, 0))
    return false;

  store_immediate_info *infol = merged_store->stores.last ();
  tree load_vuse = gimple_vuse (info->ops[idx].stmt);
  /* In this case all vuses should be the same, e.g.
     _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
     or
     _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
     and we can emit the coalesced load next to any of those loads.  */
  if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
      && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
    return true;

  /* Otherwise, at least for now require that the load has the same
     vuse as the store.  See following examples.  */
  if (gimple_vuse (info->stmt) != load_vuse)
    return false;

  if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
      || (infof != infol
	  && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
    return false;

  /* If the load is from the same location as the store, already
     the construction of the immediate chain info guarantees no intervening
     stores, so no further checks are needed.  Example:
     _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
  if (known_eq (info->ops[idx].bitpos, info->bitpos)
      && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
    return true;

  /* Otherwise, we need to punt if any of the loads can be clobbered by any
     of the stores in the group, or any other stores in between those.
     Previous calls to compatible_load_p ensured that for all the
     merged_store->stores IDX loads, no stmts starting with
     merged_store->first_stmt and ending right before merged_store->last_stmt
     clobbers those loads.  */
  gimple *first = merged_store->first_stmt;
  gimple *last = merged_store->last_stmt;
  unsigned int i;
  store_immediate_info *infoc;
  /* The stores are sorted by increasing store bitpos, so if info->stmt store
     comes before the so far first load, we'll be changing
     merged_store->first_stmt.  In that case we need to give up if
     any of the earlier processed loads clobber with the stmts in the new
     range.  */
  if (info->order < merged_store->first_order)
    {
      FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
	if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
	  return false;
      first = info->stmt;
    }
  /* Similarly, we could change merged_store->last_stmt, so ensure
     in that case no stmts in the new range clobber any of the earlier
     processed loads.  */
  else if (info->order > merged_store->last_order)
    {
      FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
	if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
	  return false;
      last = info->stmt;
    }
  /* And finally, we'd be adding a new load to the set, ensure it isn't
     clobbered in the new range.  */
  if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
    return false;

  /* Otherwise, we are looking for:
     _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
     or
     _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
  return true;
}
/* Add all refs loaded to compute VAL to REFS vector.  */

static void
gather_bswap_load_refs (vec<tree> *refs, tree val)
{
  if (TREE_CODE (val) != SSA_NAME)
    return;

  gimple *stmt = SSA_NAME_DEF_STMT (val);
  if (!is_gimple_assign (stmt))
    return;

  if (gimple_assign_load_p (stmt))
    {
      refs->safe_push (gimple_assign_rhs1 (stmt));
      return;
    }

  switch (gimple_assign_rhs_class (stmt))
    {
    case GIMPLE_BINARY_RHS:
      gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
      /* FALLTHRU */
    case GIMPLE_UNARY_RHS:
      gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
      break;
    default:
      gcc_unreachable ();
    }
}
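/* E.g. (illustrative): for VAL = _5 where
     _1 = *p_7; _2 = *q_8; _3 = _1 | _2; _5 = ~_3;
   the recursion descends through the BIT_NOT_EXPR and both operands of
   the BIT_IOR_EXPR, pushing the two loaded refs *p_7 and *q_8 into
   REFS.  */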
/* Check if there are any stores in M_STORE_INFO after index I
   (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
   a potential group ending with END that have their order
   smaller than LAST_ORDER.  RHS_CODE is the kind of store in the
   group.  Return true if there are no such stores.
   Consider:
     MEM[(long long int *)p_28] = 0;
     MEM[(long long int *)p_28 + 8B] = 0;
     MEM[(long long int *)p_28 + 16B] = 0;
     MEM[(long long int *)p_28 + 24B] = 0;
     MEM[(int *)p_28 + 8B] = _129;
     MEM[(int *)p_28].a = -1;
   We already have
     MEM[(long long int *)p_28] = 0;
     MEM[(int *)p_28].a = -1;
   stmts in the current group and need to consider if it is safe to
   add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
   There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
   store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
   into the group and merging of those 3 stores is successful, merged
   stmts will be emitted at the latest store from that group, i.e.
   LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
   The MEM[(int *)p_28 + 8B] = _129; store that originally follows
   the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
   so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
   into the group.  That way it will be its own store group and will
   not be touched.  If RHS_CODE is INTEGER_CST and there are overlapping
   INTEGER_CST stores, those are mergeable using merge_overlapping,
   so don't return false for those.  */

static bool
check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
		  enum tree_code rhs_code, unsigned int last_order,
		  unsigned HOST_WIDE_INT end)
{
  unsigned int len = m_store_info.length ();
  for (++i; i < len; ++i)
    {
      store_immediate_info *info = m_store_info[i];
      if (info->bitpos >= end)
	break;
      if (info->order < last_order
	  && (rhs_code != INTEGER_CST || info->rhs_code != INTEGER_CST))
	return false;
    }
  return true;
}
/* Return true if m_store_info[first] and at least one following store
   form a group which store try_size bitsize value which is byte swapped
   from a memory load or some value, or identity from some value.
   This uses the bswap pass APIs.  */

bool
imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
					  unsigned int first,
					  unsigned int try_size)
{
  unsigned int len = m_store_info.length (), last = first;
  unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
  if (width >= try_size)
    return false;
  for (unsigned int i = first + 1; i < len; ++i)
    {
      if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
	  || m_store_info[i]->ins_stmt == NULL)
	return false;
      width += m_store_info[i]->bitsize;
      if (width >= try_size)
	{
	  last = i;
	  break;
	}
    }
  if (width != try_size)
    return false;

  bool allow_unaligned
    = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
  /* Punt if the combined store would not be aligned and we need alignment.  */
  if (!allow_unaligned)
    {
      unsigned int align = merged_store->align;
      unsigned HOST_WIDE_INT align_base = merged_store->align_base;
      for (unsigned int i = first + 1; i <= last; ++i)
	{
	  unsigned int this_align;
	  unsigned HOST_WIDE_INT align_bitpos = 0;
	  get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
				  &this_align, &align_bitpos);
	  if (this_align > align)
	    {
	      align = this_align;
	      align_base = m_store_info[i]->bitpos - align_bitpos;
	    }
	}
      unsigned HOST_WIDE_INT align_bitpos
	= (m_store_info[first]->bitpos - align_base) & (align - 1);
      if (align_bitpos)
	align = least_bit_hwi (align_bitpos);
      if (align < try_size)
	return false;
    }

  tree type;
  switch (try_size)
    {
    case 16: type = uint16_type_node; break;
    case 32: type = uint32_type_node; break;
    case 64: type = uint64_type_node; break;
    default: gcc_unreachable ();
    }
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  int vuse_store = -1;
  unsigned int first_order = merged_store->first_order;
  unsigned int last_order = merged_store->last_order;
  gimple *first_stmt = merged_store->first_stmt;
  gimple *last_stmt = merged_store->last_stmt;
  unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
  store_immediate_info *infof = m_store_info[first];

  for (unsigned int i = first; i <= last; ++i)
    {
      store_immediate_info *info = m_store_info[i];
      struct symbolic_number this_n = info->n;
      this_n.type = type;
      if (!this_n.base_addr)
	this_n.range = try_size / BITS_PER_UNIT;
      else
	/* Update vuse in case it has changed by output_merged_stores.  */
	this_n.vuse = gimple_vuse (info->ins_stmt);
      unsigned int bitpos = info->bitpos - infof->bitpos;
      if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
			    BYTES_BIG_ENDIAN
			    ? try_size - info->bitsize - bitpos
			    : bitpos))
	return false;
      if (this_n.base_addr && vuse_store)
	{
	  unsigned int j;
	  for (j = first; j <= last; ++j)
	    if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
	      break;
	  if (j > last)
	    {
	      if (vuse_store == 1)
		return false;
	      vuse_store = 0;
	    }
	}
      if (i == first)
	{
	  n = this_n;
	  ins_stmt = info->ins_stmt;
	}
      else
	{
	  if (n.base_addr && n.vuse != this_n.vuse)
	    {
	      if (vuse_store == 0)
		return false;
	      vuse_store = 1;
	    }
	  if (info->order > last_order)
	    {
	      last_order = info->order;
	      last_stmt = info->stmt;
	    }
	  else if (info->order < first_order)
	    {
	      first_order = info->order;
	      first_stmt = info->stmt;
	    }
	  end = MAX (end, info->bitpos + info->bitsize);

	  ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
					     &this_n, &n);
	  if (ins_stmt == NULL)
	    return false;
	}
    }

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);

  /* A complete byte swap should make the symbolic number to start with
     the largest digit in the highest order byte.  Unchanged symbolic
     number indicates a read with same endianness as target architecture.  */
  if (n.n != cmpnop && n.n != cmpxchg)
    return false;

  if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
    return false;

  if (!check_no_overlap (m_store_info, last, LROTATE_EXPR, last_order, end))
    return false;

  /* Don't handle memory copy this way if normal non-bswap processing
     would handle it too.  */
  if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
    {
      unsigned int i;
      for (i = first; i <= last; ++i)
	if (m_store_info[i]->rhs_code != MEM_REF)
	  break;
      if (i == last + 1)
	return false;
    }

  if (n.n == cmpxchg)
    switch (try_size)
      {
      case 16:
	/* Will emit LROTATE_EXPR.  */
	break;
      case 32:
	if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	    && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
	  break;
	return false;
      case 64:
	if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	    && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
	  break;
	return false;
      default:
	gcc_unreachable ();
      }

  if (!allow_unaligned && n.base_addr)
    {
      unsigned int align = get_object_alignment (n.src);
      if (align < try_size)
	return false;
    }

  /* If each load has vuse of the corresponding store, need to verify
     the loads can be sunk right before the last store.  */
  if (vuse_store == 1)
    {
      auto_vec<tree, 64> refs;
      for (unsigned int i = first; i <= last; ++i)
	gather_bswap_load_refs (&refs,
				gimple_assign_rhs1 (m_store_info[i]->stmt));

      unsigned int i;
      tree ref;
      FOR_EACH_VEC_ELT (refs, i, ref)
	if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
	  return false;
      n.vuse = NULL_TREE;
    }

  infof->n = n;
  infof->ins_stmt = ins_stmt;
  for (unsigned int i = first; i <= last; ++i)
    {
      m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
      m_store_info[i]->ops[0].base_addr = NULL_TREE;
      m_store_info[i]->ops[1].base_addr = NULL_TREE;
      if (i != first)
	merged_store->merge_into (m_store_info[i]);
    }

  return true;
}
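/* Typical input recognized here (a sketch): with a 32-bit data,
     p[0] = data >> 24; p[1] = data >> 16; p[2] = data >> 8; p[3] = data;
   on a little-endian target the four byte stores form one 32-bit
   byte-swapped value, so every store's rhs_code becomes LROTATE_EXPR and
   output_merged_store later emits a __builtin_bswap32 based sequence;
   if the bytes happen to already be in target order, NOP_EXPR is used
   instead.  */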
/* Go through the candidate stores recorded in m_store_info and merge them
   into merged_store_group objects recorded into m_merged_store_groups
   representing the widened stores.  Return true if coalescing was successful
   and the number of widened stores is fewer than the original number
   of stores.  */

bool
imm_store_chain_info::coalesce_immediate_stores ()
{
  /* Anything less can't be processed.  */
  if (m_store_info.length () < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
	     m_store_info.length ());

  store_immediate_info *info;
  unsigned int i, ignore = 0;

  /* Order the stores by the bitposition they write to.  */
  m_store_info.qsort (sort_by_bitpos);

  info = m_store_info[0];
  merged_store_group *merged_store = new merged_store_group (info);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputs ("New store group\n", dump_file);

  FOR_EACH_VEC_ELT (m_store_info, i, info)
    {
      unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;

      if (i <= ignore)
	goto done;

      /* First try to handle group of stores like:
	 p[0] = data >> 24;
	 p[1] = data >> 16;
	 p[2] = data >> 8;
	 p[3] = data;
	 using the bswap framework.  */
      if (info->bitpos == merged_store->start + merged_store->width
	  && merged_store->stores.length () == 1
	  && merged_store->stores[0]->ins_stmt != NULL
	  && info->ins_stmt != NULL)
	{
	  unsigned int try_size;
	  for (try_size = 64; try_size >= 16; try_size >>= 1)
	    if (try_coalesce_bswap (merged_store, i - 1, try_size))
	      break;

	  if (try_size >= 16)
	    {
	      ignore = i + merged_store->stores.length () - 1;
	      m_merged_store_groups.safe_push (merged_store);
	      if (ignore < m_store_info.length ())
		merged_store = new merged_store_group (m_store_info[ignore]);
	      else
		merged_store = NULL;
	      goto done;
	    }
	}

      new_bitregion_start
	= MIN (merged_store->bitregion_start, info->bitregion_start);
      new_bitregion_end
	= MAX (merged_store->bitregion_end, info->bitregion_end);

      if (info->order >= merged_store->first_nonmergeable_order
	  || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
	      > (unsigned) param_store_merging_max_size))
	;

      /* |---store 1---|
	       |---store 2---|
	 Overlapping stores.  */
      else if (IN_RANGE (info->bitpos, merged_store->start,
			 merged_store->start + merged_store->width - 1))
	{
	  /* Only allow overlapping stores of constants.  */
	  if (info->rhs_code == INTEGER_CST
	      && merged_store->only_constants
	      && info->lp_nr == merged_store->lp_nr)
	    {
	      unsigned int last_order
		= MAX (merged_store->last_order, info->order);
	      unsigned HOST_WIDE_INT end
		= MAX (merged_store->start + merged_store->width,
		       info->bitpos + info->bitsize);
	      if (check_no_overlap (m_store_info, i, INTEGER_CST,
				    last_order, end))
		{
		  /* check_no_overlap call above made sure there are no
		     overlapping stores with non-INTEGER_CST rhs_code
		     in between the first and last of the stores we've
		     just merged.  If there are any INTEGER_CST rhs_code
		     stores in between, we need to merge_overlapping them
		     even if in the sort_by_bitpos order there are other
		     overlapping stores in between.  Keep those stores as is.
		     Example:
			MEM[(int *)p_28] = 0;
			MEM[(char *)p_28 + 3B] = 1;
			MEM[(char *)p_28 + 1B] = 2;
			MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
		     We can't merge the zero store with the store of two and
		     not merge anything else, because the store of one is
		     in the original order in between those two, but in
		     store_by_bitpos order it comes after the last store that
		     we can't merge with them.  We can merge the first 3 stores
		     and keep the last store as is though.  */
		  unsigned int len = m_store_info.length ();
		  unsigned int try_order = last_order;
		  unsigned int first_nonmergeable_order;
		  unsigned int k;
		  bool last_iter = false;
		  int attempts = 0;
		  do
		    {
		      unsigned int max_order = 0;
		      unsigned first_nonmergeable_int_order = ~0U;
		      unsigned HOST_WIDE_INT this_end = end;
		      k = i;
		      first_nonmergeable_order = ~0U;
		      for (unsigned int j = i + 1; j < len; ++j)
			{
			  store_immediate_info *info2 = m_store_info[j];
			  if (info2->bitpos >= this_end)
			    break;
			  if (info2->order < try_order)
			    {
			      if (info2->rhs_code != INTEGER_CST)
				{
				  /* Normally check_no_overlap makes sure this
				     doesn't happen, but if end grows below,
				     then we need to process more stores than
				     check_no_overlap verified.  Example:
				      MEM[(int *)p_5] = 0;
				      MEM[(short *)p_5 + 3B] = 1;
				      MEM[(char *)p_5 + 4B] = _9;
				      MEM[(char *)p_5 + 2B] = 2;  */
				  k = 0;
				  break;
				}
			      k = j;
			      this_end = MAX (this_end,
					      info2->bitpos + info2->bitsize);
			    }
			  else if (info2->rhs_code == INTEGER_CST
				   && !last_iter)
			    {
			      max_order = MAX (max_order, info2->order + 1);
			      first_nonmergeable_int_order
				= MIN (first_nonmergeable_int_order,
				       info2->order);
			    }
			  else
			    first_nonmergeable_order
			      = MIN (first_nonmergeable_order, info2->order);
			}
		      if (k == 0)
			{
			  if (last_order == try_order)
			    break;
			  /* If this failed, but only because we grew
			     try_order, retry with the last working one,
			     so that we merge at least something.  */
			  try_order = last_order;
			  last_iter = true;
			  continue;
			}
		      last_order = try_order;
		      /* Retry with a larger try_order to see if we could
			 merge some further INTEGER_CST stores.  */
		      if (max_order
			  && (first_nonmergeable_int_order
			      < first_nonmergeable_order))
			{
			  try_order = MIN (max_order,
					   first_nonmergeable_order);
			  try_order
			    = MIN (try_order,
				   merged_store->first_nonmergeable_order);
			  if (try_order > last_order && ++attempts < 16)
			    continue;
			}
		      first_nonmergeable_order
			= MIN (first_nonmergeable_order,
			       first_nonmergeable_int_order);
		      end = this_end;
		      break;
		    }
		  while (1);

		  if (k != 0)
		    {
		      merged_store->merge_overlapping (info);

		      merged_store->first_nonmergeable_order
			= MIN (merged_store->first_nonmergeable_order,
			       first_nonmergeable_order);

		      for (unsigned int j = i + 1; j <= k; j++)
			{
			  store_immediate_info *info2 = m_store_info[j];
			  gcc_assert (info2->bitpos < end);
			  if (info2->order < last_order)
			    {
			      gcc_assert (info2->rhs_code == INTEGER_CST);
			      if (info != info2)
				merged_store->merge_overlapping (info2);
			    }
			  /* Other stores are kept and not merged in any
			     way.  */
			}
		      ignore = k;
		      goto done;
		    }
		}
	    }
	}
      /* |---store 1---||---store 2---|
	 This store is consecutive to the previous one.
	 Merge it into the current store group.  There can be gaps in between
	 the stores, but there can't be gaps in between bitregions.  */
      else if (info->bitregion_start <= merged_store->bitregion_end
	       && merged_store->can_be_merged_into (info))
	{
	  store_immediate_info *infof = merged_store->stores[0];

	  /* All the rhs_code ops that take 2 operands are commutative,
	     swap the operands if it could make the operands compatible.  */
	  if (infof->ops[0].base_addr
	      && infof->ops[1].base_addr
	      && info->ops[0].base_addr
	      && info->ops[1].base_addr
	      && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
			   info->bitpos - infof->bitpos)
	      && operand_equal_p (info->ops[1].base_addr,
				  infof->ops[0].base_addr, 0))
	    {
	      std::swap (info->ops[0], info->ops[1]);
	      info->ops_swapped_p = true;
	    }
	  if (check_no_overlap (m_store_info, i, info->rhs_code,
				MAX (merged_store->last_order, info->order),
				MAX (merged_store->start + merged_store->width,
				     info->bitpos + info->bitsize)))
	    {
	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
		{
		  info->rhs_code = BIT_INSERT_EXPR;
		  info->ops[0].val = gimple_assign_rhs1 (info->stmt);
		  info->ops[0].base_addr = NULL_TREE;
		}
	      else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
		{
		  store_immediate_info *infoj;
		  unsigned int j;
		  FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
		    {
		      infoj->rhs_code = BIT_INSERT_EXPR;
		      infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
		      infoj->ops[0].base_addr = NULL_TREE;
		    }
		}
	      if ((infof->ops[0].base_addr
		   ? compatible_load_p (merged_store, info, base_addr, 0)
		   : !info->ops[0].base_addr)
		  && (infof->ops[1].base_addr
		      ? compatible_load_p (merged_store, info, base_addr, 1)
		      : !info->ops[1].base_addr))
		{
		  merged_store->merge_into (info);
		  goto done;
		}
	    }
	}

      /* |---store 1---| <gap> |---store 2---|.
	 Gap between stores or the rhs not compatible.  Start a new group.  */

      /* Try to apply all the stores recorded for the group to determine
	 the bitpattern they write and discard it if that fails.
	 This will also reject single-store groups.  */
      if (merged_store->apply_stores ())
	m_merged_store_groups.safe_push (merged_store);
      else
	delete merged_store;

      merged_store = new merged_store_group (info);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fputs ("New store group\n", dump_file);

    done:
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
			      " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
		   i, info->bitsize, info->bitpos);
	  print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
	  fputc ('\n', dump_file);
	}
    }

  /* Record or discard the last store group.  */
  if (merged_store)
    {
      if (merged_store->apply_stores ())
	m_merged_store_groups.safe_push (merged_store);
      else
	delete merged_store;
    }

  gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());

  bool success
    = !m_merged_store_groups.is_empty ()
      && m_merged_store_groups.length () < m_store_info.length ();

  if (success && dump_file)
    fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
	     m_merged_store_groups.length ());

  return success;
}
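/* For instance (illustrative): four adjacent byte stores give
   m_store_info.length () == 4; if all of them coalesce into a single
   merged_store_group, 1 < 4 and the function reports success, whereas one
   group per store would mean nothing was gained and it returns false.  */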
/* Return the type to use for the merged stores or loads described by STMTS.
   This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
   otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
   of the MEM_REFs if any.  */

static tree
get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
			  unsigned short *cliquep, unsigned short *basep)
{
  gimple *stmt;
  unsigned int i;
  tree type = NULL_TREE;
  tree ret = NULL_TREE;
  *cliquep = 0;
  *basep = 0;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      tree ref = is_load ? gimple_assign_rhs1 (stmt)
			 : gimple_assign_lhs (stmt);
      tree type1 = reference_alias_ptr_type (ref);
      tree base = get_base_address (ref);

      if (i == 0)
	{
	  if (TREE_CODE (base) == MEM_REF)
	    {
	      *cliquep = MR_DEPENDENCE_CLIQUE (base);
	      *basep = MR_DEPENDENCE_BASE (base);
	    }
	  ret = type = type1;
	  continue;
	}
      if (!alias_ptr_types_compatible_p (type, type1))
	ret = ptr_type_node;
      if (TREE_CODE (base) != MEM_REF
	  || *cliquep != MR_DEPENDENCE_CLIQUE (base)
	  || *basep != MR_DEPENDENCE_BASE (base))
	{
	  *cliquep = 0;
	  *basep = 0;
	}
    }
  return ret;
}
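/* E.g. (illustrative): if the merged statements reference memory through
   incompatible alias pointer types, the function falls back to
   ptr_type_node (alias set 0), and any mismatch in MR_DEPENDENCE_CLIQUE
   or MR_DEPENDENCE_BASE zeroes *CLIQUEP and *BASEP.  */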
/* Return the location_t information we can find among the statements
   in STMTS.  */

static location_t
get_location_for_stmts (vec<gimple *> &stmts)
{
  gimple *stmt;
  unsigned int i;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    if (gimple_has_location (stmt))
      return gimple_location (stmt);

  return UNKNOWN_LOCATION;
}
/* Used to describe a store resulting from splitting a wide store in smaller
   regularly-sized stores in split_group.  */

class split_store
{
public:
  unsigned HOST_WIDE_INT bytepos;
  unsigned HOST_WIDE_INT size;
  unsigned HOST_WIDE_INT align;
  auto_vec<store_immediate_info *> orig_stores;
  /* True if there is a single orig stmt covering the whole split store.  */
  bool orig;
  split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT);
};

/* Simple constructor.  */

split_store::split_store (unsigned HOST_WIDE_INT bp,
			  unsigned HOST_WIDE_INT sz,
			  unsigned HOST_WIDE_INT al)
  : bytepos (bp), size (sz), align (al), orig (false)
{
  orig_stores.create (0);
}
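/* E.g. (illustrative): split_store (8, 32, 32) describes a 32-bit store at
   byte offset 8 from the base with 32-bit alignment; its constituent
   original statements are filled in afterwards by
   find_constituent_stores.  */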
/* Record all stores in GROUP that write to the region starting at BITPOS and
   is of size BITSIZE.  Record infos for such statements in STORES if
   non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
   if there is exactly one original store in the range (in that case ignore
   clobber stmts, unless there are only clobber stmts).  */

static store_immediate_info *
find_constituent_stores (class merged_store_group *group,
			 vec<store_immediate_info *> *stores,
			 unsigned int *first,
			 unsigned HOST_WIDE_INT bitpos,
			 unsigned HOST_WIDE_INT bitsize)
{
  store_immediate_info *info, *ret = NULL;
  unsigned int i;
  bool second = false;
  bool update_first = true;
  unsigned HOST_WIDE_INT end = bitpos + bitsize;
  for (i = *first; group->stores.iterate (i, &info); ++i)
    {
      unsigned HOST_WIDE_INT stmt_start = info->bitpos;
      unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
      if (stmt_end <= bitpos)
	{
	  /* BITPOS passed to this function never decreases from within the
	     same split_group call, so optimize and don't scan info records
	     which are known to end before or at BITPOS next time.
	     Only do it if all stores before this one also pass this.  */
	  if (update_first)
	    *first = i + 1;
	  continue;
	}
      else
	update_first = false;

      /* The stores in GROUP are ordered by bitposition so if we're past
	 the region for this group return early.  */
      if (stmt_start >= end)
	return ret;

      if (gimple_clobber_p (info->stmt))
	{
	  if (stores)
	    stores->safe_push (info);
	  if (ret == NULL)
	    ret = info;
	  continue;
	}
      if (stores)
	{
	  stores->safe_push (info);
	  if (ret && !gimple_clobber_p (ret->stmt))
	    {
	      ret = NULL;
	      second = true;
	    }
	}
      else if (ret && !gimple_clobber_p (ret->stmt))
	return NULL;
      if (!second)
	ret = info;
    }
  return second ? NULL : ret;
}
/* Return how many SSA_NAMEs used to compute value to store in the INFO
   store have multiple uses.  If any SSA_NAME has multiple uses, also
   count statements needed to compute it.  */

static unsigned
count_multiple_uses (store_immediate_info *info)
{
  gimple *stmt = info->stmt;
  unsigned ret = 0;
  switch (info->rhs_code)
    {
    case INTEGER_CST:
      return 0;
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (info->bit_not_p)
	{
	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
	    ret = 1; /* Fall through below to return
			the BIT_NOT_EXPR stmt and then
			BIT_{AND,IOR,XOR}_EXPR and anything it
			uses.  */
	  else
	    /* stmt is after this the BIT_NOT_EXPR.  */
	    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
	}
      if (!has_single_use (gimple_assign_rhs1 (stmt)))
	{
	  ret += 1 + info->ops[0].bit_not_p;
	  if (info->ops[1].base_addr)
	    ret += 1 + info->ops[1].bit_not_p;
	  return ret + 1;
	}
      stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
      /* stmt is now the BIT_*_EXPR.  */
      if (!has_single_use (gimple_assign_rhs1 (stmt)))
	ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
      else if (info->ops[info->ops_swapped_p].bit_not_p)
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
	    ++ret;
	}
      if (info->ops[1].base_addr == NULL_TREE)
	{
	  gcc_checking_assert (!info->ops_swapped_p);
	  return ret;
	}
      if (!has_single_use (gimple_assign_rhs2 (stmt)))
	ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
      else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
	    ++ret;
	}
      return ret;
    case MEM_REF:
      if (!has_single_use (gimple_assign_rhs1 (stmt)))
	return 1 + info->ops[0].bit_not_p;
      else if (info->ops[0].bit_not_p)
	{
	  stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
	    return 1;
	}
      return 0;
    case BIT_INSERT_EXPR:
      return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
    default:
      gcc_unreachable ();
    }
}
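/* E.g. (illustrative): for a MEM_REF store t.a = _1 where the load
   _1 = s.a has another use elsewhere, the result is
   1 + info->ops[0].bit_not_p, because the load (and a possible
   BIT_NOT_EXPR feeding it) would have to be kept alive alongside the
   merged sequence.  */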
/* Split a merged store described by GROUP by populating the SPLIT_STORES
   vector (if non-NULL) with split_store structs describing the byte offset
   (from the base), the bit size and alignment of each store as well as the
   original statements involved in each such split group.
   This is to separate the splitting strategy from the statement
   building/emission/linking done in output_merged_store.
   Return number of new stores.
   If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
   If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
   BZERO_FIRST may be true only when the first store covers the whole group
   and clears it; if BZERO_FIRST is true, keep that first store in the set
   unmodified and emit further stores for the overrides only.
   If SPLIT_STORES is NULL, it is just a dry run to count number of
   new stores.  */

static unsigned int
split_group (merged_store_group *group, bool allow_unaligned_store,
	     bool allow_unaligned_load, bool bzero_first,
	     vec<split_store *> *split_stores,
	     unsigned *total_orig,
	     unsigned *total_new)
{
  unsigned HOST_WIDE_INT pos = group->bitregion_start;
  unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
  unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
  unsigned HOST_WIDE_INT group_align = group->align;
  unsigned HOST_WIDE_INT align_base = group->align_base;
  unsigned HOST_WIDE_INT group_load_align = group_align;
  bool any_orig = false;

  gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));

  if (group->stores[0]->rhs_code == LROTATE_EXPR
      || group->stores[0]->rhs_code == NOP_EXPR)
    {
      gcc_assert (!bzero_first);
      /* For bswap framework using sets of stores, all the checking
	 has been done earlier in try_coalesce_bswap and needs to be
	 emitted as a single store.  */
      if (total_orig)
	{
	  /* Avoid the old/new stmt count heuristics.  It should be
	     always beneficial.  */
	  total_new[0] = 1;
	  total_orig[0] = 2;
	}

      if (split_stores)
	{
	  unsigned HOST_WIDE_INT align_bitpos
	    = (group->start - align_base) & (group_align - 1);
	  unsigned HOST_WIDE_INT align = group_align;
	  if (align_bitpos)
	    align = least_bit_hwi (align_bitpos);
	  bytepos = group->start / BITS_PER_UNIT;
	  split_store *store
	    = new split_store (bytepos, group->width, align);
	  unsigned int first = 0;
	  find_constituent_stores (group, &store->orig_stores,
				   &first, group->start, group->width);
	  split_stores->safe_push (store);
	}

      return 1;
    }

  unsigned int ret = 0, first = 0;
  unsigned HOST_WIDE_INT try_pos = bytepos;

  if (total_orig)
    {
      unsigned int i;
      store_immediate_info *info = group->stores[0];

      total_new[0] = 0;
      total_orig[0] = 1; /* The orig store.  */
      info = group->stores[0];
      if (info->ops[0].base_addr)
	total_orig[0]++;
      if (info->ops[1].base_addr)
	total_orig[0]++;
      switch (info->rhs_code)
	{
	case BIT_AND_EXPR:
	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
	  break;
	default:
	  break;
	}
      total_orig[0] *= group->stores.length ();

      FOR_EACH_VEC_ELT (group->stores, i, info)
	{
	  total_new[0] += count_multiple_uses (info);
	  total_orig[0] += (info->bit_not_p
			    + info->ops[0].bit_not_p
			    + info->ops[1].bit_not_p);
	}
    }

  if (!allow_unaligned_load)
    for (int i = 0; i < 2; ++i)
      if (group->load_align[i])
	group_load_align = MIN (group_load_align, group->load_align[i]);

  if (bzero_first)
    {
      store_immediate_info *gstore;
      FOR_EACH_VEC_ELT (group->stores, first, gstore)
	if (!gimple_clobber_p (gstore->stmt))
	  break;
      ++first;
      ret = 1;
      if (split_stores)
	{
	  split_store *store
	    = new split_store (bytepos, gstore->bitsize, align_base);
	  store->orig_stores.safe_push (gstore);
	  store->orig = true;
	  any_orig = true;
	  split_stores->safe_push (store);
	}
    }

  while (size > 0)
    {
      if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
	  && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
	      || (bzero_first && group->val[try_pos - bytepos] == 0)))
	{
	  /* Skip padding bytes.  */
	  ++try_pos;
	  size -= BITS_PER_UNIT;
	  continue;
	}

      unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
      unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
      unsigned HOST_WIDE_INT align_bitpos
	= (try_bitpos - align_base) & (group_align - 1);
      unsigned HOST_WIDE_INT align = group_align;
      bool found_orig = false;
      if (align_bitpos)
	align = least_bit_hwi (align_bitpos);
      if (!allow_unaligned_store)
	try_size = MIN (try_size, align);
      if (!allow_unaligned_load)
	{
	  /* If we can't do or don't want to do unaligned stores
	     as well as loads, we need to take the loads into account
	     as well.  */
	  unsigned HOST_WIDE_INT load_align = group_load_align;
	  align_bitpos = (try_bitpos - align_base) & (load_align - 1);
	  if (align_bitpos)
	    load_align = least_bit_hwi (align_bitpos);
	  for (int i = 0; i < 2; ++i)
	    if (group->load_align[i])
	      {
		align_bitpos
		  = known_alignment (try_bitpos
				     - group->stores[0]->bitpos
				     + group->stores[0]->ops[i].bitpos
				     - group->load_align_base[i]);
		if (align_bitpos & (group_load_align - 1))
		  {
		    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
		    load_align = MIN (load_align, a);
		  }
	      }
	  try_size = MIN (try_size, load_align);
	}
      store_immediate_info *info
	= find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
      if (info && !gimple_clobber_p (info->stmt))
	{
	  /* If there is just one original statement for the range, see if
	     we can just reuse the original store which could be even larger
	     than try_size.  */
	  unsigned HOST_WIDE_INT stmt_end
	    = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
	  info = find_constituent_stores (group, NULL, &first, try_bitpos,
					  stmt_end - try_bitpos);
	  if (info && info->bitpos >= try_bitpos)
	    {
	      store_immediate_info *info2 = NULL;
	      unsigned int first_copy = first;
	      if (info->bitpos > try_bitpos
		  && stmt_end - try_bitpos <= try_size)
		{
		  info2 = find_constituent_stores (group, NULL, &first_copy,
						   try_bitpos,
						   info->bitpos - try_bitpos);
		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
		}
	      if (info2 == NULL && stmt_end - try_bitpos < try_size)
		{
		  info2 = find_constituent_stores (group, NULL, &first_copy,
						   stmt_end,
						   (try_bitpos + try_size)
						   - stmt_end);
		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
		}
	      if (info2 == NULL)
		{
		  try_size = stmt_end - try_bitpos;
		  found_orig = true;
		  goto found;
		}
	    }
	}

      /* Approximate store bitsize for the case when there are no padding
	 bits.  */
      while (try_size > size)
	try_size /= 2;
      /* Now look for whole padding bytes at the end of that bitsize.  */
      for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
	if (group->mask[try_pos - bytepos + nonmasked - 1]
	    != (unsigned char) ~0U
	    && (!bzero_first
		|| group->val[try_pos - bytepos + nonmasked - 1] != 0))
	  break;
      if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
	{
	  /* If entire try_size range is padding, skip it.  */
	  try_pos += try_size / BITS_PER_UNIT;
	  size -= try_size;
	  continue;
	}
      /* Otherwise try to decrease try_size if second half, last 3 quarters
	 etc. are padding.  */
      nonmasked *= BITS_PER_UNIT;
      while (nonmasked <= try_size / 2)
	try_size /= 2;
      if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
	{
	  /* Now look for whole padding bytes at the start of that bitsize.  */
	  unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
	  for (masked = 0; masked < try_bytesize; ++masked)
	    if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
		&& (!bzero_first
		    || group->val[try_pos - bytepos + masked] != 0))
	      break;
	  masked *= BITS_PER_UNIT;
	  gcc_assert (masked < try_size);
	  if (masked >= try_size / 2)
	    {
	      while (masked >= try_size / 2)
		{
		  try_size /= 2;
		  try_pos += try_size / BITS_PER_UNIT;
		  size -= try_size;
		  masked -= try_size;
		}
	      /* Need to recompute the alignment, so just retry at the new
		 position.  */
	      continue;
	    }
	}

    found:
      ++ret;

      if (split_stores)
	{
	  split_store *store
	    = new split_store (try_pos, try_size, align);
	  info = find_constituent_stores (group, &store->orig_stores,
					  &first, try_bitpos, try_size);
	  if (info
	      && !gimple_clobber_p (info->stmt)
	      && info->bitpos >= try_bitpos
	      && info->bitpos + info->bitsize <= try_bitpos + try_size
	      && (store->orig_stores.length () == 1
		  || found_orig
		  || (info->bitpos == try_bitpos
		      && (info->bitpos + info->bitsize
			  == try_bitpos + try_size))))
	    {
	      store->orig = true;
	      any_orig = true;
	    }
	  split_stores->safe_push (store);
	}

      try_pos += try_size / BITS_PER_UNIT;
      size -= try_size;
    }

  if (total_orig)
    {
      unsigned int i;
      split_store *store;
      /* If we are reusing some original stores and any of the
	 original SSA_NAMEs had multiple uses, we need to subtract
	 those now before we add the new ones.  */
      if (total_new[0] && any_orig)
	{
	  FOR_EACH_VEC_ELT (*split_stores, i, store)
	    if (store->orig)
	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
	}
      total_new[0] += ret; /* The new store.  */
      store_immediate_info *info = group->stores[0];
      if (info->ops[0].base_addr)
	total_new[0] += ret;
      if (info->ops[1].base_addr)
	total_new[0] += ret;
      switch (info->rhs_code)
	{
	case BIT_AND_EXPR:
	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
	  break;
	default:
	  break;
	}
      FOR_EACH_VEC_ELT (*split_stores, i, store)
	{
	  unsigned int j;
	  bool bit_not_p[3] = { false, false, false };
	  /* If all orig_stores have certain bit_not_p set, then
	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
	     If some orig_stores have certain bit_not_p set, then
	     we'd use a BIT_XOR_EXPR with a mask and need to account for
	     it.  */
	  FOR_EACH_VEC_ELT (store->orig_stores, j, info)
	    {
	      if (info->ops[0].bit_not_p)
		bit_not_p[0] = true;
	      if (info->ops[1].bit_not_p)
		bit_not_p[1] = true;
	      if (info->bit_not_p)
		bit_not_p[2] = true;
	    }
	  total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
	}
    }

  return ret;
}
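/* Worked example (a sketch): a group whose bitregion spans 16 bytes on a
   target with MAX_STORE_BITSIZE == 64, group alignment of 32 bits and
   ALLOW_UNALIGNED_STORE false splits into four 32-bit stores at byte
   offsets 0, 4, 8 and 12, so the dry run returns 4; allowing unaligned
   stores instead yields two 64-bit stores, assuming no padding bytes in
   the region.  */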
/* Return the operation through which the operand IDX (if < 2) or
   result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
   is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
   the bits should be xored with mask.  */

static enum tree_code
invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
{
  unsigned int i;
  store_immediate_info *info;
  unsigned int cnt = 0;
  bool any_paddings = false;
  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    {
      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
      if (bit_not_p)
	{
	  ++cnt;
	  tree lhs = gimple_assign_lhs (info->stmt);
	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	      && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
	    any_paddings = true;
	}
    }
  mask = NULL_TREE;
  if (cnt == 0)
    return NOP_EXPR;
  if (cnt == split_store->orig_stores.length () && !any_paddings)
    return BIT_NOT_EXPR;

  unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
  unsigned buf_size = split_store->size / BITS_PER_UNIT;
  unsigned char *buf
    = XALLOCAVEC (unsigned char, buf_size);
  memset (buf, ~0U, buf_size);
  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    {
      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
      if (!bit_not_p)
	continue;
      /* Clear regions with bit_not_p and invert afterwards, rather than
	 clear regions with !bit_not_p, so that gaps in between stores aren't
	 set in the mask.  */
      unsigned HOST_WIDE_INT bitsize = info->bitsize;
      unsigned HOST_WIDE_INT prec = bitsize;
      unsigned int pos_in_buffer = 0;
      if (any_paddings)
	{
	  tree lhs = gimple_assign_lhs (info->stmt);
	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	      && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
	    prec = TYPE_PRECISION (TREE_TYPE (lhs));
	}
      if (info->bitpos < try_bitpos)
	{
	  gcc_assert (info->bitpos + bitsize > try_bitpos);
	  if (!BYTES_BIG_ENDIAN)
	    {
	      if (prec <= try_bitpos - info->bitpos)
		continue;
	      prec -= try_bitpos - info->bitpos;
	    }
	  bitsize -= try_bitpos - info->bitpos;
	  if (BYTES_BIG_ENDIAN && prec > bitsize)
	    prec = bitsize;
	}
      else
	pos_in_buffer = info->bitpos - try_bitpos;
      if (prec < bitsize)
	{
	  /* If this is a bool inversion, invert just the least significant
	     prec bits rather than all bits of it.  */
	  if (BYTES_BIG_ENDIAN)
	    {
	      pos_in_buffer += bitsize - prec;
	      if (pos_in_buffer >= split_store->size)
		continue;
	    }
	  bitsize = prec;
	}
      if (pos_in_buffer + bitsize > split_store->size)
	bitsize = split_store->size - pos_in_buffer;
      unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (p, (BITS_PER_UNIT - 1
				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
      else
	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
    }
  for (unsigned int i = 0; i < buf_size; ++i)
    buf[i] = ~buf[i];
  mask = native_interpret_expr (int_type, buf, buf_size);
  return BIT_XOR_EXPR;
}
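/* E.g. (illustrative): if every constituent store has the relevant
   bit_not_p set and there is no bool padding, a single BIT_NOT_EXPR
   suffices; if only the low 16 bits of a 32-bit split store come from
   inverted values, the function returns BIT_XOR_EXPR with a mask of
   0x0000ffff on a little-endian target, so only those bits get
   flipped.  */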
3695 /* Given a merged store group GROUP output the widened version of it.
3696 The store chain is against the base object BASE.
3697 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3698 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3699 Make sure that the number of statements output is less than the number of
3700 original statements. If a better sequence is possible emit it and
3704 imm_store_chain_info::output_merged_store (merged_store_group
*group
)
3706 split_store
*split_store
;
3708 unsigned HOST_WIDE_INT start_byte_pos
3709 = group
->bitregion_start
/ BITS_PER_UNIT
;
3711 unsigned int orig_num_stmts
= group
->stores
.length ();
3712 if (orig_num_stmts
< 2)
3715 auto_vec
<class split_store
*, 32> split_stores
;
3716 bool allow_unaligned_store
3717 = !STRICT_ALIGNMENT
&& param_store_merging_allow_unaligned
;
3718 bool allow_unaligned_load
= allow_unaligned_store
;
3719 bool bzero_first
= false;
3720 store_immediate_info
*store
;
3721 unsigned int num_clobber_stmts
= 0;
3722 if (group
->stores
[0]->rhs_code
== INTEGER_CST
)
3724 FOR_EACH_VEC_ELT (group
->stores
, i
, store
)
3725 if (gimple_clobber_p (store
->stmt
))
3726 num_clobber_stmts
++;
3727 else if (TREE_CODE (gimple_assign_rhs1 (store
->stmt
)) == CONSTRUCTOR
3728 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store
->stmt
)) == 0
3729 && group
->start
== store
->bitpos
3730 && group
->width
== store
->bitsize
3731 && (group
->start
% BITS_PER_UNIT
) == 0
3732 && (group
->width
% BITS_PER_UNIT
) == 0)
3739 FOR_EACH_VEC_ELT_FROM (group
->stores
, i
, store
, i
)
3740 if (gimple_clobber_p (store
->stmt
))
3741 num_clobber_stmts
++;
3742 if (num_clobber_stmts
== orig_num_stmts
)
3744 orig_num_stmts
-= num_clobber_stmts
;
3746 if (allow_unaligned_store
|| bzero_first
)
3748 /* If unaligned stores are allowed, see how many stores we'd emit
3749 for unaligned and how many stores we'd emit for aligned stores.
3750 Only use unaligned stores if it allows fewer stores than aligned.
3751 Similarly, if there is a whole region clear first, prefer expanding
3752 it together compared to expanding clear first followed by merged
3754 unsigned cnt
[4] = { ~0, ~0, ~0, ~0 };
3756 for (int pass
= 0; pass
< 4; ++pass
)
3758 if (!allow_unaligned_store
&& (pass
& 1) != 0)
3760 if (!bzero_first
&& (pass
& 2) != 0)
3762 cnt
[pass
] = split_group (group
, (pass
& 1) != 0,
3763 allow_unaligned_load
, (pass
& 2) != 0,
3765 if (cnt
[pass
] < cnt
[pass_min
])
3768 if ((pass_min
& 1) == 0)
3769 allow_unaligned_store
= false;
3770 if ((pass_min
& 2) == 0)
3771 bzero_first
= false;
3773 unsigned total_orig
, total_new
;
3774 split_group (group
, allow_unaligned_store
, allow_unaligned_load
, bzero_first
,
3775 &split_stores
, &total_orig
, &total_new
);
3777 /* Determine if there is a clobber covering the whole group at the start,
3778 followed by proposed split stores that cover the whole group. In that
3779 case, prefer the transformation even if
3780 split_stores.length () == orig_num_stmts. */
3781 bool clobber_first
= false;
3782 if (num_clobber_stmts
3783 && gimple_clobber_p (group
->stores
[0]->stmt
)
3784 && group
->start
== group
->stores
[0]->bitpos
3785 && group
->width
== group
->stores
[0]->bitsize
3786 && (group
->start
% BITS_PER_UNIT
) == 0
3787 && (group
->width
% BITS_PER_UNIT
) == 0)
3789 clobber_first
= true;
3790 unsigned HOST_WIDE_INT pos
= group
->start
/ BITS_PER_UNIT
;
3791 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3792 if (split_store
->bytepos
!= pos
)
3794 clobber_first
= false;
3798 pos
+= split_store
->size
/ BITS_PER_UNIT
;
3799 if (pos
!= (group
->start
+ group
->width
) / BITS_PER_UNIT
)
3800 clobber_first
= false;
3803 if (split_stores
.length () >= orig_num_stmts
+ clobber_first
)
3806 /* We didn't manage to reduce the number of statements. Bail out. */
3807 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3808 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
3809 " Not profitable to emit new sequence.\n",
3811 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3815 if (total_orig
<= total_new
)
3817 /* If number of estimated new statements is above estimated original
3818 statements, bail out too. */
3819 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3820 fprintf (dump_file
, "Estimated number of original stmts (%u)"
3821 " not larger than estimated number of new"
3823 total_orig
, total_new
);
3824 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3828 if (group
->stores
[0]->rhs_code
== INTEGER_CST
)
3830 bool all_orig
= true;
3831 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3832 if (!split_store
->orig
)
3839 unsigned int cnt
= split_stores
.length ();
3840 store_immediate_info
*store
;
3841 FOR_EACH_VEC_ELT (group
->stores
, i
, store
)
3842 if (gimple_clobber_p (store
->stmt
))
3844 /* Punt if we wouldn't make any real changes, i.e. keep all
3845 orig stmts + all clobbers. */
3846 if (cnt
== group
->stores
.length ())
3848 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3849 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
3850 " Not profitable to emit new sequence.\n",
3852 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3859 gimple_stmt_iterator last_gsi
= gsi_for_stmt (group
->last_stmt
);
3860 gimple_seq seq
= NULL
;
3861 tree last_vdef
, new_vuse
;
3862 last_vdef
= gimple_vdef (group
->last_stmt
);
3863 new_vuse
= gimple_vuse (group
->last_stmt
);
3864 tree bswap_res
= NULL_TREE
;
3866 /* Clobbers are not removed. */
3867 if (gimple_clobber_p (group
->last_stmt
))
3869 new_vuse
= make_ssa_name (gimple_vop (cfun
), group
->last_stmt
);
3870 gimple_set_vdef (group
->last_stmt
, new_vuse
);
3873 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
3874 || group
->stores
[0]->rhs_code
== NOP_EXPR
)
3876 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
3877 gimple
*ins_stmt
= group
->stores
[0]->ins_stmt
;
3878 struct symbolic_number
*n
= &group
->stores
[0]->n
;
3879 bool bswap
= group
->stores
[0]->rhs_code
== LROTATE_EXPR
;
3884 load_type
= bswap_type
= uint16_type_node
;
3887 load_type
= uint32_type_node
;
3890 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
3891 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
3895 load_type
= uint64_type_node
;
3898 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
3899 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
3906 /* If the loads have each vuse of the corresponding store,
3907 we've checked the aliasing already in try_coalesce_bswap and
3908 we want to sink the need load into seq. So need to use new_vuse
3912 if (n
->vuse
== NULL
)
3918 /* Update vuse in case it has changed by output_merged_stores. */
3919 n
->vuse
= gimple_vuse (ins_stmt
);
3921 bswap_res
= bswap_replace (gsi_start (seq
), ins_stmt
, fndecl
,
3922 bswap_type
, load_type
, n
, bswap
);
3923 gcc_assert (bswap_res
);
3926 gimple
*stmt
= NULL
;
3927 auto_vec
<gimple
*, 32> orig_stmts
;
3928 gimple_seq this_seq
;
3929 tree addr
= force_gimple_operand_1 (unshare_expr (base_addr
), &this_seq
,
3930 is_gimple_mem_ref_addr
, NULL_TREE
);
3931 gimple_seq_add_seq_without_update (&seq
, this_seq
);
3933 tree load_addr
[2] = { NULL_TREE
, NULL_TREE
};
3934 gimple_seq load_seq
[2] = { NULL
, NULL
};
3935 gimple_stmt_iterator load_gsi
[2] = { gsi_none (), gsi_none () };
3936 for (int j
= 0; j
< 2; ++j
)
3938 store_operand_info
&op
= group
->stores
[0]->ops
[j
];
3939 if (op
.base_addr
== NULL_TREE
)
3942 store_immediate_info
*infol
= group
->stores
.last ();
3943 if (gimple_vuse (op
.stmt
) == gimple_vuse (infol
->ops
[j
].stmt
))
3945 /* We can't pick the location randomly; while we've verified
3946 all the loads have the same vuse, they can be still in different
3947 basic blocks and we need to pick the one from the last bb:
3953 otherwise if we put the wider load at the q[0] load, we might
3954 segfault if q[1] is not mapped. */
3955 basic_block bb
= gimple_bb (op
.stmt
);
3956 gimple
*ostmt
= op
.stmt
;
3957 store_immediate_info
*info
;
3958 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
3960 gimple
*tstmt
= info
->ops
[j
].stmt
;
3961 basic_block tbb
= gimple_bb (tstmt
);
3962 if (dominated_by_p (CDI_DOMINATORS
, tbb
, bb
))
3968 load_gsi
[j
] = gsi_for_stmt (ostmt
);
3970 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
3971 &load_seq
[j
], is_gimple_mem_ref_addr
,
3974 else if (operand_equal_p (base_addr
, op
.base_addr
, 0))
3975 load_addr
[j
] = addr
;
3979 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
3980 &this_seq
, is_gimple_mem_ref_addr
,
3982 gimple_seq_add_seq_without_update (&seq
, this_seq
);
3986 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3988 unsigned HOST_WIDE_INT try_size
= split_store
->size
;
3989 unsigned HOST_WIDE_INT try_pos
= split_store
->bytepos
;
3990 unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
3991 unsigned HOST_WIDE_INT align
= split_store
->align
;
3994 if (split_store
->orig
)
3996 /* If there is just a single non-clobber constituent store
3997 which covers the whole area, just reuse the lhs and rhs. */
3998 gimple
*orig_stmt
= NULL
;
3999 store_immediate_info
*store
;
4001 FOR_EACH_VEC_ELT (split_store
->orig_stores
, j
, store
)
4002 if (!gimple_clobber_p (store
->stmt
))
4004 orig_stmt
= store
->stmt
;
4007 dest
= gimple_assign_lhs (orig_stmt
);
4008 src
= gimple_assign_rhs1 (orig_stmt
);
4009 loc
= gimple_location (orig_stmt
);
4013 store_immediate_info
*info
;
4014 unsigned short clique
, base
;
4016 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4017 orig_stmts
.safe_push (info
->stmt
);
4019 = get_alias_type_for_stmts (orig_stmts
, false, &clique
, &base
);
4020 loc
= get_location_for_stmts (orig_stmts
);
4021 orig_stmts
.truncate (0);
4023 tree int_type
= build_nonstandard_integer_type (try_size
, UNSIGNED
);
4024 int_type
= build_aligned_type (int_type
, align
);
4025 dest
= fold_build2 (MEM_REF
, int_type
, addr
,
4026 build_int_cst (offset_type
, try_pos
));
4027 if (TREE_CODE (dest
) == MEM_REF
)
4029 MR_DEPENDENCE_CLIQUE (dest
) = clique
;
4030 MR_DEPENDENCE_BASE (dest
) = base
;
4035 mask
= integer_zero_node
;
4037 mask
= native_interpret_expr (int_type
,
4038 group
->mask
+ try_pos
4044 j
< 1 + (split_store
->orig_stores
[0]->ops
[1].val
!= NULL_TREE
);
4047 store_operand_info
&op
= split_store
->orig_stores
[0]->ops
[j
];
4050 else if (op
.base_addr
)
4052 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4053 orig_stmts
.safe_push (info
->ops
[j
].stmt
);
4055 offset_type
= get_alias_type_for_stmts (orig_stmts
, true,
4057 location_t load_loc
= get_location_for_stmts (orig_stmts
);
4058 orig_stmts
.truncate (0);
4060 unsigned HOST_WIDE_INT load_align
= group
->load_align
[j
];
4061 unsigned HOST_WIDE_INT align_bitpos
4062 = known_alignment (try_bitpos
4063 - split_store
->orig_stores
[0]->bitpos
4065 if (align_bitpos
& (load_align
- 1))
4066 load_align
= least_bit_hwi (align_bitpos
);
4069 = build_nonstandard_integer_type (try_size
, UNSIGNED
);
4071 = build_aligned_type (load_int_type
, load_align
);
4073 poly_uint64 load_pos
4074 = exact_div (try_bitpos
4075 - split_store
->orig_stores
[0]->bitpos
4078 ops
[j
] = fold_build2 (MEM_REF
, load_int_type
, load_addr
[j
],
4079 build_int_cst (offset_type
, load_pos
));
4080 if (TREE_CODE (ops
[j
]) == MEM_REF
)
4082 MR_DEPENDENCE_CLIQUE (ops
[j
]) = clique
;
4083 MR_DEPENDENCE_BASE (ops
[j
]) = base
;
4085 if (!integer_zerop (mask
))
4086 /* The load might load some bits (that will be masked off
4087 later on) uninitialized, avoid -W*uninitialized
4088 warnings in that case. */
4089 TREE_NO_WARNING (ops
[j
]) = 1;
4091 stmt
= gimple_build_assign (make_ssa_name (int_type
),
4093 gimple_set_location (stmt
, load_loc
);
4094 if (gsi_bb (load_gsi
[j
]))
4096 gimple_set_vuse (stmt
, gimple_vuse (op
.stmt
));
4097 gimple_seq_add_stmt_without_update (&load_seq
[j
], stmt
);
4101 gimple_set_vuse (stmt
, new_vuse
);
4102 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4104 ops
[j
] = gimple_assign_lhs (stmt
);
                  tree xor_mask;
                  enum tree_code inv_op
                    = invert_op (split_store, j, int_type, xor_mask);
                  if (inv_op != NOP_EXPR)
                    {
                      stmt = gimple_build_assign (make_ssa_name (int_type),
                                                  inv_op, ops[j], xor_mask);
                      gimple_set_location (stmt, load_loc);
                      ops[j] = gimple_assign_lhs (stmt);

                      if (gsi_bb (load_gsi[j]))
                        gimple_seq_add_stmt_without_update (&load_seq[j],
                                                            stmt);
                      else
                        gimple_seq_add_stmt_without_update (&seq, stmt);
                    }
                }
              else
                ops[j] = native_interpret_expr (int_type,
                                                group->val + try_pos
                                                - start_byte_pos,
                                                group->buf_size);
            }

          switch (split_store->orig_stores[0]->rhs_code)
            {
            case BIT_AND_EXPR:
            case BIT_IOR_EXPR:
            case BIT_XOR_EXPR:
              FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
                {
                  tree rhs1 = gimple_assign_rhs1 (info->stmt);
                  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
                }
              location_t bit_loc;
              bit_loc = get_location_for_stmts (orig_stmts);
              orig_stmts.truncate (0);

              stmt
                = gimple_build_assign (make_ssa_name (int_type),
                                       split_store->orig_stores[0]->rhs_code,
                                       ops[0], ops[1]);
              gimple_set_location (stmt, bit_loc);
              /* If there is just one load and there is a separate
                 load_seq[0], emit the bitwise op right after it.  */
              if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
                gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
              /* Otherwise, if at least one load is in seq, we need to
                 emit the bitwise op right before the store.  If there
                 are two loads and they are emitted somewhere else, it
                 would be better to emit the bitwise op as early as
                 possible; we don't track where that would be possible
                 right now though.  */
              else
                gimple_seq_add_stmt_without_update (&seq, stmt);
              src = gimple_assign_lhs (stmt);
              tree xor_mask;
              enum tree_code inv_op;
              inv_op = invert_op (split_store, 2, int_type, xor_mask);
              if (inv_op != NOP_EXPR)
                {
                  stmt = gimple_build_assign (make_ssa_name (int_type),
                                              inv_op, src, xor_mask);
                  gimple_set_location (stmt, bit_loc);
                  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
                    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
                  else
                    gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              break;
            case LROTATE_EXPR:
            case NOP_EXPR:
              src = ops[0];
              if (!is_gimple_val (src))
                {
                  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
                                              src);
                  gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
                {
                  stmt = gimple_build_assign (make_ssa_name (int_type),
                                              NOP_EXPR, src);
                  gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              inv_op = invert_op (split_store, 2, int_type, xor_mask);
              if (inv_op != NOP_EXPR)
                {
                  stmt = gimple_build_assign (make_ssa_name (int_type),
                                              inv_op, src, xor_mask);
                  gimple_set_location (stmt, loc);
                  gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              break;
            default:
              src = ops[0];
              break;
            }

          /* If bit insertion is required, we use the source as an accumulator
             into which the successive bit-field values are manually inserted.
             FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
          if (group->bit_insertion)
            FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
              if (info->rhs_code == BIT_INSERT_EXPR
                  && info->bitpos < try_bitpos + try_size
                  && info->bitpos + info->bitsize > try_bitpos)
                {
                  /* Mask, truncate, convert to final type, shift and ior into
                     the accumulator.  Note that every step can be a no-op.  */
                  const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
                  const HOST_WIDE_INT end_gap
                    = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
                  tree tem = info->ops[0].val;
                  if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
                    {
                      tree bitfield_type
                        = build_nonstandard_integer_type (info->bitsize,
                                                          UNSIGNED);
                      tem = gimple_convert (&seq, loc, bitfield_type, tem);
                    }
                  else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
                    {
                      const unsigned HOST_WIDE_INT imask
                        = (HOST_WIDE_INT_1U << info->bitsize) - 1;
                      tem = gimple_build (&seq, loc,
                                          BIT_AND_EXPR, TREE_TYPE (tem), tem,
                                          build_int_cst (TREE_TYPE (tem),
                                                         imask));
                    }
                  const HOST_WIDE_INT shift
                    = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
                  if (shift < 0)
                    tem = gimple_build (&seq, loc,
                                        RSHIFT_EXPR, TREE_TYPE (tem), tem,
                                        build_int_cst (NULL_TREE, -shift));
                  tem = gimple_convert (&seq, loc, int_type, tem);
                  if (shift > 0)
                    tem = gimple_build (&seq, loc,
                                        LSHIFT_EXPR, int_type, tem,
                                        build_int_cst (NULL_TREE, shift));
                  src = gimple_build (&seq, loc,
                                      BIT_IOR_EXPR, int_type, tem, src);
                }
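
          /* As a concrete illustration (little-endian, with made-up
             numbers): inserting a 5-bit value at bit offset 3 of a 16-bit
             TRY_SIZE region gives start_gap = 3 and end_gap = 8, so the
             value is masked with (1 << 5) - 1 = 0x1f, zero-extended to
             int_type, shifted left by 3 and ior'ed into the accumulator
             SRC.  */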

          if (!integer_zerop (mask))
            {
              tree tem = make_ssa_name (int_type);
              tree load_src = unshare_expr (dest);
              /* The load might load some or all bits uninitialized, so
                 avoid -W*uninitialized warnings in that case.
                 As an optimization, it would be nice if, when all the bits
                 are provably uninitialized (no stores at all yet, or the
                 previous store was a CLOBBER), we could optimize away the
                 load and replace it e.g. with 0.  */
              TREE_NO_WARNING (load_src) = 1;
              stmt = gimple_build_assign (tem, load_src);
              gimple_set_location (stmt, loc);
              gimple_set_vuse (stmt, new_vuse);
              gimple_seq_add_stmt_without_update (&seq, stmt);

              /* FIXME: If there is a single chunk of zero bits in mask,
                 perhaps use BIT_INSERT_EXPR instead?  */
              stmt = gimple_build_assign (make_ssa_name (int_type),
                                          BIT_AND_EXPR, tem, mask);
              gimple_set_location (stmt, loc);
              gimple_seq_add_stmt_without_update (&seq, stmt);
              tem = gimple_assign_lhs (stmt);

              if (TREE_CODE (src) == INTEGER_CST)
                src = wide_int_to_tree (int_type,
                                        wi::bit_and_not (wi::to_wide (src),
                                                         wi::to_wide (mask)));
              else
                {
                  tree nmask
                    = wide_int_to_tree (int_type,
                                        wi::bit_not (wi::to_wide (mask)));
                  stmt = gimple_build_assign (make_ssa_name (int_type),
                                              BIT_AND_EXPR, src, nmask);
                  gimple_set_location (stmt, loc);
                  gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              stmt = gimple_build_assign (make_ssa_name (int_type),
                                          BIT_IOR_EXPR, tem, src);
              gimple_set_location (stmt, loc);
              gimple_seq_add_stmt_without_update (&seq, stmt);
              src = gimple_assign_lhs (stmt);
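
              /* The statements just built implement a read-modify-write
                 cycle; schematically (SSA names invented for exposition):
                   _1 = load from dest
                   _2 = _1 & mask     keep the bits not being stored
                   _3 = src & ~mask   the bits coming from the stores
                   src = _2 | _3  */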
            }
        }

      stmt = gimple_build_assign (dest, src);
      gimple_set_location (stmt, loc);
      gimple_set_vuse (stmt, new_vuse);
      gimple_seq_add_stmt_without_update (&seq, stmt);

      if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
        add_stmt_to_eh_lp (stmt, group->lp_nr);

      tree new_vdef;
      if (i < split_stores.length () - 1)
        new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
      else
        new_vdef = last_vdef;

      gimple_set_vdef (stmt, new_vdef);
      SSA_NAME_DEF_STMT (new_vdef) = stmt;
      new_vuse = new_vdef;
    }

  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    delete split_store;

  gcc_assert (seq);
  if (dump_file)
    {
      fprintf (dump_file,
               "New sequence of %u stores to replace old one of %u stores\n",
               split_stores.length (), orig_num_stmts);
      if (dump_flags & TDF_DETAILS)
        print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
    }

  if (gimple_clobber_p (group->last_stmt))
    update_stmt (group->last_stmt);

  if (group->lp_nr > 0)
    {
      /* We're going to insert a sequence of (potentially) throwing stores
         into an active EH region.  This means that we're going to create
         new basic blocks with EH edges pointing to the post landing pad
         and, therefore, to have to update its PHI nodes, if any.  For the
         virtual PHI node, we're going to use the VDEFs created above, but
         for the other nodes, we need to record the original reaching defs.  */
      eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
      basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
      basic_block last_bb = gimple_bb (group->last_stmt);
      edge last_edge = find_edge (last_bb, lp_bb);
      auto_vec<tree, 16> last_defs;
      gphi_iterator gpi;
      for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
        {
          gphi *phi = gpi.phi ();
          tree last_def;
          if (virtual_operand_p (gimple_phi_result (phi)))
            last_def = NULL_TREE;
          else
            last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
          last_defs.safe_push (last_def);
        }

      /* Do the insertion.  Then, if new basic blocks have been created in the
         process, rewind the chain of VDEFs created above to walk the new basic
         blocks and update the corresponding arguments of the PHI nodes.  */
      update_modified_stmts (seq);
      if (gimple_find_sub_bbs (seq, &last_gsi))
        while (last_vdef != gimple_vuse (group->last_stmt))
          {
            gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
            if (stmt_could_throw_p (cfun, stmt))
              {
                edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
                unsigned int i;
                for (gpi = gsi_start_phis (lp_bb), i = 0;
                     !gsi_end_p (gpi);
                     gsi_next (&gpi), i++)
                  {
                    gphi *phi = gpi.phi ();
                    tree new_def;
                    if (virtual_operand_p (gimple_phi_result (phi)))
                      new_def = last_vdef;
                    else
                      new_def = last_defs[i];
                    add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
                  }
              }
            last_vdef = gimple_vuse (stmt);
          }
    }
  else
    gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);

  for (int j = 0; j < 2; ++j)
    if (load_seq[j])
      gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);

  return true;
}

/* Process the merged_store_group objects created in the coalescing phase.
   The stores are all against the base object BASE.
   Try to output the widened stores and delete the original statements if
   successful.  Return true iff any changes were made.  */

bool
imm_store_chain_info::output_merged_stores ()
{
  unsigned int i;
  merged_store_group *merged_store;
  bool ret = false;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
    {
      if (dbg_cnt (store_merging)
          && output_merged_store (merged_store))
        {
          unsigned int j;
          store_immediate_info *store;
          FOR_EACH_VEC_ELT (merged_store->stores, j, store)
            {
              gimple *stmt = store->stmt;
              gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
              /* Don't remove clobbers, they are still useful even if
                 everything is overwritten afterwards.  */
              if (gimple_clobber_p (stmt))
                continue;
              gsi_remove (&gsi, true);
              if (store->lp_nr)
                remove_stmt_from_eh_lp (stmt);
              if (stmt != merged_store->last_stmt)
                {
                  unlink_stmt_vdef (stmt);
                  release_defs (stmt);
                }
            }
          ret = true;
        }
    }
  if (ret && dump_file)
    fprintf (dump_file, "Merging successful!\n");

  return ret;
}

/* Coalesce the store_immediate_info objects recorded against the base object
   BASE in the first phase and output them.
   Delete the allocated structures.
   Return true if any changes were made.  */

bool
imm_store_chain_info::terminate_and_process_chain ()
{
  /* Process the store chain.  */
  bool ret = false;
  if (m_store_info.length () > 1)
    {
      ret = coalesce_immediate_stores ();
      if (ret)
        ret = output_merged_stores ();
    }

  /* Delete all the entries we allocated ourselves.  */
  store_immediate_info *info;
  unsigned int i;
  FOR_EACH_VEC_ELT (m_store_info, i, info)
    delete info;

  merged_store_group *merged_info;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
    delete merged_info;

  return ret;
}

/* Return true iff LHS is a destination potentially interesting for
   store merging.  In practice these are the codes that get_inner_reference
   can process.  */

static bool
lhs_valid_for_store_merging_p (tree lhs)
{
  if (TREE_CODE (lhs) == SSA_NAME)
    return false;

  switch (TREE_CODE (lhs))
    {
    case ARRAY_REF:
    case ARRAY_RANGE_REF:
    case BIT_FIELD_REF:
    case COMPONENT_REF:
    case MEM_REF:
      return true;
    default:
      return false;
    }

  gcc_unreachable ();
}
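
/* For example, destinations such as a[i] (ARRAY_REF), s.f (COMPONENT_REF)
   or *p (MEM_REF) are interesting here, whereas an SSA name on the lhs is
   not a store to memory at all and is rejected up front.  */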

/* Return true if the tree RHS is a constant we want to consider
   during store merging.  In practice accept all codes that
   native_encode_expr accepts.  */

static bool
rhs_valid_for_store_merging_p (tree rhs)
{
  unsigned HOST_WIDE_INT size;
  if (TREE_CODE (rhs) == CONSTRUCTOR
      && CONSTRUCTOR_NELTS (rhs) == 0
      && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
      && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
    return true;
  return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
          && native_encode_expr (rhs, NULL, size) != 0);
}
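
/* E.g. integer constants like 0x12345678 are accepted, as is an empty
   CONSTRUCTOR such as the "= {}" of a zero-initialized aggregate of known
   size, which is treated as storing that many zero bytes.  */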

/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
   and return true on success or false on failure.  */

static bool
adjust_bit_pos (poly_offset_int byte_off,
                poly_int64 *pbitpos,
                poly_uint64 *pbitregion_start,
                poly_uint64 *pbitregion_end)
{
  poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
  bit_off += *pbitpos;

  if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
    {
      if (maybe_ne (*pbitregion_end, 0U))
        {
          bit_off = byte_off << LOG2_BITS_PER_UNIT;
          bit_off += *pbitregion_start;
          if (bit_off.to_uhwi (pbitregion_start))
            {
              bit_off = byte_off << LOG2_BITS_PER_UNIT;
              bit_off += *pbitregion_end;
              if (!bit_off.to_uhwi (pbitregion_end))
                *pbitregion_end = 0;
            }
          else
            *pbitregion_end = 0;
        }
      return true;
    }

  return false;
}
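
/* A worked example, assuming 8-bit units: with BYTE_OFF equal to 2, an
   incoming *PBITPOS of 8 becomes 8 + 2 * 8 = 24 and the bit region bounds
   are shifted by the same 16 bits; failure is reported only when the
   adjusted bit position does not fit the output type.  */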

/* If MEM is a memory reference usable for store merging (either as
   store destination or for loads), return the non-NULL base_addr
   and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
   Otherwise return NULL; *PBITSIZE should still be valid even in that
   case.  */

static tree
mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
                             poly_uint64 *pbitpos,
                             poly_uint64 *pbitregion_start,
                             poly_uint64 *pbitregion_end)
{
  poly_int64 bitsize, bitpos;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  machine_mode mode;
  int unsignedp = 0, reversep = 0, volatilep = 0;
  tree offset;
  tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
                                        &unsignedp, &reversep, &volatilep);
  *pbitsize = bitsize;
  if (known_eq (bitsize, 0))
    return NULL_TREE;

  if (TREE_CODE (mem) == COMPONENT_REF
      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
    {
      get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
      if (maybe_ne (bitregion_end, 0U))
        bitregion_end += 1;
    }

  if (reversep || volatilep)
    return NULL_TREE;

  /* We do not want to rewrite TARGET_MEM_REFs.  */
  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    return NULL_TREE;
  /* In some cases get_inner_reference may return a
     MEM_REF [ptr + byteoffset].  For the purposes of this pass
     canonicalize the base_addr to MEM_REF [ptr] and take
     byteoffset into account in the bitpos.  This occurs in
     PR 23684 and this way we can catch more chains.  */
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
                           &bitregion_start, &bitregion_end))
        return NULL_TREE;
      base_addr = TREE_OPERAND (base_addr, 0);
    }
  /* get_inner_reference returns the base object, get at its
     address now.  */
  else
    {
      if (maybe_lt (bitpos, 0))
        return NULL_TREE;
      base_addr = build_fold_addr_expr (base_addr);
    }

  if (offset)
    {
      /* If the access is variable offset then a base decl has to be
         address-taken to be able to emit pointer-based stores to it.
         ??? We might be able to get away with re-using the original
         base up to the first variable part and then wrapping that inside
         a BIT_FIELD_REF.  */
      tree base = get_base_address (base_addr);
      if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
        return NULL_TREE;

      /* Similarly to above for the base, remove constant from the offset.  */
      if (TREE_CODE (offset) == PLUS_EXPR
          && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
          && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
                             &bitpos, &bitregion_start, &bitregion_end))
        offset = TREE_OPERAND (offset, 0);

      base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
                          base_addr, offset);
    }

  if (known_eq (bitregion_end, 0U))
    {
      bitregion_start = round_down_to_byte_boundary (bitpos);
      bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
    }

  *pbitsize = bitsize;
  *pbitpos = bitpos;
  *pbitregion_start = bitregion_start;
  *pbitregion_end = bitregion_end;
  return base_addr;
}
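
/* For instance, for a store to MEM_REF [p + 4] the base is canonicalized
   to p and *PBITPOS is increased by 4 * BITS_PER_UNIT, so that stores
   through p + 4 and, say, p + 8 end up recorded against the same chain.  */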

/* Return true if STMT is a load that can be used for store merging.
   In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
   BITREGION_END are properties of the corresponding store.  */

static bool
handled_load (gimple *stmt, store_operand_info *op,
              poly_uint64 bitsize, poly_uint64 bitpos,
              poly_uint64 bitregion_start, poly_uint64 bitregion_end)
{
  if (!is_gimple_assign (stmt))
    return false;
  if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
          && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
                           bitregion_start, bitregion_end))
        {
          /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
             been optimized earlier, but if allowed here, would confuse the
             multiple uses counting.  */
          if (op->bit_not_p)
            return false;
          op->bit_not_p = !op->bit_not_p;
          return true;
        }
      return false;
    }
  if (gimple_vuse (stmt)
      && gimple_assign_load_p (stmt)
      && !stmt_can_throw_internal (cfun, stmt)
      && !gimple_has_volatile_ops (stmt))
    {
      tree mem = gimple_assign_rhs1 (stmt);
      op->base_addr
        = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
                                       &op->bitregion_start,
                                       &op->bitregion_end);
      if (op->base_addr != NULL_TREE
          && known_eq (op->bitsize, bitsize)
          && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
          && known_ge (op->bitpos - op->bitregion_start,
                       bitpos - bitregion_start)
          && known_ge (op->bitregion_end - op->bitpos,
                       bitregion_end - bitpos))
        {
          op->stmt = stmt;
          op->val = mem;
          op->bit_not_p = false;
          return true;
        }
    }
  return false;
}
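
/* A typical load matched here feeds a memory-to-memory copy, e.g.:

     _1 = *q;
     *p = _1;

   provided the loaded bit region covers the stored one at least as
   tightly, as the checks above require.  */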

/* Return the index number of the landing pad for STMT, if any.  */

static int
lp_nr_for_store (gimple *stmt)
{
  if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
    return 0;

  if (!stmt_could_throw_p (cfun, stmt))
    return 0;

  return lookup_stmt_eh_lp (stmt);
}

/* Record the store STMT for store merging optimization if it can be
   optimized.  Return true if any changes were made.  */

bool
pass_store_merging::process_store (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  poly_uint64 bitsize, bitpos;
  poly_uint64 bitregion_start, bitregion_end;
  tree base_addr
    = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
                                   &bitregion_start, &bitregion_end);
  if (known_eq (bitsize, 0U))
    return false;

  bool invalid = (base_addr == NULL_TREE
                  || (maybe_gt (bitsize,
                                (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
                      && TREE_CODE (rhs) != INTEGER_CST
                      && (TREE_CODE (rhs) != CONSTRUCTOR
                          || CONSTRUCTOR_NELTS (rhs) != 0)));
  enum tree_code rhs_code = ERROR_MARK;
  bool bit_not_p = false;
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  store_operand_info ops[2];
  if (invalid)
    ;
  else if (rhs_valid_for_store_merging_p (rhs))
    {
      rhs_code = INTEGER_CST;
      ops[0].val = rhs;
    }
  else if (TREE_CODE (rhs) != SSA_NAME)
    invalid = true;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
      if (!is_gimple_assign (def_stmt))
        invalid = true;
      else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
                             bitregion_start, bitregion_end))
        rhs_code = MEM_REF;
      else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
        {
          tree rhs1 = gimple_assign_rhs1 (def_stmt);
          if (TREE_CODE (rhs1) == SSA_NAME
              && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
            {
              bit_not_p = true;
              def_stmt = SSA_NAME_DEF_STMT (rhs1);
            }
        }

      if (rhs_code == ERROR_MARK && !invalid)
        switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
          {
          case BIT_AND_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            tree rhs1, rhs2;
            rhs1 = gimple_assign_rhs1 (def_stmt);
            rhs2 = gimple_assign_rhs2 (def_stmt);
            invalid = true;
            if (TREE_CODE (rhs1) != SSA_NAME)
              break;
            def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
            if (!is_gimple_assign (def_stmt1)
                || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
                                  bitregion_start, bitregion_end))
              break;
            if (rhs_valid_for_store_merging_p (rhs2))
              ops[1].val = rhs2;
            else if (TREE_CODE (rhs2) != SSA_NAME)
              break;
            else
              {
                def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
                if (!is_gimple_assign (def_stmt2))
                  break;
                else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
                                        bitregion_start, bitregion_end))
                  break;
              }
            invalid = false;
            break;
          default:
            invalid = true;
            break;
          }

      unsigned HOST_WIDE_INT const_bitsize;
      if (bitsize.is_constant (&const_bitsize)
          && (const_bitsize % BITS_PER_UNIT) == 0
          && const_bitsize <= 64
          && multiple_p (bitpos, BITS_PER_UNIT))
        {
          ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
          if (ins_stmt)
            {
              uint64_t nn = n.n;
              for (unsigned HOST_WIDE_INT i = 0;
                   i < const_bitsize;
                   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
                if ((nn & MARKER_MASK) == 0
                    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
                  {
                    ins_stmt = NULL;
                    break;
                  }
              if (ins_stmt)
                {
                  if (invalid)
                    {
                      rhs_code = LROTATE_EXPR;
                      ops[0].base_addr = NULL_TREE;
                      ops[1].base_addr = NULL_TREE;
                    }
                  invalid = false;
                }
            }
        }
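
      /* E.g. a 32-bit value assembled from four narrower loads in
         byte-reversed order is recognized here through its symbolic
         number; when nothing else handled the rhs, it is recorded with
         rhs_code LROTATE_EXPR so that a byte-swapping store can be
         emitted when the group is output.  */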

      if (invalid
          && bitsize.is_constant (&const_bitsize)
          && ((const_bitsize % BITS_PER_UNIT) != 0
              || !multiple_p (bitpos, BITS_PER_UNIT))
          && const_bitsize <= 64)
        {
          /* Bypass a conversion to the bit-field type.  */
          if (!bit_not_p
              && is_gimple_assign (def_stmt)
              && CONVERT_EXPR_CODE_P (rhs_code))
            {
              tree rhs1 = gimple_assign_rhs1 (def_stmt);
              if (TREE_CODE (rhs1) == SSA_NAME
                  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
                rhs = rhs1;
            }
          rhs_code = BIT_INSERT_EXPR;
          bit_not_p = false;
          ops[0].val = rhs;
          ops[0].base_addr = NULL_TREE;
          ops[1].base_addr = NULL_TREE;
          invalid = false;
        }
    }
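
  /* This is where a bit-field store such as

       s.f = x;   (s.f being, say, a 9-bit bit-field)

     ends up: the value is kept in ops[0].val and later inserted manually
     into the wider merged value by the bit_insertion handling in
     output_merged_store.  */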

  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
  if (invalid
      || !bitsize.is_constant (&const_bitsize)
      || !bitpos.is_constant (&const_bitpos)
      || !bitregion_start.is_constant (&const_bitregion_start)
      || !bitregion_end.is_constant (&const_bitregion_end))
    return terminate_all_aliasing_chains (NULL, stmt);

  if (!ins_stmt)
    memset (&n, 0, sizeof (n));

  class imm_store_chain_info **chain_info = NULL;
  bool ret = false;
  if (base_addr)
    chain_info = m_stores.get (base_addr);

  store_immediate_info *info;
  if (chain_info)
    {
      unsigned int ord = (*chain_info)->m_store_info.length ();
      info = new store_immediate_info (const_bitsize, const_bitpos,
                                       const_bitregion_start,
                                       const_bitregion_end,
                                       stmt, ord, rhs_code, n, ins_stmt,
                                       bit_not_p, lp_nr_for_store (stmt),
                                       ops[0], ops[1]);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Recording immediate store from stmt:\n");
          print_gimple_stmt (dump_file, stmt, 0);
        }
      (*chain_info)->m_store_info.safe_push (info);
      ret |= terminate_all_aliasing_chains (chain_info, stmt);
      /* If we reach the limit of stores to merge in a chain, terminate and
         process the chain now.  */
      if ((*chain_info)->m_store_info.length ()
          == (unsigned int) param_max_stores_to_merge)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Reached maximum number of statements to merge:\n");
          ret |= terminate_and_process_chain (*chain_info);
        }
      return ret;
    }

  /* Does the store alias any existing chain?  */
  ret |= terminate_all_aliasing_chains (NULL, stmt);
  /* Start a new chain.  */
  class imm_store_chain_info *new_chain
    = new imm_store_chain_info (m_stores_head, base_addr);
  info = new store_immediate_info (const_bitsize, const_bitpos,
                                   const_bitregion_start,
                                   const_bitregion_end,
                                   stmt, 0, rhs_code, n, ins_stmt,
                                   bit_not_p, lp_nr_for_store (stmt),
                                   ops[0], ops[1]);
  new_chain->m_store_info.safe_push (info);
  m_stores.put (base_addr, new_chain);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Starting new chain with statement:\n");
      print_gimple_stmt (dump_file, stmt, 0);
      fprintf (dump_file, "The base object is:\n");
      print_generic_expr (dump_file, base_addr);
      fprintf (dump_file, "\n");
    }

  return ret;
}

/* Return true if STMT is a store valid for store merging.  */

static bool
store_valid_for_store_merging_p (gimple *stmt)
{
  return gimple_assign_single_p (stmt)
         && gimple_vdef (stmt)
         && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
         && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
}
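
/* E.g. a plain assignment like *p = _1 or s.f = 7 qualifies; a store with
   volatile operands is only allowed when it is a clobber marking the end
   of an object's lifetime.  */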

enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };

/* Return the status of basic block BB wrt store merging.  */

static enum basic_block_status
get_status_for_store_merging (basic_block bb)
{
  unsigned int num_statements = 0;
  gimple_stmt_iterator gsi;
  edge e;

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (is_gimple_debug (stmt))
        continue;

      if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
        break;
    }

  if (num_statements == 0)
    return BB_INVALID;

  if (cfun->can_throw_non_call_exceptions && cfun->eh
      && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
      && (e = find_fallthru_edge (bb->succs))
      && e->dest == bb->next_bb)
    return BB_EXTENDED_VALID;

  return num_statements >= 2 ? BB_VALID : BB_INVALID;
}
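
/* E.g. a block containing x.a = 1; x.b = 2; is BB_VALID.  A block whose
   last statement is a mergeable store falling through to the next block
   may be BB_EXTENDED_VALID, which lets chains stay open across the edge
   when the function can throw and catch non-call exceptions.  */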

/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (as defined
   by stmt_terminates_chain_p) process the recorded stores and emit the
   widened variants.  */

unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  hash_set<gimple *> orig_stmts;
  bool changed = false, open_chains = false;

  /* If the function can throw and catch non-call exceptions, we'll be trying
     to merge stores across different basic blocks so we need to first unsplit
     the EH edges in order to streamline the CFG of the function.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh)
    unsplit_eh_edges ();

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      const basic_block_status bb_status = get_status_for_store_merging (bb);
      gimple_stmt_iterator gsi;

      if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
        {
          changed |= terminate_and_process_all_chains ();
          open_chains = false;
        }

      if (bb_status == BB_INVALID)
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);

          if (is_gimple_debug (stmt))
            continue;

          if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
            {
              /* Terminate all chains.  */
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Volatile access terminates "
                                    "all chains\n");
              changed |= terminate_and_process_all_chains ();
              open_chains = false;
              continue;
            }

          if (store_valid_for_store_merging_p (stmt))
            changed |= process_store (stmt);
          else
            changed |= terminate_all_aliasing_chains (NULL, stmt);
        }

      if (bb_status == BB_EXTENDED_VALID)
        open_chains = true;
      else
        {
          changed |= terminate_and_process_all_chains ();
          open_chains = false;
        }
    }

  if (open_chains)
    changed |= terminate_and_process_all_chains ();

  /* If the function can throw and catch non-call exceptions and something
     changed during the pass, then the CFG has (very likely) changed too.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
    {
      free_dominance_info (CDI_DOMINATORS);
      return TODO_cleanup_cfg;
    }

  return 0;
}

} // anon namespace

/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
        {
          fprintf (stderr, "Arrays do not match.  X:\n");
          dump_char_array (stderr, x, n);
          fprintf (stderr, "Y:\n");
          dump_char_array (stderr, y, n);
        }
      ASSERT_EQ (x[i], y[i]);
    }
}

/* Test that shift_bytes_in_array carries bits across between
   bytes correctly.  */

static void
verify_shift_bytes_in_array (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}
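
/* To check the expected values above: the bytes { 0xe0, 0x1f } read as the
   16-bit little-endian value 0x1fe0; shifting left by 2 bits yields 0x7f80,
   i.e. the bytes { 0x80, 0x7f }.  */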

/* Test that shift_bytes_in_array_right carries bits across between
   bytes correctly.  */

static void
verify_shift_bytes_in_array_right (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8 };
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that clear_bit_region clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
}

/* Test that clear_bit_region_be clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
}

/* Run all of the selftests within this file.  */

void
store_merging_c_tests (void)
{
  verify_shift_bytes_in_array ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */