1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2019 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
25 For example, if we have a sequence performing four byte stores to
26 consecutive memory locations:
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
39 if there is no overlap can be transformed into a single 4-byte
40 load followed by single 4-byte store.
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
56 The algorithm is applied to each basic block in three phases:
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be a either constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 following storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
84 For example, given the stores:
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
143 #include "coretypes.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
151 #include "gimple-pretty-print.h"
153 #include "fold-const.h"
155 #include "print-tree.h"
156 #include "tree-hash-traits.h"
157 #include "gimple-iterator.h"
158 #include "gimplify.h"
159 #include "gimple-fold.h"
160 #include "stor-layout.h"
163 #include "cfgcleanup.h"
164 #include "tree-cfg.h"
168 #include "gimplify-me.h"
170 #include "expr.h" /* For get_bit_range. */
171 #include "optabs-tree.h"
173 #include "selftest.h"
175 /* The maximum size (in bits) of the stores this pass should generate. */
176 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
177 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
179 /* Limit to bound the number of aliasing checks for loads with the same
180 vuse as the corresponding store. */
181 #define MAX_STORE_ALIAS_CHECKS 64
187 /* Number of hand-written 16-bit nop / bswaps found. */
190 /* Number of hand-written 32-bit nop / bswaps found. */
193 /* Number of hand-written 64-bit nop / bswaps found. */
195 } nop_stats
, bswap_stats
;
197 /* A symbolic number structure is used to detect byte permutation and selection
198 patterns of a source. To achieve that, its field N contains an artificial
199 number consisting of BITS_PER_MARKER sized markers tracking where does each
200 byte come from in the source:
202 0 - target byte has the value 0
203 FF - target byte has an unknown value (eg. due to sign extension)
204 1..size - marker value is the byte index in the source (0 for lsb).
206 To detect permutations on memory sources (arrays and structures), a symbolic
207 number is also associated:
208 - a base address BASE_ADDR and an OFFSET giving the address of the source;
209 - a range which gives the difference between the highest and lowest accessed
210 memory location to make such a symbolic number;
211 - the address SRC of the source element of lowest address as a convenience
212 to easily get BASE_ADDR + offset + lowest bytepos;
213 - number of expressions N_OPS bitwise ored together to represent
214 approximate cost of the computation.
216 Note 1: the range is different from size as size reflects the size of the
217 type of the current expression. For instance, for an array char a[],
218 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
219 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
222 Note 2: for non-memory sources, range holds the same value as size.
224 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
226 struct symbolic_number
{
231 poly_int64_pod bytepos
;
235 unsigned HOST_WIDE_INT range
;
239 #define BITS_PER_MARKER 8
240 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
241 #define MARKER_BYTE_UNKNOWN MARKER_MASK
242 #define HEAD_MARKER(n, size) \
243 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
245 /* The number which the find_bswap_or_nop_1 result should match in
246 order to have a nop. The number is masked according to the size of
247 the symbolic number before using it. */
248 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
249 (uint64_t)0x08070605 << 32 | 0x04030201)
251 /* The number which the find_bswap_or_nop_1 result should match in
252 order to have a byte swap. The number is masked according to the
253 size of the symbolic number before using it. */
254 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
255 (uint64_t)0x01020304 << 32 | 0x05060708)
257 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
258 number N. Return false if the requested operation is not permitted
259 on a symbolic number. */
262 do_shift_rotate (enum tree_code code
,
263 struct symbolic_number
*n
,
266 int i
, size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
267 unsigned head_marker
;
270 || count
>= TYPE_PRECISION (n
->type
)
271 || count
% BITS_PER_UNIT
!= 0)
273 count
= (count
/ BITS_PER_UNIT
) * BITS_PER_MARKER
;
275 /* Zero out the extra bits of N in order to avoid them being shifted
276 into the significant bits. */
277 if (size
< 64 / BITS_PER_MARKER
)
278 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
286 head_marker
= HEAD_MARKER (n
->n
, size
);
288 /* Arithmetic shift of signed type: result is dependent on the value. */
289 if (!TYPE_UNSIGNED (n
->type
) && head_marker
)
290 for (i
= 0; i
< count
/ BITS_PER_MARKER
; i
++)
291 n
->n
|= (uint64_t) MARKER_BYTE_UNKNOWN
292 << ((size
- 1 - i
) * BITS_PER_MARKER
);
295 n
->n
= (n
->n
<< count
) | (n
->n
>> ((size
* BITS_PER_MARKER
) - count
));
298 n
->n
= (n
->n
>> count
) | (n
->n
<< ((size
* BITS_PER_MARKER
) - count
));
303 /* Zero unused bits for size. */
304 if (size
< 64 / BITS_PER_MARKER
)
305 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
309 /* Perform sanity checking for the symbolic number N and the gimple
313 verify_symbolic_number_p (struct symbolic_number
*n
, gimple
*stmt
)
317 lhs_type
= gimple_expr_type (stmt
);
319 if (TREE_CODE (lhs_type
) != INTEGER_TYPE
)
322 if (TYPE_PRECISION (lhs_type
) != TYPE_PRECISION (n
->type
))
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
332 init_symbolic_number (struct symbolic_number
*n
, tree src
)
336 if (! INTEGRAL_TYPE_P (TREE_TYPE (src
)))
339 n
->base_addr
= n
->offset
= n
->alias_set
= n
->vuse
= NULL_TREE
;
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n
->type
= TREE_TYPE (src
);
346 size
= TYPE_PRECISION (n
->type
);
347 if (size
% BITS_PER_UNIT
!= 0)
349 size
/= BITS_PER_UNIT
;
350 if (size
> 64 / BITS_PER_MARKER
)
356 if (size
< 64 / BITS_PER_MARKER
)
357 n
->n
&= ((uint64_t) 1 << (size
* BITS_PER_MARKER
)) - 1;
362 /* Check if STMT might be a byte swap or a nop from a memory source and returns
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
367 find_bswap_or_nop_load (gimple
*stmt
, tree ref
, struct symbolic_number
*n
)
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize
, bitpos
, bytepos
;
373 int unsignedp
, reversep
, volatilep
;
374 tree offset
, base_addr
;
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN
!= WORDS_BIG_ENDIAN
)
380 if (!gimple_assign_load_p (stmt
) || gimple_has_volatile_ops (stmt
))
383 base_addr
= get_inner_reference (ref
, &bitsize
, &bitpos
, &offset
, &mode
,
384 &unsignedp
, &reversep
, &volatilep
);
386 if (TREE_CODE (base_addr
) == TARGET_MEM_REF
)
387 /* Do not rewrite TARGET_MEM_REF. */
389 else if (TREE_CODE (base_addr
) == MEM_REF
)
391 poly_offset_int bit_offset
= 0;
392 tree off
= TREE_OPERAND (base_addr
, 1);
394 if (!integer_zerop (off
))
396 poly_offset_int boff
= mem_ref_offset (base_addr
);
397 boff
<<= LOG2_BITS_PER_UNIT
;
401 base_addr
= TREE_OPERAND (base_addr
, 0);
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset
, 0))
406 tree byte_offset
= wide_int_to_tree
407 (sizetype
, bits_to_bytes_round_down (bit_offset
));
408 bit_offset
= num_trailing_bits (bit_offset
);
410 offset
= size_binop (PLUS_EXPR
, offset
, byte_offset
);
412 offset
= byte_offset
;
415 bitpos
+= bit_offset
.force_shwi ();
418 base_addr
= build_fold_addr_expr (base_addr
);
420 if (!multiple_p (bitpos
, BITS_PER_UNIT
, &bytepos
))
422 if (!multiple_p (bitsize
, BITS_PER_UNIT
))
427 if (!init_symbolic_number (n
, ref
))
429 n
->base_addr
= base_addr
;
431 n
->bytepos
= bytepos
;
432 n
->alias_set
= reference_alias_ptr_type (ref
);
433 n
->vuse
= gimple_vuse (stmt
);
437 /* Compute the symbolic number N representing the result of a bitwise OR on 2
438 symbolic number N1 and N2 whose source statements are respectively
439 SOURCE_STMT1 and SOURCE_STMT2. */
442 perform_symbolic_merge (gimple
*source_stmt1
, struct symbolic_number
*n1
,
443 gimple
*source_stmt2
, struct symbolic_number
*n2
,
444 struct symbolic_number
*n
)
449 struct symbolic_number
*n_start
;
451 tree rhs1
= gimple_assign_rhs1 (source_stmt1
);
452 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
454 rhs1
= TREE_OPERAND (rhs1
, 0);
455 tree rhs2
= gimple_assign_rhs1 (source_stmt2
);
456 if (TREE_CODE (rhs2
) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2
, 0)) == SSA_NAME
)
458 rhs2
= TREE_OPERAND (rhs2
, 0);
460 /* Sources are different, cancel bswap if they are not memory location with
461 the same base (array, structure, ...). */
465 HOST_WIDE_INT start1
, start2
, start_sub
, end_sub
, end1
, end2
, end
;
466 struct symbolic_number
*toinc_n_ptr
, *n_end
;
467 basic_block bb1
, bb2
;
469 if (!n1
->base_addr
|| !n2
->base_addr
470 || !operand_equal_p (n1
->base_addr
, n2
->base_addr
, 0))
473 if (!n1
->offset
!= !n2
->offset
474 || (n1
->offset
&& !operand_equal_p (n1
->offset
, n2
->offset
, 0)))
478 if (!(n2
->bytepos
- n1
->bytepos
).is_constant (&start2
))
484 start_sub
= start2
- start1
;
489 start_sub
= start1
- start2
;
492 bb1
= gimple_bb (source_stmt1
);
493 bb2
= gimple_bb (source_stmt2
);
494 if (dominated_by_p (CDI_DOMINATORS
, bb1
, bb2
))
495 source_stmt
= source_stmt1
;
497 source_stmt
= source_stmt2
;
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1
= start1
+ (n1
->range
- 1);
502 end2
= start2
+ (n2
->range
- 1);
506 end_sub
= end2
- end1
;
511 end_sub
= end1
- end2
;
513 n_end
= (end2
> end1
) ? n2
: n1
;
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN
)
517 toinc_n_ptr
= (n_end
== n1
) ? n2
: n1
;
519 toinc_n_ptr
= (n_start
== n1
) ? n2
: n1
;
521 n
->range
= end
- MIN (start1
, start2
) + 1;
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n
->range
> 64 / BITS_PER_MARKER
)
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc
= BYTES_BIG_ENDIAN
? end_sub
: start_sub
;
531 size
= TYPE_PRECISION (n1
->type
) / BITS_PER_UNIT
;
532 for (i
= 0; i
< size
; i
++, inc
<<= BITS_PER_MARKER
)
535 = (toinc_n_ptr
->n
>> (i
* BITS_PER_MARKER
)) & MARKER_MASK
;
536 if (marker
&& marker
!= MARKER_BYTE_UNKNOWN
)
537 toinc_n_ptr
->n
+= inc
;
542 n
->range
= n1
->range
;
544 source_stmt
= source_stmt1
;
548 || alias_ptr_types_compatible_p (n1
->alias_set
, n2
->alias_set
))
549 n
->alias_set
= n1
->alias_set
;
551 n
->alias_set
= ptr_type_node
;
552 n
->vuse
= n_start
->vuse
;
553 n
->base_addr
= n_start
->base_addr
;
554 n
->offset
= n_start
->offset
;
555 n
->src
= n_start
->src
;
556 n
->bytepos
= n_start
->bytepos
;
557 n
->type
= n_start
->type
;
558 size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
560 for (i
= 0, mask
= MARKER_MASK
; i
< size
; i
++, mask
<<= BITS_PER_MARKER
)
562 uint64_t masked1
, masked2
;
564 masked1
= n1
->n
& mask
;
565 masked2
= n2
->n
& mask
;
566 if (masked1
&& masked2
&& masked1
!= masked2
)
569 n
->n
= n1
->n
| n2
->n
;
570 n
->n_ops
= n1
->n_ops
+ n2
->n_ops
;
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576 the operation given by the rhs of STMT on the result. If the operation
577 could successfully be executed the function returns a gimple stmt whose
578 rhs's first tree is the expression of the source operand and NULL
582 find_bswap_or_nop_1 (gimple
*stmt
, struct symbolic_number
*n
, int limit
)
585 tree rhs1
, rhs2
= NULL
;
586 gimple
*rhs1_stmt
, *rhs2_stmt
, *source_stmt1
;
587 enum gimple_rhs_class rhs_class
;
589 if (!limit
|| !is_gimple_assign (stmt
))
592 rhs1
= gimple_assign_rhs1 (stmt
);
594 if (find_bswap_or_nop_load (stmt
, rhs1
, n
))
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1
) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1
, 0)) == SSA_NAME
)
601 unsigned HOST_WIDE_INT bitsize
= tree_to_uhwi (TREE_OPERAND (rhs1
, 1));
602 unsigned HOST_WIDE_INT bitpos
= tree_to_uhwi (TREE_OPERAND (rhs1
, 2));
603 if (bitpos
% BITS_PER_UNIT
== 0
604 && bitsize
% BITS_PER_UNIT
== 0
605 && init_symbolic_number (n
, TREE_OPERAND (rhs1
, 0)))
607 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
608 if (BYTES_BIG_ENDIAN
)
609 bitpos
= TYPE_PRECISION (n
->type
) - bitpos
- bitsize
;
612 if (!do_shift_rotate (RSHIFT_EXPR
, n
, bitpos
))
617 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
618 for (unsigned i
= 0; i
< bitsize
/ BITS_PER_UNIT
;
619 i
++, tmp
<<= BITS_PER_UNIT
)
620 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
624 n
->type
= TREE_TYPE (rhs1
);
626 n
->range
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
628 return verify_symbolic_number_p (n
, stmt
) ? stmt
: NULL
;
634 if (TREE_CODE (rhs1
) != SSA_NAME
)
637 code
= gimple_assign_rhs_code (stmt
);
638 rhs_class
= gimple_assign_rhs_class (stmt
);
639 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
641 if (rhs_class
== GIMPLE_BINARY_RHS
)
642 rhs2
= gimple_assign_rhs2 (stmt
);
644 /* Handle unary rhs and binary rhs with integer constants as second
647 if (rhs_class
== GIMPLE_UNARY_RHS
648 || (rhs_class
== GIMPLE_BINARY_RHS
649 && TREE_CODE (rhs2
) == INTEGER_CST
))
651 if (code
!= BIT_AND_EXPR
652 && code
!= LSHIFT_EXPR
653 && code
!= RSHIFT_EXPR
654 && code
!= LROTATE_EXPR
655 && code
!= RROTATE_EXPR
656 && !CONVERT_EXPR_CODE_P (code
))
659 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, n
, limit
- 1);
661 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
662 we have to initialize the symbolic number. */
665 if (gimple_assign_load_p (stmt
)
666 || !init_symbolic_number (n
, rhs1
))
675 int i
, size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
676 uint64_t val
= int_cst_value (rhs2
), mask
= 0;
677 uint64_t tmp
= (1 << BITS_PER_UNIT
) - 1;
679 /* Only constants masking full bytes are allowed. */
680 for (i
= 0; i
< size
; i
++, tmp
<<= BITS_PER_UNIT
)
681 if ((val
& tmp
) != 0 && (val
& tmp
) != tmp
)
684 mask
|= (uint64_t) MARKER_MASK
<< (i
* BITS_PER_MARKER
);
693 if (!do_shift_rotate (code
, n
, (int) TREE_INT_CST_LOW (rhs2
)))
698 int i
, type_size
, old_type_size
;
701 type
= gimple_expr_type (stmt
);
702 type_size
= TYPE_PRECISION (type
);
703 if (type_size
% BITS_PER_UNIT
!= 0)
705 type_size
/= BITS_PER_UNIT
;
706 if (type_size
> 64 / BITS_PER_MARKER
)
709 /* Sign extension: result is dependent on the value. */
710 old_type_size
= TYPE_PRECISION (n
->type
) / BITS_PER_UNIT
;
711 if (!TYPE_UNSIGNED (n
->type
) && type_size
> old_type_size
712 && HEAD_MARKER (n
->n
, old_type_size
))
713 for (i
= 0; i
< type_size
- old_type_size
; i
++)
714 n
->n
|= (uint64_t) MARKER_BYTE_UNKNOWN
715 << ((type_size
- 1 - i
) * BITS_PER_MARKER
);
717 if (type_size
< 64 / BITS_PER_MARKER
)
719 /* If STMT casts to a smaller type mask out the bits not
720 belonging to the target type. */
721 n
->n
&= ((uint64_t) 1 << (type_size
* BITS_PER_MARKER
)) - 1;
725 n
->range
= type_size
;
731 return verify_symbolic_number_p (n
, stmt
) ? source_stmt1
: NULL
;
734 /* Handle binary rhs. */
736 if (rhs_class
== GIMPLE_BINARY_RHS
)
738 struct symbolic_number n1
, n2
;
739 gimple
*source_stmt
, *source_stmt2
;
741 if (code
!= BIT_IOR_EXPR
)
744 if (TREE_CODE (rhs2
) != SSA_NAME
)
747 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
752 source_stmt1
= find_bswap_or_nop_1 (rhs1_stmt
, &n1
, limit
- 1);
757 source_stmt2
= find_bswap_or_nop_1 (rhs2_stmt
, &n2
, limit
- 1);
762 if (TYPE_PRECISION (n1
.type
) != TYPE_PRECISION (n2
.type
))
765 if (n1
.vuse
!= n2
.vuse
)
769 = perform_symbolic_merge (source_stmt1
, &n1
, source_stmt2
, &n2
, n
);
774 if (!verify_symbolic_number_p (n
, stmt
))
786 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
787 *CMPXCHG, *CMPNOP and adjust *N. */
790 find_bswap_or_nop_finalize (struct symbolic_number
*n
, uint64_t *cmpxchg
,
796 /* The number which the find_bswap_or_nop_1 result should match in order
797 to have a full byte swap. The number is shifted to the right
798 according to the size of the symbolic number before using it. */
802 /* Find real size of result (highest non-zero byte). */
804 for (tmpn
= n
->n
, rsize
= 0; tmpn
; tmpn
>>= BITS_PER_MARKER
, rsize
++);
808 /* Zero out the bits corresponding to untouched bytes in original gimple
810 if (n
->range
< (int) sizeof (int64_t))
812 mask
= ((uint64_t) 1 << (n
->range
* BITS_PER_MARKER
)) - 1;
813 *cmpxchg
>>= (64 / BITS_PER_MARKER
- n
->range
) * BITS_PER_MARKER
;
817 /* Zero out the bits corresponding to unused bytes in the result of the
818 gimple expression. */
819 if (rsize
< n
->range
)
821 if (BYTES_BIG_ENDIAN
)
823 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
825 *cmpnop
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
829 mask
= ((uint64_t) 1 << (rsize
* BITS_PER_MARKER
)) - 1;
830 *cmpxchg
>>= (n
->range
- rsize
) * BITS_PER_MARKER
;
836 n
->range
*= BITS_PER_UNIT
;
839 /* Check if STMT completes a bswap implementation or a read in a given
840 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
841 accordingly. It also sets N to represent the kind of operations
842 performed: size of the resulting expression and whether it works on
843 a memory source, and if so alias-set and vuse. At last, the
844 function returns a stmt whose rhs's first tree is the source
848 find_bswap_or_nop (gimple
*stmt
, struct symbolic_number
*n
, bool *bswap
)
850 /* The last parameter determines the depth search limit. It usually
851 correlates directly to the number n of bytes to be touched. We
852 increase that number by log2(n) + 1 here in order to also
853 cover signed -> unsigned conversions of the src operand as can be seen
854 in libgcc, and for initial shift/and operation of the src operand. */
855 int limit
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt
)));
856 limit
+= 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT
) limit
);
857 gimple
*ins_stmt
= find_bswap_or_nop_1 (stmt
, n
, limit
);
862 uint64_t cmpxchg
, cmpnop
;
863 find_bswap_or_nop_finalize (n
, &cmpxchg
, &cmpnop
);
865 /* A complete byte swap should make the symbolic number to start with
866 the largest digit in the highest order byte. Unchanged symbolic
867 number indicates a read with same endianness as target architecture. */
870 else if (n
->n
== cmpxchg
)
875 /* Useless bit manipulation performed by code. */
876 if (!n
->base_addr
&& n
->n
== cmpnop
&& n
->n_ops
== 1)
882 const pass_data pass_data_optimize_bswap
=
884 GIMPLE_PASS
, /* type */
886 OPTGROUP_NONE
, /* optinfo_flags */
888 PROP_ssa
, /* properties_required */
889 0, /* properties_provided */
890 0, /* properties_destroyed */
891 0, /* todo_flags_start */
892 0, /* todo_flags_finish */
895 class pass_optimize_bswap
: public gimple_opt_pass
898 pass_optimize_bswap (gcc::context
*ctxt
)
899 : gimple_opt_pass (pass_data_optimize_bswap
, ctxt
)
902 /* opt_pass methods: */
903 virtual bool gate (function
*)
905 return flag_expensive_optimizations
&& optimize
&& BITS_PER_UNIT
== 8;
908 virtual unsigned int execute (function
*);
910 }; // class pass_optimize_bswap
912 /* Perform the bswap optimization: replace the expression computed in the rhs
913 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
914 bswap, load or load + bswap expression.
915 Which of these alternatives replace the rhs is given by N->base_addr (non
916 null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
917 load to perform are also given in N while the builtin bswap invocation is given
918 in FNDECL. Finally, if a load is involved, INS_STMT refers to one of the
919 load statements involved to construct the rhs in gsi_stmt (GSI) and
920 N->range gives the size of the rhs expression for maintaining some
923 Note that if the replacement involve a load and if gsi_stmt (GSI) is
924 non-NULL, that stmt is moved just after INS_STMT to do the load with the
925 same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
928 bswap_replace (gimple_stmt_iterator gsi
, gimple
*ins_stmt
, tree fndecl
,
929 tree bswap_type
, tree load_type
, struct symbolic_number
*n
,
932 tree src
, tmp
, tgt
= NULL_TREE
;
935 gimple
*cur_stmt
= gsi_stmt (gsi
);
938 tgt
= gimple_assign_lhs (cur_stmt
);
940 /* Need to load the value from memory first. */
943 gimple_stmt_iterator gsi_ins
= gsi
;
945 gsi_ins
= gsi_for_stmt (ins_stmt
);
946 tree addr_expr
, addr_tmp
, val_expr
, val_tmp
;
947 tree load_offset_ptr
, aligned_load_type
;
949 unsigned align
= get_object_alignment (src
);
950 poly_int64 load_offset
= 0;
954 basic_block ins_bb
= gimple_bb (ins_stmt
);
955 basic_block cur_bb
= gimple_bb (cur_stmt
);
956 if (!dominated_by_p (CDI_DOMINATORS
, cur_bb
, ins_bb
))
959 /* Move cur_stmt just before one of the load of the original
960 to ensure it has the same VUSE. See PR61517 for what could
962 if (gimple_bb (cur_stmt
) != gimple_bb (ins_stmt
))
963 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt
));
964 gsi_move_before (&gsi
, &gsi_ins
);
965 gsi
= gsi_for_stmt (cur_stmt
);
970 /* Compute address to load from and cast according to the size
972 addr_expr
= build_fold_addr_expr (src
);
973 if (is_gimple_mem_ref_addr (addr_expr
))
974 addr_tmp
= unshare_expr (addr_expr
);
977 addr_tmp
= unshare_expr (n
->base_addr
);
978 if (!is_gimple_mem_ref_addr (addr_tmp
))
979 addr_tmp
= force_gimple_operand_gsi_1 (&gsi
, addr_tmp
,
980 is_gimple_mem_ref_addr
,
983 load_offset
= n
->bytepos
;
987 = force_gimple_operand_gsi (&gsi
, unshare_expr (n
->offset
),
988 true, NULL_TREE
, true,
991 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp
)),
992 POINTER_PLUS_EXPR
, addr_tmp
, off
);
993 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
994 addr_tmp
= gimple_assign_lhs (stmt
);
998 /* Perform the load. */
999 aligned_load_type
= load_type
;
1000 if (align
< TYPE_ALIGN (load_type
))
1001 aligned_load_type
= build_aligned_type (load_type
, align
);
1002 load_offset_ptr
= build_int_cst (n
->alias_set
, load_offset
);
1003 val_expr
= fold_build2 (MEM_REF
, aligned_load_type
, addr_tmp
,
1009 nop_stats
.found_16bit
++;
1010 else if (n
->range
== 32)
1011 nop_stats
.found_32bit
++;
1014 gcc_assert (n
->range
== 64);
1015 nop_stats
.found_64bit
++;
1018 /* Convert the result of load if necessary. */
1019 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), load_type
))
1021 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
,
1023 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1024 gimple_set_vuse (load_stmt
, n
->vuse
);
1025 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1026 gimple_assign_set_rhs_with_ops (&gsi
, NOP_EXPR
, val_tmp
);
1027 update_stmt (cur_stmt
);
1031 gimple_assign_set_rhs_with_ops (&gsi
, MEM_REF
, val_expr
);
1032 gimple_set_vuse (cur_stmt
, n
->vuse
);
1033 update_stmt (cur_stmt
);
1037 tgt
= make_ssa_name (load_type
);
1038 cur_stmt
= gimple_build_assign (tgt
, MEM_REF
, val_expr
);
1039 gimple_set_vuse (cur_stmt
, n
->vuse
);
1040 gsi_insert_before (&gsi
, cur_stmt
, GSI_SAME_STMT
);
1046 "%d bit load in target endianness found at: ",
1048 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1054 val_tmp
= make_temp_ssa_name (aligned_load_type
, NULL
, "load_dst");
1055 load_stmt
= gimple_build_assign (val_tmp
, val_expr
);
1056 gimple_set_vuse (load_stmt
, n
->vuse
);
1057 gsi_insert_before (&gsi
, load_stmt
, GSI_SAME_STMT
);
1064 if (tgt
&& !useless_type_conversion_p (TREE_TYPE (tgt
), TREE_TYPE (src
)))
1066 if (!is_gimple_val (src
))
1068 g
= gimple_build_assign (tgt
, NOP_EXPR
, src
);
1071 g
= gimple_build_assign (tgt
, src
);
1075 nop_stats
.found_16bit
++;
1076 else if (n
->range
== 32)
1077 nop_stats
.found_32bit
++;
1080 gcc_assert (n
->range
== 64);
1081 nop_stats
.found_64bit
++;
1086 "%d bit reshuffle in target endianness found at: ",
1089 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1092 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1093 fprintf (dump_file
, "\n");
1097 gsi_replace (&gsi
, g
, true);
1100 else if (TREE_CODE (src
) == BIT_FIELD_REF
)
1101 src
= TREE_OPERAND (src
, 0);
1104 bswap_stats
.found_16bit
++;
1105 else if (n
->range
== 32)
1106 bswap_stats
.found_32bit
++;
1109 gcc_assert (n
->range
== 64);
1110 bswap_stats
.found_64bit
++;
1115 /* Convert the src expression if necessary. */
1116 if (!useless_type_conversion_p (TREE_TYPE (tmp
), bswap_type
))
1118 gimple
*convert_stmt
;
1120 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapsrc");
1121 convert_stmt
= gimple_build_assign (tmp
, NOP_EXPR
, src
);
1122 gsi_insert_before (&gsi
, convert_stmt
, GSI_SAME_STMT
);
1125 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1126 are considered as rotation of 2N bit values by N bits is generally not
1127 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1128 gives 0x03040102 while a bswap for that value is 0x04030201. */
1129 if (bswap
&& n
->range
== 16)
1131 tree count
= build_int_cst (NULL
, BITS_PER_UNIT
);
1132 src
= fold_build2 (LROTATE_EXPR
, bswap_type
, tmp
, count
);
1133 bswap_stmt
= gimple_build_assign (NULL
, src
);
1136 bswap_stmt
= gimple_build_call (fndecl
, 1, tmp
);
1138 if (tgt
== NULL_TREE
)
1139 tgt
= make_ssa_name (bswap_type
);
1142 /* Convert the result if necessary. */
1143 if (!useless_type_conversion_p (TREE_TYPE (tgt
), bswap_type
))
1145 gimple
*convert_stmt
;
1147 tmp
= make_temp_ssa_name (bswap_type
, NULL
, "bswapdst");
1148 convert_stmt
= gimple_build_assign (tgt
, NOP_EXPR
, tmp
);
1149 gsi_insert_after (&gsi
, convert_stmt
, GSI_SAME_STMT
);
1152 gimple_set_lhs (bswap_stmt
, tmp
);
1156 fprintf (dump_file
, "%d bit bswap implementation found at: ",
1159 print_gimple_stmt (dump_file
, cur_stmt
, 0);
1162 print_generic_expr (dump_file
, tgt
, TDF_NONE
);
1163 fprintf (dump_file
, "\n");
1169 gsi_insert_after (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1170 gsi_remove (&gsi
, true);
1173 gsi_insert_before (&gsi
, bswap_stmt
, GSI_SAME_STMT
);
1177 /* Find manual byte swap implementations as well as load in a given
1178 endianness. Byte swaps are turned into a bswap builtin invocation
1179 while endian loads are converted to bswap builtin invocation or
1180 simple load according to the target endianness. */
1183 pass_optimize_bswap::execute (function
*fun
)
1186 bool bswap32_p
, bswap64_p
;
1187 bool changed
= false;
1188 tree bswap32_type
= NULL_TREE
, bswap64_type
= NULL_TREE
;
1190 bswap32_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
1191 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
);
1192 bswap64_p
= (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
1193 && (optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
1194 || (bswap32_p
&& word_mode
== SImode
)));
1196 /* Determine the argument type of the builtins. The code later on
1197 assumes that the return and argument type are the same. */
1200 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1201 bswap32_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1206 tree fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1207 bswap64_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
1210 memset (&nop_stats
, 0, sizeof (nop_stats
));
1211 memset (&bswap_stats
, 0, sizeof (bswap_stats
));
1212 calculate_dominance_info (CDI_DOMINATORS
);
1214 FOR_EACH_BB_FN (bb
, fun
)
1216 gimple_stmt_iterator gsi
;
1218 /* We do a reverse scan for bswap patterns to make sure we get the
1219 widest match. As bswap pattern matching doesn't handle previously
1220 inserted smaller bswap replacements as sub-patterns, the wider
1221 variant wouldn't be detected. */
1222 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
);)
1224 gimple
*ins_stmt
, *cur_stmt
= gsi_stmt (gsi
);
1225 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
1226 enum tree_code code
;
1227 struct symbolic_number n
;
1230 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1231 might be moved to a different basic block by bswap_replace and gsi
1232 must not points to it if that's the case. Moving the gsi_prev
1233 there make sure that gsi points to the statement previous to
1234 cur_stmt while still making sure that all statements are
1235 considered in this basic block. */
1238 if (!is_gimple_assign (cur_stmt
))
1241 code
= gimple_assign_rhs_code (cur_stmt
);
1246 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt
))
1247 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt
))
1257 ins_stmt
= find_bswap_or_nop (cur_stmt
, &n
, &bswap
);
1265 /* Already in canonical form, nothing to do. */
1266 if (code
== LROTATE_EXPR
|| code
== RROTATE_EXPR
)
1268 load_type
= bswap_type
= uint16_type_node
;
1271 load_type
= uint32_type_node
;
1274 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
1275 bswap_type
= bswap32_type
;
1279 load_type
= uint64_type_node
;
1282 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
1283 bswap_type
= bswap64_type
;
1290 if (bswap
&& !fndecl
&& n
.range
!= 16)
1293 if (bswap_replace (gsi_for_stmt (cur_stmt
), ins_stmt
, fndecl
,
1294 bswap_type
, load_type
, &n
, bswap
))
1299 statistics_counter_event (fun
, "16-bit nop implementations found",
1300 nop_stats
.found_16bit
);
1301 statistics_counter_event (fun
, "32-bit nop implementations found",
1302 nop_stats
.found_32bit
);
1303 statistics_counter_event (fun
, "64-bit nop implementations found",
1304 nop_stats
.found_64bit
);
1305 statistics_counter_event (fun
, "16-bit bswap implementations found",
1306 bswap_stats
.found_16bit
);
1307 statistics_counter_event (fun
, "32-bit bswap implementations found",
1308 bswap_stats
.found_32bit
);
1309 statistics_counter_event (fun
, "64-bit bswap implementations found",
1310 bswap_stats
.found_64bit
);
1312 return (changed
? TODO_update_ssa
: 0);
1318 make_pass_optimize_bswap (gcc::context
*ctxt
)
1320 return new pass_optimize_bswap (ctxt
);
1325 /* Struct recording one operand for the store, which is either a constant,
1326 then VAL represents the constant and all the other fields are zero, or
1327 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1328 and the other fields also reflect the memory load, or an SSA name, then
1329 VAL represents the SSA name and all the other fields are zero. */
1331 class store_operand_info
1336 poly_uint64 bitsize
;
1338 poly_uint64 bitregion_start
;
1339 poly_uint64 bitregion_end
;
1342 store_operand_info ();
1345 store_operand_info::store_operand_info ()
1346 : val (NULL_TREE
), base_addr (NULL_TREE
), bitsize (0), bitpos (0),
1347 bitregion_start (0), bitregion_end (0), stmt (NULL
), bit_not_p (false)
1351 /* Struct recording the information about a single store of an immediate
1352 to memory. These are created in the first phase and coalesced into
1353 merged_store_group objects in the second phase. */
1355 class store_immediate_info
1358 unsigned HOST_WIDE_INT bitsize
;
1359 unsigned HOST_WIDE_INT bitpos
;
1360 unsigned HOST_WIDE_INT bitregion_start
;
1361 /* This is one past the last bit of the bit region. */
1362 unsigned HOST_WIDE_INT bitregion_end
;
1365 /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1366 BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1368 LROTATE_EXPR if it can be only bswap optimized and
1369 ops are not really meaningful.
1370 NOP_EXPR if bswap optimization detected identity, ops
1371 are not meaningful. */
1372 enum tree_code rhs_code
;
1373 /* Two fields for bswap optimization purposes. */
1374 struct symbolic_number n
;
1376 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1378 /* True if ops have been swapped and thus ops[1] represents
1379 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1381 /* The index number of the landing pad, or 0 if there is none. */
1383 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1384 just the first one. */
1385 store_operand_info ops
[2];
1386 store_immediate_info (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1387 unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
1388 gimple
*, unsigned int, enum tree_code
,
1389 struct symbolic_number
&, gimple
*, bool, int,
1390 const store_operand_info
&,
1391 const store_operand_info
&);
1394 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs
,
1395 unsigned HOST_WIDE_INT bp
,
1396 unsigned HOST_WIDE_INT brs
,
1397 unsigned HOST_WIDE_INT bre
,
1400 enum tree_code rhscode
,
1401 struct symbolic_number
&nr
,
1405 const store_operand_info
&op0r
,
1406 const store_operand_info
&op1r
)
1407 : bitsize (bs
), bitpos (bp
), bitregion_start (brs
), bitregion_end (bre
),
1408 stmt (st
), order (ord
), rhs_code (rhscode
), n (nr
),
1409 ins_stmt (ins_stmtp
), bit_not_p (bitnotp
), ops_swapped_p (false),
1411 #if __cplusplus >= 201103L
1412 , ops
{ op0r
, op1r
}
1422 /* Struct representing a group of stores to contiguous memory locations.
1423 These are produced by the second phase (coalescing) and consumed in the
1424 third phase that outputs the widened stores. */
1426 class merged_store_group
1429 unsigned HOST_WIDE_INT start
;
1430 unsigned HOST_WIDE_INT width
;
1431 unsigned HOST_WIDE_INT bitregion_start
;
1432 unsigned HOST_WIDE_INT bitregion_end
;
1433 /* The size of the allocated memory for val and mask. */
1434 unsigned HOST_WIDE_INT buf_size
;
1435 unsigned HOST_WIDE_INT align_base
;
1436 poly_uint64 load_align_base
[2];
1439 unsigned int load_align
[2];
1440 unsigned int first_order
;
1441 unsigned int last_order
;
1443 bool only_constants
;
1444 unsigned int first_nonmergeable_order
;
1447 auto_vec
<store_immediate_info
*> stores
;
1448 /* We record the first and last original statements in the sequence because
1449 we'll need their vuse/vdef and replacement position. It's easier to keep
1450 track of them separately as 'stores' is reordered by apply_stores. */
1454 unsigned char *mask
;
1456 merged_store_group (store_immediate_info
*);
1457 ~merged_store_group ();
1458 bool can_be_merged_into (store_immediate_info
*);
1459 void merge_into (store_immediate_info
*);
1460 void merge_overlapping (store_immediate_info
*);
1461 bool apply_stores ();
1463 void do_merge (store_immediate_info
*);
/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex,
   one "%02x " group per byte, terminated by a newline.  A NULL FD is
   a no-op.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (fd == NULL)
    return;

  unsigned int idx = 0;
  while (idx < len)
    {
      fprintf (fd, "%02x ", ptr[idx]);
      idx++;
    }
  fprintf (fd, "\n");
}
1479 /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
1480 bits between adjacent elements. AMNT should be within
1483 00011111|11100000 << 2 = 01111111|10000000
1484 PTR[1] | PTR[0] PTR[1] | PTR[0]. */
1487 shift_bytes_in_array (unsigned char *ptr
, unsigned int sz
, unsigned int amnt
)
1492 unsigned char carry_over
= 0U;
1493 unsigned char carry_mask
= (~0U) << (unsigned char) (BITS_PER_UNIT
- amnt
);
1494 unsigned char clear_mask
= (~0U) << amnt
;
1496 for (unsigned int i
= 0; i
< sz
; i
++)
1498 unsigned prev_carry_over
= carry_over
;
1499 carry_over
= (ptr
[i
] & carry_mask
) >> (BITS_PER_UNIT
- amnt
);
1504 ptr
[i
] &= clear_mask
;
1505 ptr
[i
] |= prev_carry_over
;
1510 /* Like shift_bytes_in_array but for big-endian.
1511 Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
1512 bits between adjacent elements. AMNT should be within
1515 00011111|11100000 >> 2 = 00000111|11111000
1516 PTR[0] | PTR[1] PTR[0] | PTR[1]. */
1519 shift_bytes_in_array_right (unsigned char *ptr
, unsigned int sz
,
1525 unsigned char carry_over
= 0U;
1526 unsigned char carry_mask
= ~(~0U << amnt
);
1528 for (unsigned int i
= 0; i
< sz
; i
++)
1530 unsigned prev_carry_over
= carry_over
;
1531 carry_over
= ptr
[i
] & carry_mask
;
1533 carry_over
<<= (unsigned char) BITS_PER_UNIT
- amnt
;
1535 ptr
[i
] |= prev_carry_over
;
1539 /* Clear out LEN bits starting from bit START in the byte array
1540 PTR. This clears the bits to the *right* from START.
1541 START must be within [0, BITS_PER_UNIT) and counts starting from
1542 the least significant bit. */
1545 clear_bit_region_be (unsigned char *ptr
, unsigned int start
,
1550 /* Clear len bits to the right of start. */
1551 else if (len
<= start
+ 1)
1553 unsigned char mask
= (~(~0U << len
));
1554 mask
= mask
<< (start
+ 1U - len
);
1557 else if (start
!= BITS_PER_UNIT
- 1)
1559 clear_bit_region_be (ptr
, start
, (start
% BITS_PER_UNIT
) + 1);
1560 clear_bit_region_be (ptr
+ 1, BITS_PER_UNIT
- 1,
1561 len
- (start
% BITS_PER_UNIT
) - 1);
1563 else if (start
== BITS_PER_UNIT
- 1
1564 && len
> BITS_PER_UNIT
)
1566 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1567 memset (ptr
, 0, nbytes
);
1568 if (len
% BITS_PER_UNIT
!= 0)
1569 clear_bit_region_be (ptr
+ nbytes
, BITS_PER_UNIT
- 1,
1570 len
% BITS_PER_UNIT
);
1576 /* In the byte array PTR clear the bit region starting at bit
1577 START and is LEN bits wide.
1578 For regions spanning multiple bytes do this recursively until we reach
1579 zero LEN or a region contained within a single byte. */
1582 clear_bit_region (unsigned char *ptr
, unsigned int start
,
1585 /* Degenerate base case. */
1588 else if (start
>= BITS_PER_UNIT
)
1589 clear_bit_region (ptr
+ 1, start
- BITS_PER_UNIT
, len
);
1590 /* Second base case. */
1591 else if ((start
+ len
) <= BITS_PER_UNIT
)
1593 unsigned char mask
= (~0U) << (unsigned char) (BITS_PER_UNIT
- len
);
1594 mask
>>= BITS_PER_UNIT
- (start
+ len
);
1600 /* Clear most significant bits in a byte and proceed with the next byte. */
1601 else if (start
!= 0)
1603 clear_bit_region (ptr
, start
, BITS_PER_UNIT
- start
);
1604 clear_bit_region (ptr
+ 1, 0, len
- (BITS_PER_UNIT
- start
));
1606 /* Whole bytes need to be cleared. */
1607 else if (start
== 0 && len
> BITS_PER_UNIT
)
1609 unsigned int nbytes
= len
/ BITS_PER_UNIT
;
1610 /* We could recurse on each byte but we clear whole bytes, so a simple
1612 memset (ptr
, '\0', nbytes
);
1613 /* Clear the remaining sub-byte region if there is one. */
1614 if (len
% BITS_PER_UNIT
!= 0)
1615 clear_bit_region (ptr
+ nbytes
, 0, len
% BITS_PER_UNIT
);
1621 /* Write BITLEN bits of EXPR to the byte array PTR at
1622 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1623 Return true if the operation succeeded. */
1626 encode_tree_to_bitpos (tree expr
, unsigned char *ptr
, int bitlen
, int bitpos
,
1627 unsigned int total_bytes
)
1629 unsigned int first_byte
= bitpos
/ BITS_PER_UNIT
;
1630 bool sub_byte_op_p
= ((bitlen
% BITS_PER_UNIT
)
1631 || (bitpos
% BITS_PER_UNIT
)
1632 || !int_mode_for_size (bitlen
, 0).exists ());
1634 = (TREE_CODE (expr
) == CONSTRUCTOR
1635 && CONSTRUCTOR_NELTS (expr
) == 0
1636 && TYPE_SIZE_UNIT (TREE_TYPE (expr
))
1637 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr
))));
1641 if (first_byte
>= total_bytes
)
1643 total_bytes
-= first_byte
;
1646 unsigned HOST_WIDE_INT rhs_bytes
1647 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)));
1648 if (rhs_bytes
> total_bytes
)
1650 memset (ptr
+ first_byte
, '\0', rhs_bytes
);
1653 return native_encode_expr (expr
, ptr
+ first_byte
, total_bytes
) != 0;
1657 We are writing a non byte-sized quantity or at a position that is not
1659 |--------|--------|--------| ptr + first_byte
1661 xxx xxxxxxxx xxx< bp>
1664 First native_encode_expr EXPR into a temporary buffer and shift each
1665 byte in the buffer by 'bp' (carrying the bits over as necessary).
1666 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1667 <------bitlen---->< bp>
1668 Then we clear the destination bits:
1669 |---00000|00000000|000-----| ptr + first_byte
1670 <-------bitlen--->< bp>
1672 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1673 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1676 We are writing a non byte-sized quantity or at a position that is not
1678 ptr + first_byte |--------|--------|--------|
1680 <bp >xxx xxxxxxxx xxx
1683 First native_encode_expr EXPR into a temporary buffer and shift each
1684 byte in the buffer to the right by (carrying the bits over as necessary).
1685 We shift by as much as needed to align the most significant bit of EXPR
1687 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1688 <---bitlen----> <bp ><-----bitlen----->
1689 Then we clear the destination bits:
1690 ptr + first_byte |-----000||00000000||00000---|
1691 <bp ><-------bitlen----->
1693 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1694 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1695 The awkwardness comes from the fact that bitpos is counted from the
1696 most significant bit of a byte. */
1698 /* We must be dealing with fixed-size data at this point, since the
1699 total size is also fixed. */
1700 unsigned int byte_size
;
1703 unsigned HOST_WIDE_INT rhs_bytes
1704 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr
)));
1705 if (rhs_bytes
> total_bytes
)
1707 byte_size
= rhs_bytes
;
1711 fixed_size_mode mode
1712 = as_a
<fixed_size_mode
> (TYPE_MODE (TREE_TYPE (expr
)));
1713 byte_size
= GET_MODE_SIZE (mode
);
1715 /* Allocate an extra byte so that we have space to shift into. */
1717 unsigned char *tmpbuf
= XALLOCAVEC (unsigned char, byte_size
);
1718 memset (tmpbuf
, '\0', byte_size
);
1719 /* The store detection code should only have allowed constants that are
1720 accepted by native_encode_expr or empty ctors. */
1722 && native_encode_expr (expr
, tmpbuf
, byte_size
- 1) == 0)
1725 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1726 bytes to write. This means it can write more than
1727 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1728 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1729 bitlen and zero out the bits that are not relevant as well (that may
1730 contain a sign bit due to sign-extension). */
1731 unsigned int padding
1732 = byte_size
- ROUND_UP (bitlen
, BITS_PER_UNIT
) / BITS_PER_UNIT
- 1;
1733 /* On big-endian the padding is at the 'front' so just skip the initial
1735 if (BYTES_BIG_ENDIAN
)
1738 byte_size
-= padding
;
1740 if (bitlen
% BITS_PER_UNIT
!= 0)
1742 if (BYTES_BIG_ENDIAN
)
1743 clear_bit_region_be (tmpbuf
, BITS_PER_UNIT
- 1,
1744 BITS_PER_UNIT
- (bitlen
% BITS_PER_UNIT
));
1746 clear_bit_region (tmpbuf
, bitlen
,
1747 byte_size
* BITS_PER_UNIT
- bitlen
);
1749 /* Left shifting relies on the last byte being clear if bitlen is
1750 a multiple of BITS_PER_UNIT, which might not be clear if
1751 there are padding bytes. */
1752 else if (!BYTES_BIG_ENDIAN
)
1753 tmpbuf
[byte_size
- 1] = '\0';
1755 /* Clear the bit region in PTR where the bits from TMPBUF will be
1757 if (BYTES_BIG_ENDIAN
)
1758 clear_bit_region_be (ptr
+ first_byte
,
1759 BITS_PER_UNIT
- 1 - (bitpos
% BITS_PER_UNIT
), bitlen
);
1761 clear_bit_region (ptr
+ first_byte
, bitpos
% BITS_PER_UNIT
, bitlen
);
1764 int bitlen_mod
= bitlen
% BITS_PER_UNIT
;
1765 int bitpos_mod
= bitpos
% BITS_PER_UNIT
;
1767 bool skip_byte
= false;
1768 if (BYTES_BIG_ENDIAN
)
1770 /* BITPOS and BITLEN are exactly aligned and no shifting
1772 if (bitpos_mod
+ bitlen_mod
== BITS_PER_UNIT
1773 || (bitpos_mod
== 0 && bitlen_mod
== 0))
1775 /* |. . . . . . . .|
1777 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1778 of the value until it aligns with 'bp' in the next byte over. */
1779 else if (bitpos_mod
+ bitlen_mod
< BITS_PER_UNIT
)
1781 shift_amnt
= bitlen_mod
+ bitpos_mod
;
1782 skip_byte
= bitlen_mod
!= 0;
1784 /* |. . . . . . . .|
1787 Shift the value right within the same byte so it aligns with 'bp'. */
1789 shift_amnt
= bitlen_mod
+ bitpos_mod
- BITS_PER_UNIT
;
1792 shift_amnt
= bitpos
% BITS_PER_UNIT
;
1794 /* Create the shifted version of EXPR. */
1795 if (!BYTES_BIG_ENDIAN
)
1797 shift_bytes_in_array (tmpbuf
, byte_size
, shift_amnt
);
1798 if (shift_amnt
== 0)
1803 gcc_assert (BYTES_BIG_ENDIAN
);
1804 shift_bytes_in_array_right (tmpbuf
, byte_size
, shift_amnt
);
1805 /* If shifting right forced us to move into the next byte skip the now
1814 /* Insert the bits from TMPBUF. */
1815 for (unsigned int i
= 0; i
< byte_size
; i
++)
1816 ptr
[first_byte
+ i
] |= tmpbuf
[i
];
1821 /* Sorting function for store_immediate_info objects.
1822 Sorts them by bitposition. */
1825 sort_by_bitpos (const void *x
, const void *y
)
1827 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
1828 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
1830 if ((*tmp
)->bitpos
< (*tmp2
)->bitpos
)
1832 else if ((*tmp
)->bitpos
> (*tmp2
)->bitpos
)
1835 /* If they are the same let's use the order which is guaranteed to
1837 return (*tmp
)->order
- (*tmp2
)->order
;
1840 /* Sorting function for store_immediate_info objects.
1841 Sorts them by the order field. */
1844 sort_by_order (const void *x
, const void *y
)
1846 store_immediate_info
*const *tmp
= (store_immediate_info
* const *) x
;
1847 store_immediate_info
*const *tmp2
= (store_immediate_info
* const *) y
;
1849 if ((*tmp
)->order
< (*tmp2
)->order
)
1851 else if ((*tmp
)->order
> (*tmp2
)->order
)
1857 /* Initialize a merged_store_group object from a store_immediate_info
1860 merged_store_group::merged_store_group (store_immediate_info
*info
)
1862 start
= info
->bitpos
;
1863 width
= info
->bitsize
;
1864 bitregion_start
= info
->bitregion_start
;
1865 bitregion_end
= info
->bitregion_end
;
1866 /* VAL has memory allocated for it in apply_stores once the group
1867 width has been finalized. */
1870 bit_insertion
= false;
1871 only_constants
= info
->rhs_code
== INTEGER_CST
;
1872 first_nonmergeable_order
= ~0U;
1873 lp_nr
= info
->lp_nr
;
1874 unsigned HOST_WIDE_INT align_bitpos
= 0;
1875 get_object_alignment_1 (gimple_assign_lhs (info
->stmt
),
1876 &align
, &align_bitpos
);
1877 align_base
= start
- align_bitpos
;
1878 for (int i
= 0; i
< 2; ++i
)
1880 store_operand_info
&op
= info
->ops
[i
];
1881 if (op
.base_addr
== NULL_TREE
)
1884 load_align_base
[i
] = 0;
1888 get_object_alignment_1 (op
.val
, &load_align
[i
], &align_bitpos
);
1889 load_align_base
[i
] = op
.bitpos
- align_bitpos
;
1893 stores
.safe_push (info
);
1894 last_stmt
= info
->stmt
;
1895 last_order
= info
->order
;
1896 first_stmt
= last_stmt
;
1897 first_order
= last_order
;
1901 merged_store_group::~merged_store_group ()
1907 /* Return true if the store described by INFO can be merged into the group. */
1910 merged_store_group::can_be_merged_into (store_immediate_info
*info
)
1912 /* Do not merge bswap patterns. */
1913 if (info
->rhs_code
== LROTATE_EXPR
)
1916 if (info
->lp_nr
!= lp_nr
)
1919 /* The canonical case. */
1920 if (info
->rhs_code
== stores
[0]->rhs_code
)
1923 /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */
1924 if (info
->rhs_code
== BIT_INSERT_EXPR
&& stores
[0]->rhs_code
== INTEGER_CST
)
1927 if (stores
[0]->rhs_code
== BIT_INSERT_EXPR
&& info
->rhs_code
== INTEGER_CST
)
1930 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
1931 if (info
->rhs_code
== MEM_REF
1932 && (stores
[0]->rhs_code
== INTEGER_CST
1933 || stores
[0]->rhs_code
== BIT_INSERT_EXPR
)
1934 && info
->bitregion_start
== stores
[0]->bitregion_start
1935 && info
->bitregion_end
== stores
[0]->bitregion_end
)
1938 if (stores
[0]->rhs_code
== MEM_REF
1939 && (info
->rhs_code
== INTEGER_CST
1940 || info
->rhs_code
== BIT_INSERT_EXPR
)
1941 && info
->bitregion_start
== stores
[0]->bitregion_start
1942 && info
->bitregion_end
== stores
[0]->bitregion_end
)
1948 /* Helper method for merge_into and merge_overlapping to do
1952 merged_store_group::do_merge (store_immediate_info
*info
)
1954 bitregion_start
= MIN (bitregion_start
, info
->bitregion_start
);
1955 bitregion_end
= MAX (bitregion_end
, info
->bitregion_end
);
1957 unsigned int this_align
;
1958 unsigned HOST_WIDE_INT align_bitpos
= 0;
1959 get_object_alignment_1 (gimple_assign_lhs (info
->stmt
),
1960 &this_align
, &align_bitpos
);
1961 if (this_align
> align
)
1964 align_base
= info
->bitpos
- align_bitpos
;
1966 for (int i
= 0; i
< 2; ++i
)
1968 store_operand_info
&op
= info
->ops
[i
];
1972 get_object_alignment_1 (op
.val
, &this_align
, &align_bitpos
);
1973 if (this_align
> load_align
[i
])
1975 load_align
[i
] = this_align
;
1976 load_align_base
[i
] = op
.bitpos
- align_bitpos
;
1980 gimple
*stmt
= info
->stmt
;
1981 stores
.safe_push (info
);
1982 if (info
->order
> last_order
)
1984 last_order
= info
->order
;
1987 else if (info
->order
< first_order
)
1989 first_order
= info
->order
;
1992 if (info
->rhs_code
!= INTEGER_CST
)
1993 only_constants
= false;
1996 /* Merge a store recorded by INFO into this merged store.
1997 The store is not overlapping with the existing recorded
2001 merged_store_group::merge_into (store_immediate_info
*info
)
2003 /* Make sure we're inserting in the position we think we're inserting. */
2004 gcc_assert (info
->bitpos
>= start
+ width
2005 && info
->bitregion_start
<= bitregion_end
);
2007 width
= info
->bitpos
+ info
->bitsize
- start
;
2011 /* Merge a store described by INFO into this merged store.
2012 INFO overlaps in some way with the current store (i.e. it's not contiguous
2013 which is handled by merged_store_group::merge_into). */
2016 merged_store_group::merge_overlapping (store_immediate_info
*info
)
2018 /* If the store extends the size of the group, extend the width. */
2019 if (info
->bitpos
+ info
->bitsize
> start
+ width
)
2020 width
= info
->bitpos
+ info
->bitsize
- start
;
2025 /* Go through all the recorded stores in this group in program order and
2026 apply their values to the VAL byte array to create the final merged
2027 value. Return true if the operation succeeded. */
2030 merged_store_group::apply_stores ()
2032 /* Make sure we have more than one store in the group, otherwise we cannot
2034 if (bitregion_start
% BITS_PER_UNIT
!= 0
2035 || bitregion_end
% BITS_PER_UNIT
!= 0
2036 || stores
.length () == 1)
2039 stores
.qsort (sort_by_order
);
2040 store_immediate_info
*info
;
2042 /* Create a power-of-2-sized buffer for native_encode_expr. */
2043 buf_size
= 1 << ceil_log2 ((bitregion_end
- bitregion_start
) / BITS_PER_UNIT
);
2044 val
= XNEWVEC (unsigned char, 2 * buf_size
);
2045 mask
= val
+ buf_size
;
2046 memset (val
, 0, buf_size
);
2047 memset (mask
, ~0U, buf_size
);
2049 FOR_EACH_VEC_ELT (stores
, i
, info
)
2051 unsigned int pos_in_buffer
= info
->bitpos
- bitregion_start
;
2053 if (info
->ops
[0].val
&& info
->ops
[0].base_addr
== NULL_TREE
)
2054 cst
= info
->ops
[0].val
;
2055 else if (info
->ops
[1].val
&& info
->ops
[1].base_addr
== NULL_TREE
)
2056 cst
= info
->ops
[1].val
;
2062 if (info
->rhs_code
== BIT_INSERT_EXPR
)
2063 bit_insertion
= true;
2065 ret
= encode_tree_to_bitpos (cst
, val
, info
->bitsize
,
2066 pos_in_buffer
, buf_size
);
2068 unsigned char *m
= mask
+ (pos_in_buffer
/ BITS_PER_UNIT
);
2069 if (BYTES_BIG_ENDIAN
)
2070 clear_bit_region_be (m
, (BITS_PER_UNIT
- 1
2071 - (pos_in_buffer
% BITS_PER_UNIT
)),
2074 clear_bit_region (m
, pos_in_buffer
% BITS_PER_UNIT
, info
->bitsize
);
2075 if (cst
&& dump_file
&& (dump_flags
& TDF_DETAILS
))
2079 fputs ("After writing ", dump_file
);
2080 print_generic_expr (dump_file
, cst
, TDF_NONE
);
2081 fprintf (dump_file
, " of size " HOST_WIDE_INT_PRINT_DEC
2082 " at position %d\n", info
->bitsize
, pos_in_buffer
);
2083 fputs (" the merged value contains ", dump_file
);
2084 dump_char_array (dump_file
, val
, buf_size
);
2085 fputs (" the merged mask contains ", dump_file
);
2086 dump_char_array (dump_file
, mask
, buf_size
);
2088 fputs (" bit insertion is required\n", dump_file
);
2091 fprintf (dump_file
, "Failed to merge stores\n");
2096 stores
.qsort (sort_by_bitpos
);
2100 /* Structure describing the store chain. */
2102 class imm_store_chain_info
2105 /* Doubly-linked list that imposes an order on chain processing.
2106 PNXP (prev's next pointer) points to the head of a list, or to
2107 the next field in the previous chain in the list.
2108 See pass_store_merging::m_stores_head for more rationale. */
2109 imm_store_chain_info
*next
, **pnxp
;
2111 auto_vec
<store_immediate_info
*> m_store_info
;
2112 auto_vec
<merged_store_group
*> m_merged_store_groups
;
2114 imm_store_chain_info (imm_store_chain_info
*&inspt
, tree b_a
)
2115 : next (inspt
), pnxp (&inspt
), base_addr (b_a
)
2120 gcc_checking_assert (pnxp
== next
->pnxp
);
2124 ~imm_store_chain_info ()
2129 gcc_checking_assert (&next
== next
->pnxp
);
2133 bool terminate_and_process_chain ();
2134 bool try_coalesce_bswap (merged_store_group
*, unsigned int, unsigned int);
2135 bool coalesce_immediate_stores ();
2136 bool output_merged_store (merged_store_group
*);
2137 bool output_merged_stores ();
2140 const pass_data pass_data_tree_store_merging
= {
2141 GIMPLE_PASS
, /* type */
2142 "store-merging", /* name */
2143 OPTGROUP_NONE
, /* optinfo_flags */
2144 TV_GIMPLE_STORE_MERGING
, /* tv_id */
2145 PROP_ssa
, /* properties_required */
2146 0, /* properties_provided */
2147 0, /* properties_destroyed */
2148 0, /* todo_flags_start */
2149 TODO_update_ssa
, /* todo_flags_finish */
2152 class pass_store_merging
: public gimple_opt_pass
2155 pass_store_merging (gcc::context
*ctxt
)
2156 : gimple_opt_pass (pass_data_tree_store_merging
, ctxt
), m_stores_head ()
2160 /* Pass not supported for PDP-endian, nor for insane hosts or
2161 target character sizes where native_{encode,interpret}_expr
2162 doesn't work properly. */
2166 return flag_store_merging
2167 && BYTES_BIG_ENDIAN
== WORDS_BIG_ENDIAN
2169 && BITS_PER_UNIT
== 8;
2172 virtual unsigned int execute (function
*);
2175 hash_map
<tree_operand_hash
, class imm_store_chain_info
*> m_stores
;
2177 /* Form a doubly-linked stack of the elements of m_stores, so that
2178 we can iterate over them in a predictable way. Using this order
2179 avoids extraneous differences in the compiler output just because
2180 of tree pointer variations (e.g. different chains end up in
2181 different positions of m_stores, so they are handled in different
2182 orders, so they allocate or release SSA names in different
2183 orders, and when they get reused, subsequent passes end up
2184 getting different SSA names, which may ultimately change
2185 decisions when going out of SSA). */
2186 imm_store_chain_info
*m_stores_head
;
2188 bool process_store (gimple
*);
2189 bool terminate_and_process_chain (imm_store_chain_info
*);
2190 bool terminate_all_aliasing_chains (imm_store_chain_info
**, gimple
*);
2191 bool terminate_and_process_all_chains ();
2192 }; // class pass_store_merging
2194 /* Terminate and process all recorded chains. Return true if any changes
2198 pass_store_merging::terminate_and_process_all_chains ()
2201 while (m_stores_head
)
2202 ret
|= terminate_and_process_chain (m_stores_head
);
2203 gcc_assert (m_stores
.is_empty ());
2207 /* Terminate all chains that are affected by the statement STMT.
2208 CHAIN_INFO is the chain we should ignore from the checks if
2209 non-NULL. Return true if any changes were made. */
2212 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2218 /* If the statement doesn't touch memory it can't alias. */
2219 if (!gimple_vuse (stmt
))
2222 tree store_lhs
= gimple_store_p (stmt
) ? gimple_get_lhs (stmt
) : NULL_TREE
;
2223 for (imm_store_chain_info
*next
= m_stores_head
, *cur
= next
; cur
; cur
= next
)
2227 /* We already checked all the stores in chain_info and terminated the
2228 chain if necessary. Skip it here. */
2229 if (chain_info
&& *chain_info
== cur
)
2232 store_immediate_info
*info
;
2234 FOR_EACH_VEC_ELT (cur
->m_store_info
, i
, info
)
2236 tree lhs
= gimple_assign_lhs (info
->stmt
);
2237 if (ref_maybe_used_by_stmt_p (stmt
, lhs
)
2238 || stmt_may_clobber_ref_p (stmt
, lhs
)
2239 || (store_lhs
&& refs_output_dependent_p (store_lhs
, lhs
)))
2241 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2243 fprintf (dump_file
, "stmt causes chain termination:\n");
2244 print_gimple_stmt (dump_file
, stmt
, 0);
2246 ret
|= terminate_and_process_chain (cur
);
2255 /* Helper function. Terminate the recorded chain storing to base object
2256 BASE. Return true if the merging and output was successful. The m_stores
2257 entry is removed after the processing in any case. */
2260 pass_store_merging::terminate_and_process_chain (imm_store_chain_info
*chain_info
)
2262 bool ret
= chain_info
->terminate_and_process_chain ();
2263 m_stores
.remove (chain_info
->base_addr
);
2268 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2269 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2270 be able to sink load of REF across stores between FIRST and LAST, up
2271 to right before LAST. */
2274 stmts_may_clobber_ref_p (gimple
*first
, gimple
*last
, tree ref
)
2277 ao_ref_init (&r
, ref
);
2278 unsigned int count
= 0;
2279 tree vop
= gimple_vdef (last
);
2282 /* Return true conservatively if the basic blocks are different. */
2283 if (gimple_bb (first
) != gimple_bb (last
))
2288 stmt
= SSA_NAME_DEF_STMT (vop
);
2289 if (stmt_may_clobber_ref_p_1 (stmt
, &r
))
2291 if (gimple_store_p (stmt
)
2292 && refs_anti_dependent_p (ref
, gimple_get_lhs (stmt
)))
2294 /* Avoid quadratic compile time by bounding the number of checks
2296 if (++count
> MAX_STORE_ALIAS_CHECKS
)
2298 vop
= gimple_vuse (stmt
);
2300 while (stmt
!= first
);
2305 /* Return true if INFO->ops[IDX] is mergeable with the
2306 corresponding loads already in MERGED_STORE group.
2307 BASE_ADDR is the base address of the whole store group. */
2310 compatible_load_p (merged_store_group
*merged_store
,
2311 store_immediate_info
*info
,
2312 tree base_addr
, int idx
)
2314 store_immediate_info
*infof
= merged_store
->stores
[0];
2315 if (!info
->ops
[idx
].base_addr
2316 || maybe_ne (info
->ops
[idx
].bitpos
- infof
->ops
[idx
].bitpos
,
2317 info
->bitpos
- infof
->bitpos
)
2318 || !operand_equal_p (info
->ops
[idx
].base_addr
,
2319 infof
->ops
[idx
].base_addr
, 0))
2322 store_immediate_info
*infol
= merged_store
->stores
.last ();
2323 tree load_vuse
= gimple_vuse (info
->ops
[idx
].stmt
);
2324 /* In this case all vuses should be the same, e.g.
2325 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2327 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2328 and we can emit the coalesced load next to any of those loads. */
2329 if (gimple_vuse (infof
->ops
[idx
].stmt
) == load_vuse
2330 && gimple_vuse (infol
->ops
[idx
].stmt
) == load_vuse
)
2333 /* Otherwise, at least for now require that the load has the same
2334 vuse as the store. See following examples. */
2335 if (gimple_vuse (info
->stmt
) != load_vuse
)
2338 if (gimple_vuse (infof
->stmt
) != gimple_vuse (infof
->ops
[idx
].stmt
)
2340 && gimple_vuse (infol
->stmt
) != gimple_vuse (infol
->ops
[idx
].stmt
)))
2343 /* If the load is from the same location as the store, already
2344 the construction of the immediate chain info guarantees no intervening
2345 stores, so no further checks are needed. Example:
2346 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2347 if (known_eq (info
->ops
[idx
].bitpos
, info
->bitpos
)
2348 && operand_equal_p (info
->ops
[idx
].base_addr
, base_addr
, 0))
2351 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2352 of the stores in the group, or any other stores in between those.
2353 Previous calls to compatible_load_p ensured that for all the
2354 merged_store->stores IDX loads, no stmts starting with
2355 merged_store->first_stmt and ending right before merged_store->last_stmt
2356 clobbers those loads. */
2357 gimple
*first
= merged_store
->first_stmt
;
2358 gimple
*last
= merged_store
->last_stmt
;
2360 store_immediate_info
*infoc
;
2361 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2362 comes before the so far first load, we'll be changing
2363 merged_store->first_stmt. In that case we need to give up if
2364 any of the earlier processed loads clobber with the stmts in the new
2366 if (info
->order
< merged_store
->first_order
)
2368 FOR_EACH_VEC_ELT (merged_store
->stores
, i
, infoc
)
2369 if (stmts_may_clobber_ref_p (info
->stmt
, first
, infoc
->ops
[idx
].val
))
2373 /* Similarly, we could change merged_store->last_stmt, so ensure
2374 in that case no stmts in the new range clobber any of the earlier
2376 else if (info
->order
> merged_store
->last_order
)
2378 FOR_EACH_VEC_ELT (merged_store
->stores
, i
, infoc
)
2379 if (stmts_may_clobber_ref_p (last
, info
->stmt
, infoc
->ops
[idx
].val
))
2383 /* And finally, we'd be adding a new load to the set, ensure it isn't
2384 clobbered in the new range. */
2385 if (stmts_may_clobber_ref_p (first
, last
, info
->ops
[idx
].val
))
2388 /* Otherwise, we are looking for:
2389 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2391 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2395 /* Add all refs loaded to compute VAL to REFS vector. */
2398 gather_bswap_load_refs (vec
<tree
> *refs
, tree val
)
2400 if (TREE_CODE (val
) != SSA_NAME
)
2403 gimple
*stmt
= SSA_NAME_DEF_STMT (val
);
2404 if (!is_gimple_assign (stmt
))
2407 if (gimple_assign_load_p (stmt
))
2409 refs
->safe_push (gimple_assign_rhs1 (stmt
));
2413 switch (gimple_assign_rhs_class (stmt
))
2415 case GIMPLE_BINARY_RHS
:
2416 gather_bswap_load_refs (refs
, gimple_assign_rhs2 (stmt
));
2418 case GIMPLE_UNARY_RHS
:
2419 gather_bswap_load_refs (refs
, gimple_assign_rhs1 (stmt
));
2426 /* Check if there are any stores in M_STORE_INFO after index I
2427 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2428 a potential group ending with END that have their order
2429 smaller than LAST_ORDER. RHS_CODE is the kind of store in the
2430 group. Return true if there are no such stores.
2432 MEM[(long long int *)p_28] = 0;
2433 MEM[(long long int *)p_28 + 8B] = 0;
2434 MEM[(long long int *)p_28 + 16B] = 0;
2435 MEM[(long long int *)p_28 + 24B] = 0;
2437 MEM[(int *)p_28 + 8B] = _129;
2438 MEM[(int *)p_28].a = -1;
2440 MEM[(long long int *)p_28] = 0;
2441 MEM[(int *)p_28].a = -1;
2442 stmts in the current group and need to consider if it is safe to
2443 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2444 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2445 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2446 into the group and merging of those 3 stores is successful, merged
2447 stmts will be emitted at the latest store from that group, i.e.
2448 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2449 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2450 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2451 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2452 into the group. That way it will be its own store group and will
2453 not be touched. If RHS_CODE is INTEGER_CST and there are overlapping
2454 INTEGER_CST stores, those are mergeable using merge_overlapping,
2455 so don't return false for those. */
2458 check_no_overlap (vec
<store_immediate_info
*> m_store_info
, unsigned int i
,
2459 enum tree_code rhs_code
, unsigned int last_order
,
2460 unsigned HOST_WIDE_INT end
)
2462 unsigned int len
= m_store_info
.length ();
2463 for (++i
; i
< len
; ++i
)
2465 store_immediate_info
*info
= m_store_info
[i
];
2466 if (info
->bitpos
>= end
)
2468 if (info
->order
< last_order
2469 && (rhs_code
!= INTEGER_CST
|| info
->rhs_code
!= INTEGER_CST
))
2475 /* Return true if m_store_info[first] and at least one following store
2476 form a group which store try_size bitsize value which is byte swapped
2477 from a memory load or some value, or identity from some value.
2478 This uses the bswap pass APIs. */
2481 imm_store_chain_info::try_coalesce_bswap (merged_store_group
*merged_store
,
2483 unsigned int try_size
)
2485 unsigned int len
= m_store_info
.length (), last
= first
;
2486 unsigned HOST_WIDE_INT width
= m_store_info
[first
]->bitsize
;
2487 if (width
>= try_size
)
2489 for (unsigned int i
= first
+ 1; i
< len
; ++i
)
2491 if (m_store_info
[i
]->bitpos
!= m_store_info
[first
]->bitpos
+ width
2492 || m_store_info
[i
]->ins_stmt
== NULL
)
2494 width
+= m_store_info
[i
]->bitsize
;
2495 if (width
>= try_size
)
2501 if (width
!= try_size
)
2504 bool allow_unaligned
2505 = !STRICT_ALIGNMENT
&& PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED
);
2506 /* Punt if the combined store would not be aligned and we need alignment. */
2507 if (!allow_unaligned
)
2509 unsigned int align
= merged_store
->align
;
2510 unsigned HOST_WIDE_INT align_base
= merged_store
->align_base
;
2511 for (unsigned int i
= first
+ 1; i
<= last
; ++i
)
2513 unsigned int this_align
;
2514 unsigned HOST_WIDE_INT align_bitpos
= 0;
2515 get_object_alignment_1 (gimple_assign_lhs (m_store_info
[i
]->stmt
),
2516 &this_align
, &align_bitpos
);
2517 if (this_align
> align
)
2520 align_base
= m_store_info
[i
]->bitpos
- align_bitpos
;
2523 unsigned HOST_WIDE_INT align_bitpos
2524 = (m_store_info
[first
]->bitpos
- align_base
) & (align
- 1);
2526 align
= least_bit_hwi (align_bitpos
);
2527 if (align
< try_size
)
2534 case 16: type
= uint16_type_node
; break;
2535 case 32: type
= uint32_type_node
; break;
2536 case 64: type
= uint64_type_node
; break;
2537 default: gcc_unreachable ();
2539 struct symbolic_number n
;
2540 gimple
*ins_stmt
= NULL
;
2541 int vuse_store
= -1;
2542 unsigned int first_order
= merged_store
->first_order
;
2543 unsigned int last_order
= merged_store
->last_order
;
2544 gimple
*first_stmt
= merged_store
->first_stmt
;
2545 gimple
*last_stmt
= merged_store
->last_stmt
;
2546 unsigned HOST_WIDE_INT end
= merged_store
->start
+ merged_store
->width
;
2547 store_immediate_info
*infof
= m_store_info
[first
];
2549 for (unsigned int i
= first
; i
<= last
; ++i
)
2551 store_immediate_info
*info
= m_store_info
[i
];
2552 struct symbolic_number this_n
= info
->n
;
2554 if (!this_n
.base_addr
)
2555 this_n
.range
= try_size
/ BITS_PER_UNIT
;
2557 /* Update vuse in case it has changed by output_merged_stores. */
2558 this_n
.vuse
= gimple_vuse (info
->ins_stmt
);
2559 unsigned int bitpos
= info
->bitpos
- infof
->bitpos
;
2560 if (!do_shift_rotate (LSHIFT_EXPR
, &this_n
,
2562 ? try_size
- info
->bitsize
- bitpos
2565 if (this_n
.base_addr
&& vuse_store
)
2568 for (j
= first
; j
<= last
; ++j
)
2569 if (this_n
.vuse
== gimple_vuse (m_store_info
[j
]->stmt
))
2573 if (vuse_store
== 1)
2581 ins_stmt
= info
->ins_stmt
;
2585 if (n
.base_addr
&& n
.vuse
!= this_n
.vuse
)
2587 if (vuse_store
== 0)
2591 if (info
->order
> last_order
)
2593 last_order
= info
->order
;
2594 last_stmt
= info
->stmt
;
2596 else if (info
->order
< first_order
)
2598 first_order
= info
->order
;
2599 first_stmt
= info
->stmt
;
2601 end
= MAX (end
, info
->bitpos
+ info
->bitsize
);
2603 ins_stmt
= perform_symbolic_merge (ins_stmt
, &n
, info
->ins_stmt
,
2605 if (ins_stmt
== NULL
)
2610 uint64_t cmpxchg
, cmpnop
;
2611 find_bswap_or_nop_finalize (&n
, &cmpxchg
, &cmpnop
);
2613 /* A complete byte swap should make the symbolic number to start with
2614 the largest digit in the highest order byte. Unchanged symbolic
2615 number indicates a read with same endianness as target architecture. */
2616 if (n
.n
!= cmpnop
&& n
.n
!= cmpxchg
)
2619 if (n
.base_addr
== NULL_TREE
&& !is_gimple_val (n
.src
))
2622 if (!check_no_overlap (m_store_info
, last
, LROTATE_EXPR
, last_order
, end
))
2625 /* Don't handle memory copy this way if normal non-bswap processing
2626 would handle it too. */
2627 if (n
.n
== cmpnop
&& (unsigned) n
.n_ops
== last
- first
+ 1)
2630 for (i
= first
; i
<= last
; ++i
)
2631 if (m_store_info
[i
]->rhs_code
!= MEM_REF
)
2641 /* Will emit LROTATE_EXPR. */
2644 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32
)
2645 && optab_handler (bswap_optab
, SImode
) != CODE_FOR_nothing
)
2649 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64
)
2650 && optab_handler (bswap_optab
, DImode
) != CODE_FOR_nothing
)
2657 if (!allow_unaligned
&& n
.base_addr
)
2659 unsigned int align
= get_object_alignment (n
.src
);
2660 if (align
< try_size
)
2664 /* If each load has vuse of the corresponding store, need to verify
2665 the loads can be sunk right before the last store. */
2666 if (vuse_store
== 1)
2668 auto_vec
<tree
, 64> refs
;
2669 for (unsigned int i
= first
; i
<= last
; ++i
)
2670 gather_bswap_load_refs (&refs
,
2671 gimple_assign_rhs1 (m_store_info
[i
]->stmt
));
2675 FOR_EACH_VEC_ELT (refs
, i
, ref
)
2676 if (stmts_may_clobber_ref_p (first_stmt
, last_stmt
, ref
))
2682 infof
->ins_stmt
= ins_stmt
;
2683 for (unsigned int i
= first
; i
<= last
; ++i
)
2685 m_store_info
[i
]->rhs_code
= n
.n
== cmpxchg
? LROTATE_EXPR
: NOP_EXPR
;
2686 m_store_info
[i
]->ops
[0].base_addr
= NULL_TREE
;
2687 m_store_info
[i
]->ops
[1].base_addr
= NULL_TREE
;
2689 merged_store
->merge_into (m_store_info
[i
]);
2695 /* Go through the candidate stores recorded in m_store_info and merge them
2696 into merged_store_group objects recorded into m_merged_store_groups
2697 representing the widened stores. Return true if coalescing was successful
2698 and the number of widened stores is fewer than the original number
2702 imm_store_chain_info::coalesce_immediate_stores ()
2704 /* Anything less can't be processed. */
2705 if (m_store_info
.length () < 2)
2708 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2709 fprintf (dump_file
, "Attempting to coalesce %u stores in chain\n",
2710 m_store_info
.length ());
2712 store_immediate_info
*info
;
2713 unsigned int i
, ignore
= 0;
2715 /* Order the stores by the bitposition they write to. */
2716 m_store_info
.qsort (sort_by_bitpos
);
2718 info
= m_store_info
[0];
2719 merged_store_group
*merged_store
= new merged_store_group (info
);
2720 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2721 fputs ("New store group\n", dump_file
);
2723 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
2725 unsigned HOST_WIDE_INT new_bitregion_start
, new_bitregion_end
;
2730 /* First try to handle group of stores like:
2735 using the bswap framework. */
2736 if (info
->bitpos
== merged_store
->start
+ merged_store
->width
2737 && merged_store
->stores
.length () == 1
2738 && merged_store
->stores
[0]->ins_stmt
!= NULL
2739 && info
->ins_stmt
!= NULL
)
2741 unsigned int try_size
;
2742 for (try_size
= 64; try_size
>= 16; try_size
>>= 1)
2743 if (try_coalesce_bswap (merged_store
, i
- 1, try_size
))
2748 ignore
= i
+ merged_store
->stores
.length () - 1;
2749 m_merged_store_groups
.safe_push (merged_store
);
2750 if (ignore
< m_store_info
.length ())
2751 merged_store
= new merged_store_group (m_store_info
[ignore
]);
2753 merged_store
= NULL
;
2759 = MIN (merged_store
->bitregion_start
, info
->bitregion_start
);
2761 = MAX (merged_store
->bitregion_end
, info
->bitregion_end
);
2763 if (info
->order
>= merged_store
->first_nonmergeable_order
2764 || (((new_bitregion_end
- new_bitregion_start
+ 1) / BITS_PER_UNIT
)
2765 > (unsigned) PARAM_VALUE (PARAM_STORE_MERGING_MAX_SIZE
)))
2770 Overlapping stores. */
2771 else if (IN_RANGE (info
->bitpos
, merged_store
->start
,
2772 merged_store
->start
+ merged_store
->width
- 1))
2774 /* Only allow overlapping stores of constants. */
2775 if (info
->rhs_code
== INTEGER_CST
2776 && merged_store
->only_constants
2777 && info
->lp_nr
== merged_store
->lp_nr
)
2779 unsigned int last_order
2780 = MAX (merged_store
->last_order
, info
->order
);
2781 unsigned HOST_WIDE_INT end
2782 = MAX (merged_store
->start
+ merged_store
->width
,
2783 info
->bitpos
+ info
->bitsize
);
2784 if (check_no_overlap (m_store_info
, i
, INTEGER_CST
,
2787 /* check_no_overlap call above made sure there are no
2788 overlapping stores with non-INTEGER_CST rhs_code
2789 in between the first and last of the stores we've
2790 just merged. If there are any INTEGER_CST rhs_code
2791 stores in between, we need to merge_overlapping them
2792 even if in the sort_by_bitpos order there are other
2793 overlapping stores in between. Keep those stores as is.
2795 MEM[(int *)p_28] = 0;
2796 MEM[(char *)p_28 + 3B] = 1;
2797 MEM[(char *)p_28 + 1B] = 2;
2798 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2799 We can't merge the zero store with the store of two and
2800 not merge anything else, because the store of one is
2801 in the original order in between those two, but in
2802 store_by_bitpos order it comes after the last store that
2803 we can't merge with them. We can merge the first 3 stores
2804 and keep the last store as is though. */
2805 unsigned int len
= m_store_info
.length ();
2806 unsigned int try_order
= last_order
;
2807 unsigned int first_nonmergeable_order
;
2809 bool last_iter
= false;
2813 unsigned int max_order
= 0;
2814 unsigned first_nonmergeable_int_order
= ~0U;
2815 unsigned HOST_WIDE_INT this_end
= end
;
2817 first_nonmergeable_order
= ~0U;
2818 for (unsigned int j
= i
+ 1; j
< len
; ++j
)
2820 store_immediate_info
*info2
= m_store_info
[j
];
2821 if (info2
->bitpos
>= this_end
)
2823 if (info2
->order
< try_order
)
2825 if (info2
->rhs_code
!= INTEGER_CST
)
2827 /* Normally check_no_overlap makes sure this
2828 doesn't happen, but if end grows below,
2829 then we need to process more stores than
2830 check_no_overlap verified. Example:
2831 MEM[(int *)p_5] = 0;
2832 MEM[(short *)p_5 + 3B] = 1;
2833 MEM[(char *)p_5 + 4B] = _9;
2834 MEM[(char *)p_5 + 2B] = 2; */
2839 this_end
= MAX (this_end
,
2840 info2
->bitpos
+ info2
->bitsize
);
2842 else if (info2
->rhs_code
== INTEGER_CST
2845 max_order
= MAX (max_order
, info2
->order
+ 1);
2846 first_nonmergeable_int_order
2847 = MIN (first_nonmergeable_int_order
,
2851 first_nonmergeable_order
2852 = MIN (first_nonmergeable_order
, info2
->order
);
2856 if (last_order
== try_order
)
2858 /* If this failed, but only because we grew
2859 try_order, retry with the last working one,
2860 so that we merge at least something. */
2861 try_order
= last_order
;
2865 last_order
= try_order
;
2866 /* Retry with a larger try_order to see if we could
2867 merge some further INTEGER_CST stores. */
2869 && (first_nonmergeable_int_order
2870 < first_nonmergeable_order
))
2872 try_order
= MIN (max_order
,
2873 first_nonmergeable_order
);
2876 merged_store
->first_nonmergeable_order
);
2877 if (try_order
> last_order
&& ++attempts
< 16)
2880 first_nonmergeable_order
2881 = MIN (first_nonmergeable_order
,
2882 first_nonmergeable_int_order
);
2890 merged_store
->merge_overlapping (info
);
2892 merged_store
->first_nonmergeable_order
2893 = MIN (merged_store
->first_nonmergeable_order
,
2894 first_nonmergeable_order
);
2896 for (unsigned int j
= i
+ 1; j
<= k
; j
++)
2898 store_immediate_info
*info2
= m_store_info
[j
];
2899 gcc_assert (info2
->bitpos
< end
);
2900 if (info2
->order
< last_order
)
2902 gcc_assert (info2
->rhs_code
== INTEGER_CST
);
2904 merged_store
->merge_overlapping (info2
);
2906 /* Other stores are kept and not merged in any
2915 /* |---store 1---||---store 2---|
2916 This store is consecutive to the previous one.
2917 Merge it into the current store group. There can be gaps in between
2918 the stores, but there can't be gaps in between bitregions. */
2919 else if (info
->bitregion_start
<= merged_store
->bitregion_end
2920 && merged_store
->can_be_merged_into (info
))
2922 store_immediate_info
*infof
= merged_store
->stores
[0];
2924 /* All the rhs_code ops that take 2 operands are commutative,
2925 swap the operands if it could make the operands compatible. */
2926 if (infof
->ops
[0].base_addr
2927 && infof
->ops
[1].base_addr
2928 && info
->ops
[0].base_addr
2929 && info
->ops
[1].base_addr
2930 && known_eq (info
->ops
[1].bitpos
- infof
->ops
[0].bitpos
,
2931 info
->bitpos
- infof
->bitpos
)
2932 && operand_equal_p (info
->ops
[1].base_addr
,
2933 infof
->ops
[0].base_addr
, 0))
2935 std::swap (info
->ops
[0], info
->ops
[1]);
2936 info
->ops_swapped_p
= true;
2938 if (check_no_overlap (m_store_info
, i
, info
->rhs_code
,
2939 MAX (merged_store
->last_order
, info
->order
),
2940 MAX (merged_store
->start
+ merged_store
->width
,
2941 info
->bitpos
+ info
->bitsize
)))
2943 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
2944 if (info
->rhs_code
== MEM_REF
&& infof
->rhs_code
!= MEM_REF
)
2946 info
->rhs_code
= BIT_INSERT_EXPR
;
2947 info
->ops
[0].val
= gimple_assign_rhs1 (info
->stmt
);
2948 info
->ops
[0].base_addr
= NULL_TREE
;
2950 else if (infof
->rhs_code
== MEM_REF
&& info
->rhs_code
!= MEM_REF
)
2952 store_immediate_info
*infoj
;
2954 FOR_EACH_VEC_ELT (merged_store
->stores
, j
, infoj
)
2956 infoj
->rhs_code
= BIT_INSERT_EXPR
;
2957 infoj
->ops
[0].val
= gimple_assign_rhs1 (infoj
->stmt
);
2958 infoj
->ops
[0].base_addr
= NULL_TREE
;
2961 if ((infof
->ops
[0].base_addr
2962 ? compatible_load_p (merged_store
, info
, base_addr
, 0)
2963 : !info
->ops
[0].base_addr
)
2964 && (infof
->ops
[1].base_addr
2965 ? compatible_load_p (merged_store
, info
, base_addr
, 1)
2966 : !info
->ops
[1].base_addr
))
2968 merged_store
->merge_into (info
);
2974 /* |---store 1---| <gap> |---store 2---|.
2975 Gap between stores or the rhs not compatible. Start a new group. */
2977 /* Try to apply all the stores recorded for the group to determine
2978 the bitpattern they write and discard it if that fails.
2979 This will also reject single-store groups. */
2980 if (merged_store
->apply_stores ())
2981 m_merged_store_groups
.safe_push (merged_store
);
2983 delete merged_store
;
2985 merged_store
= new merged_store_group (info
);
2986 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2987 fputs ("New store group\n", dump_file
);
2990 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2992 fprintf (dump_file
, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2993 " bitpos:" HOST_WIDE_INT_PRINT_DEC
" val:",
2994 i
, info
->bitsize
, info
->bitpos
);
2995 print_generic_expr (dump_file
, gimple_assign_rhs1 (info
->stmt
));
2996 fputc ('\n', dump_file
);
3000 /* Record or discard the last store group. */
3003 if (merged_store
->apply_stores ())
3004 m_merged_store_groups
.safe_push (merged_store
);
3006 delete merged_store
;
3009 gcc_assert (m_merged_store_groups
.length () <= m_store_info
.length ());
3012 = !m_merged_store_groups
.is_empty ()
3013 && m_merged_store_groups
.length () < m_store_info
.length ();
3015 if (success
&& dump_file
)
3016 fprintf (dump_file
, "Coalescing successful!\nMerged into %u stores\n",
3017 m_merged_store_groups
.length ());
3022 /* Return the type to use for the merged stores or loads described by STMTS.
3023 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3024 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3025 of the MEM_REFs if any. */
3028 get_alias_type_for_stmts (vec
<gimple
*> &stmts
, bool is_load
,
3029 unsigned short *cliquep
, unsigned short *basep
)
3033 tree type
= NULL_TREE
;
3034 tree ret
= NULL_TREE
;
3038 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
3040 tree ref
= is_load
? gimple_assign_rhs1 (stmt
)
3041 : gimple_assign_lhs (stmt
);
3042 tree type1
= reference_alias_ptr_type (ref
);
3043 tree base
= get_base_address (ref
);
3047 if (TREE_CODE (base
) == MEM_REF
)
3049 *cliquep
= MR_DEPENDENCE_CLIQUE (base
);
3050 *basep
= MR_DEPENDENCE_BASE (base
);
3055 if (!alias_ptr_types_compatible_p (type
, type1
))
3056 ret
= ptr_type_node
;
3057 if (TREE_CODE (base
) != MEM_REF
3058 || *cliquep
!= MR_DEPENDENCE_CLIQUE (base
)
3059 || *basep
!= MR_DEPENDENCE_BASE (base
))
3068 /* Return the location_t information we can find among the statements
3072 get_location_for_stmts (vec
<gimple
*> &stmts
)
3077 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
3078 if (gimple_has_location (stmt
))
3079 return gimple_location (stmt
);
3081 return UNKNOWN_LOCATION
;
3084 /* Used to decribe a store resulting from splitting a wide store in smaller
3085 regularly-sized stores in split_group. */
3090 unsigned HOST_WIDE_INT bytepos
;
3091 unsigned HOST_WIDE_INT size
;
3092 unsigned HOST_WIDE_INT align
;
3093 auto_vec
<store_immediate_info
*> orig_stores
;
3094 /* True if there is a single orig stmt covering the whole split store. */
3096 split_store (unsigned HOST_WIDE_INT
, unsigned HOST_WIDE_INT
,
3097 unsigned HOST_WIDE_INT
);
3100 /* Simple constructor. */
3102 split_store::split_store (unsigned HOST_WIDE_INT bp
,
3103 unsigned HOST_WIDE_INT sz
,
3104 unsigned HOST_WIDE_INT al
)
3105 : bytepos (bp
), size (sz
), align (al
), orig (false)
3107 orig_stores
.create (0);
3110 /* Record all stores in GROUP that write to the region starting at BITPOS and
3111 is of size BITSIZE. Record infos for such statements in STORES if
3112 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3113 if there is exactly one original store in the range. */
3115 static store_immediate_info
*
3116 find_constituent_stores (class merged_store_group
*group
,
3117 vec
<store_immediate_info
*> *stores
,
3118 unsigned int *first
,
3119 unsigned HOST_WIDE_INT bitpos
,
3120 unsigned HOST_WIDE_INT bitsize
)
3122 store_immediate_info
*info
, *ret
= NULL
;
3124 bool second
= false;
3125 bool update_first
= true;
3126 unsigned HOST_WIDE_INT end
= bitpos
+ bitsize
;
3127 for (i
= *first
; group
->stores
.iterate (i
, &info
); ++i
)
3129 unsigned HOST_WIDE_INT stmt_start
= info
->bitpos
;
3130 unsigned HOST_WIDE_INT stmt_end
= stmt_start
+ info
->bitsize
;
3131 if (stmt_end
<= bitpos
)
3133 /* BITPOS passed to this function never decreases from within the
3134 same split_group call, so optimize and don't scan info records
3135 which are known to end before or at BITPOS next time.
3136 Only do it if all stores before this one also pass this. */
3142 update_first
= false;
3144 /* The stores in GROUP are ordered by bitposition so if we're past
3145 the region for this group return early. */
3146 if (stmt_start
>= end
)
3151 stores
->safe_push (info
);
3166 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3167 store have multiple uses. If any SSA_NAME has multiple uses, also
3168 count statements needed to compute it. */
3171 count_multiple_uses (store_immediate_info
*info
)
3173 gimple
*stmt
= info
->stmt
;
3175 switch (info
->rhs_code
)
3182 if (info
->bit_not_p
)
3184 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3185 ret
= 1; /* Fall through below to return
3186 the BIT_NOT_EXPR stmt and then
3187 BIT_{AND,IOR,XOR}_EXPR and anything it
3190 /* stmt is after this the BIT_NOT_EXPR. */
3191 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3193 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3195 ret
+= 1 + info
->ops
[0].bit_not_p
;
3196 if (info
->ops
[1].base_addr
)
3197 ret
+= 1 + info
->ops
[1].bit_not_p
;
3200 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3201 /* stmt is now the BIT_*_EXPR. */
3202 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3203 ret
+= 1 + info
->ops
[info
->ops_swapped_p
].bit_not_p
;
3204 else if (info
->ops
[info
->ops_swapped_p
].bit_not_p
)
3206 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3207 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
3210 if (info
->ops
[1].base_addr
== NULL_TREE
)
3212 gcc_checking_assert (!info
->ops_swapped_p
);
3215 if (!has_single_use (gimple_assign_rhs2 (stmt
)))
3216 ret
+= 1 + info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
;
3217 else if (info
->ops
[1 - info
->ops_swapped_p
].bit_not_p
)
3219 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3220 if (!has_single_use (gimple_assign_rhs1 (stmt2
)))
3225 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3226 return 1 + info
->ops
[0].bit_not_p
;
3227 else if (info
->ops
[0].bit_not_p
)
3229 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3230 if (!has_single_use (gimple_assign_rhs1 (stmt
)))
3234 case BIT_INSERT_EXPR
:
3235 return has_single_use (gimple_assign_rhs1 (stmt
)) ? 0 : 1;
3241 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3242 vector (if non-NULL) with split_store structs describing the byte offset
3243 (from the base), the bit size and alignment of each store as well as the
3244 original statements involved in each such split group.
3245 This is to separate the splitting strategy from the statement
3246 building/emission/linking done in output_merged_store.
3247 Return number of new stores.
3248 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3249 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3250 BZERO_FIRST may be true only when the first store covers the whole group
3251 and clears it; if BZERO_FIRST is true, keep that first store in the set
3252 unmodified and emit further stores for the overrides only.
3253 If SPLIT_STORES is NULL, it is just a dry run to count number of
3257 split_group (merged_store_group
*group
, bool allow_unaligned_store
,
3258 bool allow_unaligned_load
, bool bzero_first
,
3259 vec
<split_store
*> *split_stores
,
3260 unsigned *total_orig
,
3261 unsigned *total_new
)
3263 unsigned HOST_WIDE_INT pos
= group
->bitregion_start
;
3264 unsigned HOST_WIDE_INT size
= group
->bitregion_end
- pos
;
3265 unsigned HOST_WIDE_INT bytepos
= pos
/ BITS_PER_UNIT
;
3266 unsigned HOST_WIDE_INT group_align
= group
->align
;
3267 unsigned HOST_WIDE_INT align_base
= group
->align_base
;
3268 unsigned HOST_WIDE_INT group_load_align
= group_align
;
3269 bool any_orig
= false;
3271 gcc_assert ((size
% BITS_PER_UNIT
== 0) && (pos
% BITS_PER_UNIT
== 0));
3273 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
3274 || group
->stores
[0]->rhs_code
== NOP_EXPR
)
3276 gcc_assert (!bzero_first
);
3277 /* For bswap framework using sets of stores, all the checking
3278 has been done earlier in try_coalesce_bswap and needs to be
3279 emitted as a single store. */
3282 /* Avoid the old/new stmt count heuristics. It should be
3283 always beneficial. */
3290 unsigned HOST_WIDE_INT align_bitpos
3291 = (group
->start
- align_base
) & (group_align
- 1);
3292 unsigned HOST_WIDE_INT align
= group_align
;
3294 align
= least_bit_hwi (align_bitpos
);
3295 bytepos
= group
->start
/ BITS_PER_UNIT
;
3297 = new split_store (bytepos
, group
->width
, align
);
3298 unsigned int first
= 0;
3299 find_constituent_stores (group
, &store
->orig_stores
,
3300 &first
, group
->start
, group
->width
);
3301 split_stores
->safe_push (store
);
3307 unsigned int ret
= 0, first
= 0;
3308 unsigned HOST_WIDE_INT try_pos
= bytepos
;
3313 store_immediate_info
*info
= group
->stores
[0];
3316 total_orig
[0] = 1; /* The orig store. */
3317 info
= group
->stores
[0];
3318 if (info
->ops
[0].base_addr
)
3320 if (info
->ops
[1].base_addr
)
3322 switch (info
->rhs_code
)
3327 total_orig
[0]++; /* The orig BIT_*_EXPR stmt. */
3332 total_orig
[0] *= group
->stores
.length ();
3334 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
3336 total_new
[0] += count_multiple_uses (info
);
3337 total_orig
[0] += (info
->bit_not_p
3338 + info
->ops
[0].bit_not_p
3339 + info
->ops
[1].bit_not_p
);
3343 if (!allow_unaligned_load
)
3344 for (int i
= 0; i
< 2; ++i
)
3345 if (group
->load_align
[i
])
3346 group_load_align
= MIN (group_load_align
, group
->load_align
[i
]);
3355 = new split_store (bytepos
, group
->stores
[0]->bitsize
, align_base
);
3356 store
->orig_stores
.safe_push (group
->stores
[0]);
3359 split_stores
->safe_push (store
);
3365 if ((allow_unaligned_store
|| group_align
<= BITS_PER_UNIT
)
3366 && (group
->mask
[try_pos
- bytepos
] == (unsigned char) ~0U
3367 || (bzero_first
&& group
->val
[try_pos
- bytepos
] == 0)))
3369 /* Skip padding bytes. */
3371 size
-= BITS_PER_UNIT
;
3375 unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
3376 unsigned int try_size
= MAX_STORE_BITSIZE
, nonmasked
;
3377 unsigned HOST_WIDE_INT align_bitpos
3378 = (try_bitpos
- align_base
) & (group_align
- 1);
3379 unsigned HOST_WIDE_INT align
= group_align
;
3381 align
= least_bit_hwi (align_bitpos
);
3382 if (!allow_unaligned_store
)
3383 try_size
= MIN (try_size
, align
);
3384 if (!allow_unaligned_load
)
3386 /* If we can't do or don't want to do unaligned stores
3387 as well as loads, we need to take the loads into account
3389 unsigned HOST_WIDE_INT load_align
= group_load_align
;
3390 align_bitpos
= (try_bitpos
- align_base
) & (load_align
- 1);
3392 load_align
= least_bit_hwi (align_bitpos
);
3393 for (int i
= 0; i
< 2; ++i
)
3394 if (group
->load_align
[i
])
3397 = known_alignment (try_bitpos
3398 - group
->stores
[0]->bitpos
3399 + group
->stores
[0]->ops
[i
].bitpos
3400 - group
->load_align_base
[i
]);
3401 if (align_bitpos
& (group_load_align
- 1))
3403 unsigned HOST_WIDE_INT a
= least_bit_hwi (align_bitpos
);
3404 load_align
= MIN (load_align
, a
);
3407 try_size
= MIN (try_size
, load_align
);
3409 store_immediate_info
*info
3410 = find_constituent_stores (group
, NULL
, &first
, try_bitpos
, try_size
);
3413 /* If there is just one original statement for the range, see if
3414 we can just reuse the original store which could be even larger
3416 unsigned HOST_WIDE_INT stmt_end
3417 = ROUND_UP (info
->bitpos
+ info
->bitsize
, BITS_PER_UNIT
);
3418 info
= find_constituent_stores (group
, NULL
, &first
, try_bitpos
,
3419 stmt_end
- try_bitpos
);
3420 if (info
&& info
->bitpos
>= try_bitpos
)
3422 try_size
= stmt_end
- try_bitpos
;
3427 /* Approximate store bitsize for the case when there are no padding
3429 while (try_size
> size
)
3431 /* Now look for whole padding bytes at the end of that bitsize. */
3432 for (nonmasked
= try_size
/ BITS_PER_UNIT
; nonmasked
> 0; --nonmasked
)
3433 if (group
->mask
[try_pos
- bytepos
+ nonmasked
- 1]
3434 != (unsigned char) ~0U
3436 || group
->val
[try_pos
- bytepos
+ nonmasked
- 1] != 0))
3440 /* If entire try_size range is padding, skip it. */
3441 try_pos
+= try_size
/ BITS_PER_UNIT
;
3445 /* Otherwise try to decrease try_size if second half, last 3 quarters
3446 etc. are padding. */
3447 nonmasked
*= BITS_PER_UNIT
;
3448 while (nonmasked
<= try_size
/ 2)
3450 if (!allow_unaligned_store
&& group_align
> BITS_PER_UNIT
)
3452 /* Now look for whole padding bytes at the start of that bitsize. */
3453 unsigned int try_bytesize
= try_size
/ BITS_PER_UNIT
, masked
;
3454 for (masked
= 0; masked
< try_bytesize
; ++masked
)
3455 if (group
->mask
[try_pos
- bytepos
+ masked
] != (unsigned char) ~0U
3457 || group
->val
[try_pos
- bytepos
+ masked
] != 0))
3459 masked
*= BITS_PER_UNIT
;
3460 gcc_assert (masked
< try_size
);
3461 if (masked
>= try_size
/ 2)
3463 while (masked
>= try_size
/ 2)
3466 try_pos
+= try_size
/ BITS_PER_UNIT
;
3470 /* Need to recompute the alignment, so just retry at the new
3482 = new split_store (try_pos
, try_size
, align
);
3483 info
= find_constituent_stores (group
, &store
->orig_stores
,
3484 &first
, try_bitpos
, try_size
);
3486 && info
->bitpos
>= try_bitpos
3487 && info
->bitpos
+ info
->bitsize
<= try_bitpos
+ try_size
)
3492 split_stores
->safe_push (store
);
3495 try_pos
+= try_size
/ BITS_PER_UNIT
;
3503 /* If we are reusing some original stores and any of the
3504 original SSA_NAMEs had multiple uses, we need to subtract
3505 those now before we add the new ones. */
3506 if (total_new
[0] && any_orig
)
3508 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
3510 total_new
[0] -= count_multiple_uses (store
->orig_stores
[0]);
3512 total_new
[0] += ret
; /* The new store. */
3513 store_immediate_info
*info
= group
->stores
[0];
3514 if (info
->ops
[0].base_addr
)
3515 total_new
[0] += ret
;
3516 if (info
->ops
[1].base_addr
)
3517 total_new
[0] += ret
;
3518 switch (info
->rhs_code
)
3523 total_new
[0] += ret
; /* The new BIT_*_EXPR stmt. */
3528 FOR_EACH_VEC_ELT (*split_stores
, i
, store
)
3531 bool bit_not_p
[3] = { false, false, false };
3532 /* If all orig_stores have certain bit_not_p set, then
3533 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3534 If some orig_stores have certain bit_not_p set, then
3535 we'd use a BIT_XOR_EXPR with a mask and need to account for
3537 FOR_EACH_VEC_ELT (store
->orig_stores
, j
, info
)
3539 if (info
->ops
[0].bit_not_p
)
3540 bit_not_p
[0] = true;
3541 if (info
->ops
[1].bit_not_p
)
3542 bit_not_p
[1] = true;
3543 if (info
->bit_not_p
)
3544 bit_not_p
[2] = true;
3546 total_new
[0] += bit_not_p
[0] + bit_not_p
[1] + bit_not_p
[2];
3554 /* Return the operation through which the operand IDX (if < 2) or
3555 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3556 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3557 the bits should be xored with mask. */
3559 static enum tree_code
3560 invert_op (split_store
*split_store
, int idx
, tree int_type
, tree
&mask
)
3563 store_immediate_info
*info
;
3564 unsigned int cnt
= 0;
3565 bool any_paddings
= false;
3566 FOR_EACH_VEC_ELT (split_store
->orig_stores
, i
, info
)
3568 bool bit_not_p
= idx
< 2 ? info
->ops
[idx
].bit_not_p
: info
->bit_not_p
;
3572 tree lhs
= gimple_assign_lhs (info
->stmt
);
3573 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3574 && TYPE_PRECISION (TREE_TYPE (lhs
)) < info
->bitsize
)
3575 any_paddings
= true;
3581 if (cnt
== split_store
->orig_stores
.length () && !any_paddings
)
3582 return BIT_NOT_EXPR
;
3584 unsigned HOST_WIDE_INT try_bitpos
= split_store
->bytepos
* BITS_PER_UNIT
;
3585 unsigned buf_size
= split_store
->size
/ BITS_PER_UNIT
;
3587 = XALLOCAVEC (unsigned char, buf_size
);
3588 memset (buf
, ~0U, buf_size
);
3589 FOR_EACH_VEC_ELT (split_store
->orig_stores
, i
, info
)
3591 bool bit_not_p
= idx
< 2 ? info
->ops
[idx
].bit_not_p
: info
->bit_not_p
;
3594 /* Clear regions with bit_not_p and invert afterwards, rather than
3595 clear regions with !bit_not_p, so that gaps in between stores aren't
3597 unsigned HOST_WIDE_INT bitsize
= info
->bitsize
;
3598 unsigned HOST_WIDE_INT prec
= bitsize
;
3599 unsigned int pos_in_buffer
= 0;
3602 tree lhs
= gimple_assign_lhs (info
->stmt
);
3603 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3604 && TYPE_PRECISION (TREE_TYPE (lhs
)) < bitsize
)
3605 prec
= TYPE_PRECISION (TREE_TYPE (lhs
));
3607 if (info
->bitpos
< try_bitpos
)
3609 gcc_assert (info
->bitpos
+ bitsize
> try_bitpos
);
3610 if (!BYTES_BIG_ENDIAN
)
3612 if (prec
<= try_bitpos
- info
->bitpos
)
3614 prec
-= try_bitpos
- info
->bitpos
;
3616 bitsize
-= try_bitpos
- info
->bitpos
;
3617 if (BYTES_BIG_ENDIAN
&& prec
> bitsize
)
3621 pos_in_buffer
= info
->bitpos
- try_bitpos
;
3624 /* If this is a bool inversion, invert just the least significant
3625 prec bits rather than all bits of it. */
3626 if (BYTES_BIG_ENDIAN
)
3628 pos_in_buffer
+= bitsize
- prec
;
3629 if (pos_in_buffer
>= split_store
->size
)
3634 if (pos_in_buffer
+ bitsize
> split_store
->size
)
3635 bitsize
= split_store
->size
- pos_in_buffer
;
3636 unsigned char *p
= buf
+ (pos_in_buffer
/ BITS_PER_UNIT
);
3637 if (BYTES_BIG_ENDIAN
)
3638 clear_bit_region_be (p
, (BITS_PER_UNIT
- 1
3639 - (pos_in_buffer
% BITS_PER_UNIT
)), bitsize
);
3641 clear_bit_region (p
, pos_in_buffer
% BITS_PER_UNIT
, bitsize
);
3643 for (unsigned int i
= 0; i
< buf_size
; ++i
)
3645 mask
= native_interpret_expr (int_type
, buf
, buf_size
);
3646 return BIT_XOR_EXPR
;
3649 /* Given a merged store group GROUP output the widened version of it.
3650 The store chain is against the base object BASE.
3651 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3652 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3653 Make sure that the number of statements output is less than the number of
3654 original statements. If a better sequence is possible emit it and
3658 imm_store_chain_info::output_merged_store (merged_store_group
*group
)
3660 split_store
*split_store
;
3662 unsigned HOST_WIDE_INT start_byte_pos
3663 = group
->bitregion_start
/ BITS_PER_UNIT
;
3665 unsigned int orig_num_stmts
= group
->stores
.length ();
3666 if (orig_num_stmts
< 2)
3669 auto_vec
<class split_store
*, 32> split_stores
;
3670 bool allow_unaligned_store
3671 = !STRICT_ALIGNMENT
&& PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED
);
3672 bool allow_unaligned_load
= allow_unaligned_store
;
3673 bool bzero_first
= false;
3674 if (group
->stores
[0]->rhs_code
== INTEGER_CST
3675 && TREE_CODE (gimple_assign_rhs1 (group
->stores
[0]->stmt
)) == CONSTRUCTOR
3676 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (group
->stores
[0]->stmt
)) == 0
3677 && group
->start
== group
->stores
[0]->bitpos
3678 && group
->width
== group
->stores
[0]->bitsize
3679 && (group
->start
% BITS_PER_UNIT
) == 0
3680 && (group
->width
% BITS_PER_UNIT
) == 0)
3682 if (allow_unaligned_store
|| bzero_first
)
3684 /* If unaligned stores are allowed, see how many stores we'd emit
3685 for unaligned and how many stores we'd emit for aligned stores.
3686 Only use unaligned stores if it allows fewer stores than aligned.
3687 Similarly, if there is a whole region clear first, prefer expanding
3688 it together compared to expanding clear first followed by merged
3690 unsigned cnt
[4] = { ~0, ~0, ~0, ~0 };
3692 for (int pass
= 0; pass
< 4; ++pass
)
3694 if (!allow_unaligned_store
&& (pass
& 1) != 0)
3696 if (!bzero_first
&& (pass
& 2) != 0)
3698 cnt
[pass
] = split_group (group
, (pass
& 1) != 0,
3699 allow_unaligned_load
, (pass
& 2) != 0,
3701 if (cnt
[pass
] < cnt
[pass_min
])
3704 if ((pass_min
& 1) == 0)
3705 allow_unaligned_store
= false;
3706 if ((pass_min
& 2) == 0)
3707 bzero_first
= false;
3709 unsigned total_orig
, total_new
;
3710 split_group (group
, allow_unaligned_store
, allow_unaligned_load
, bzero_first
,
3711 &split_stores
, &total_orig
, &total_new
);
3713 if (split_stores
.length () >= orig_num_stmts
)
3715 /* We didn't manage to reduce the number of statements. Bail out. */
3716 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3717 fprintf (dump_file
, "Exceeded original number of stmts (%u)."
3718 " Not profitable to emit new sequence.\n",
3720 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3724 if (total_orig
<= total_new
)
3726 /* If number of estimated new statements is above estimated original
3727 statements, bail out too. */
3728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3729 fprintf (dump_file
, "Estimated number of original stmts (%u)"
3730 " not larger than estimated number of new"
3732 total_orig
, total_new
);
3733 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3738 gimple_stmt_iterator last_gsi
= gsi_for_stmt (group
->last_stmt
);
3739 gimple_seq seq
= NULL
;
3740 tree last_vdef
, new_vuse
;
3741 last_vdef
= gimple_vdef (group
->last_stmt
);
3742 new_vuse
= gimple_vuse (group
->last_stmt
);
3743 tree bswap_res
= NULL_TREE
;
3745 if (group
->stores
[0]->rhs_code
== LROTATE_EXPR
3746 || group
->stores
[0]->rhs_code
== NOP_EXPR
)
3748 tree fndecl
= NULL_TREE
, bswap_type
= NULL_TREE
, load_type
;
3749 gimple
*ins_stmt
= group
->stores
[0]->ins_stmt
;
3750 struct symbolic_number
*n
= &group
->stores
[0]->n
;
3751 bool bswap
= group
->stores
[0]->rhs_code
== LROTATE_EXPR
;
3756 load_type
= bswap_type
= uint16_type_node
;
3759 load_type
= uint32_type_node
;
3762 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP32
);
3763 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
3767 load_type
= uint64_type_node
;
3770 fndecl
= builtin_decl_explicit (BUILT_IN_BSWAP64
);
3771 bswap_type
= TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl
)));
3778 /* If the loads have each vuse of the corresponding store,
3779 we've checked the aliasing already in try_coalesce_bswap and
3780 we want to sink the need load into seq. So need to use new_vuse
3784 if (n
->vuse
== NULL
)
3790 /* Update vuse in case it has changed by output_merged_stores. */
3791 n
->vuse
= gimple_vuse (ins_stmt
);
3793 bswap_res
= bswap_replace (gsi_start (seq
), ins_stmt
, fndecl
,
3794 bswap_type
, load_type
, n
, bswap
);
3795 gcc_assert (bswap_res
);
3798 gimple
*stmt
= NULL
;
3799 auto_vec
<gimple
*, 32> orig_stmts
;
3800 gimple_seq this_seq
;
3801 tree addr
= force_gimple_operand_1 (unshare_expr (base_addr
), &this_seq
,
3802 is_gimple_mem_ref_addr
, NULL_TREE
);
3803 gimple_seq_add_seq_without_update (&seq
, this_seq
);
3805 tree load_addr
[2] = { NULL_TREE
, NULL_TREE
};
3806 gimple_seq load_seq
[2] = { NULL
, NULL
};
3807 gimple_stmt_iterator load_gsi
[2] = { gsi_none (), gsi_none () };
3808 for (int j
= 0; j
< 2; ++j
)
3810 store_operand_info
&op
= group
->stores
[0]->ops
[j
];
3811 if (op
.base_addr
== NULL_TREE
)
3814 store_immediate_info
*infol
= group
->stores
.last ();
3815 if (gimple_vuse (op
.stmt
) == gimple_vuse (infol
->ops
[j
].stmt
))
3817 /* We can't pick the location randomly; while we've verified
3818 all the loads have the same vuse, they can be still in different
3819 basic blocks and we need to pick the one from the last bb:
3825 otherwise if we put the wider load at the q[0] load, we might
3826 segfault if q[1] is not mapped. */
3827 basic_block bb
= gimple_bb (op
.stmt
);
3828 gimple
*ostmt
= op
.stmt
;
3829 store_immediate_info
*info
;
3830 FOR_EACH_VEC_ELT (group
->stores
, i
, info
)
3832 gimple
*tstmt
= info
->ops
[j
].stmt
;
3833 basic_block tbb
= gimple_bb (tstmt
);
3834 if (dominated_by_p (CDI_DOMINATORS
, tbb
, bb
))
3840 load_gsi
[j
] = gsi_for_stmt (ostmt
);
3842 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
3843 &load_seq
[j
], is_gimple_mem_ref_addr
,
3846 else if (operand_equal_p (base_addr
, op
.base_addr
, 0))
3847 load_addr
[j
] = addr
;
3851 = force_gimple_operand_1 (unshare_expr (op
.base_addr
),
3852 &this_seq
, is_gimple_mem_ref_addr
,
3854 gimple_seq_add_seq_without_update (&seq
, this_seq
);
3858 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
3860 unsigned HOST_WIDE_INT try_size
= split_store
->size
;
3861 unsigned HOST_WIDE_INT try_pos
= split_store
->bytepos
;
3862 unsigned HOST_WIDE_INT try_bitpos
= try_pos
* BITS_PER_UNIT
;
3863 unsigned HOST_WIDE_INT align
= split_store
->align
;
3866 if (split_store
->orig
)
3868 /* If there is just a single constituent store which covers
3869 the whole area, just reuse the lhs and rhs. */
3870 gimple
*orig_stmt
= split_store
->orig_stores
[0]->stmt
;
3871 dest
= gimple_assign_lhs (orig_stmt
);
3872 src
= gimple_assign_rhs1 (orig_stmt
);
3873 loc
= gimple_location (orig_stmt
);
3877 store_immediate_info
*info
;
3878 unsigned short clique
, base
;
3880 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
3881 orig_stmts
.safe_push (info
->stmt
);
3883 = get_alias_type_for_stmts (orig_stmts
, false, &clique
, &base
);
3884 loc
= get_location_for_stmts (orig_stmts
);
3885 orig_stmts
.truncate (0);
3887 tree int_type
= build_nonstandard_integer_type (try_size
, UNSIGNED
);
3888 int_type
= build_aligned_type (int_type
, align
);
3889 dest
= fold_build2 (MEM_REF
, int_type
, addr
,
3890 build_int_cst (offset_type
, try_pos
));
3891 if (TREE_CODE (dest
) == MEM_REF
)
3893 MR_DEPENDENCE_CLIQUE (dest
) = clique
;
3894 MR_DEPENDENCE_BASE (dest
) = base
;
3899 mask
= integer_zero_node
;
3901 mask
= native_interpret_expr (int_type
,
3902 group
->mask
+ try_pos
3908 j
< 1 + (split_store
->orig_stores
[0]->ops
[1].val
!= NULL_TREE
);
3911 store_operand_info
&op
= split_store
->orig_stores
[0]->ops
[j
];
3914 else if (op
.base_addr
)
3916 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
3917 orig_stmts
.safe_push (info
->ops
[j
].stmt
);
3919 offset_type
= get_alias_type_for_stmts (orig_stmts
, true,
3921 location_t load_loc
= get_location_for_stmts (orig_stmts
);
3922 orig_stmts
.truncate (0);
3924 unsigned HOST_WIDE_INT load_align
= group
->load_align
[j
];
3925 unsigned HOST_WIDE_INT align_bitpos
3926 = known_alignment (try_bitpos
3927 - split_store
->orig_stores
[0]->bitpos
3929 if (align_bitpos
& (load_align
- 1))
3930 load_align
= least_bit_hwi (align_bitpos
);
3933 = build_nonstandard_integer_type (try_size
, UNSIGNED
);
3935 = build_aligned_type (load_int_type
, load_align
);
3937 poly_uint64 load_pos
3938 = exact_div (try_bitpos
3939 - split_store
->orig_stores
[0]->bitpos
3942 ops
[j
] = fold_build2 (MEM_REF
, load_int_type
, load_addr
[j
],
3943 build_int_cst (offset_type
, load_pos
));
3944 if (TREE_CODE (ops
[j
]) == MEM_REF
)
3946 MR_DEPENDENCE_CLIQUE (ops
[j
]) = clique
;
3947 MR_DEPENDENCE_BASE (ops
[j
]) = base
;
3949 if (!integer_zerop (mask
))
3950 /* The load might load some bits (that will be masked off
3951 later on) uninitialized, avoid -W*uninitialized
3952 warnings in that case. */
3953 TREE_NO_WARNING (ops
[j
]) = 1;
3955 stmt
= gimple_build_assign (make_ssa_name (int_type
),
3957 gimple_set_location (stmt
, load_loc
);
3958 if (gsi_bb (load_gsi
[j
]))
3960 gimple_set_vuse (stmt
, gimple_vuse (op
.stmt
));
3961 gimple_seq_add_stmt_without_update (&load_seq
[j
], stmt
);
3965 gimple_set_vuse (stmt
, new_vuse
);
3966 gimple_seq_add_stmt_without_update (&seq
, stmt
);
3968 ops
[j
] = gimple_assign_lhs (stmt
);
3970 enum tree_code inv_op
3971 = invert_op (split_store
, j
, int_type
, xor_mask
);
3972 if (inv_op
!= NOP_EXPR
)
3974 stmt
= gimple_build_assign (make_ssa_name (int_type
),
3975 inv_op
, ops
[j
], xor_mask
);
3976 gimple_set_location (stmt
, load_loc
);
3977 ops
[j
] = gimple_assign_lhs (stmt
);
3979 if (gsi_bb (load_gsi
[j
]))
3980 gimple_seq_add_stmt_without_update (&load_seq
[j
],
3983 gimple_seq_add_stmt_without_update (&seq
, stmt
);
3987 ops
[j
] = native_interpret_expr (int_type
,
3988 group
->val
+ try_pos
3993 switch (split_store
->orig_stores
[0]->rhs_code
)
3998 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4000 tree rhs1
= gimple_assign_rhs1 (info
->stmt
);
4001 orig_stmts
.safe_push (SSA_NAME_DEF_STMT (rhs1
));
4004 bit_loc
= get_location_for_stmts (orig_stmts
);
4005 orig_stmts
.truncate (0);
4008 = gimple_build_assign (make_ssa_name (int_type
),
4009 split_store
->orig_stores
[0]->rhs_code
,
4011 gimple_set_location (stmt
, bit_loc
);
4012 /* If there is just one load and there is a separate
4013 load_seq[0], emit the bitwise op right after it. */
4014 if (load_addr
[1] == NULL_TREE
&& gsi_bb (load_gsi
[0]))
4015 gimple_seq_add_stmt_without_update (&load_seq
[0], stmt
);
4016 /* Otherwise, if at least one load is in seq, we need to
4017 emit the bitwise op right before the store. If there
4018 are two loads and are emitted somewhere else, it would
4019 be better to emit the bitwise op as early as possible;
4020 we don't track where that would be possible right now
4023 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4024 src
= gimple_assign_lhs (stmt
);
4026 enum tree_code inv_op
;
4027 inv_op
= invert_op (split_store
, 2, int_type
, xor_mask
);
4028 if (inv_op
!= NOP_EXPR
)
4030 stmt
= gimple_build_assign (make_ssa_name (int_type
),
4031 inv_op
, src
, xor_mask
);
4032 gimple_set_location (stmt
, bit_loc
);
4033 if (load_addr
[1] == NULL_TREE
&& gsi_bb (load_gsi
[0]))
4034 gimple_seq_add_stmt_without_update (&load_seq
[0], stmt
);
4036 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4037 src
= gimple_assign_lhs (stmt
);
4043 if (!is_gimple_val (src
))
4045 stmt
= gimple_build_assign (make_ssa_name (TREE_TYPE (src
)),
4047 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4048 src
= gimple_assign_lhs (stmt
);
4050 if (!useless_type_conversion_p (int_type
, TREE_TYPE (src
)))
4052 stmt
= gimple_build_assign (make_ssa_name (int_type
),
4054 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4055 src
= gimple_assign_lhs (stmt
);
4057 inv_op
= invert_op (split_store
, 2, int_type
, xor_mask
);
4058 if (inv_op
!= NOP_EXPR
)
4060 stmt
= gimple_build_assign (make_ssa_name (int_type
),
4061 inv_op
, src
, xor_mask
);
4062 gimple_set_location (stmt
, loc
);
4063 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4064 src
= gimple_assign_lhs (stmt
);
4072 /* If bit insertion is required, we use the source as an accumulator
4073 into which the successive bit-field values are manually inserted.
4074 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4075 if (group
->bit_insertion
)
4076 FOR_EACH_VEC_ELT (split_store
->orig_stores
, k
, info
)
4077 if (info
->rhs_code
== BIT_INSERT_EXPR
4078 && info
->bitpos
< try_bitpos
+ try_size
4079 && info
->bitpos
+ info
->bitsize
> try_bitpos
)
4081 /* Mask, truncate, convert to final type, shift and ior into
4082 the accumulator. Note that every step can be a no-op. */
4083 const HOST_WIDE_INT start_gap
= info
->bitpos
- try_bitpos
;
4084 const HOST_WIDE_INT end_gap
4085 = (try_bitpos
+ try_size
) - (info
->bitpos
+ info
->bitsize
);
4086 tree tem
= info
->ops
[0].val
;
4087 if (TYPE_PRECISION (TREE_TYPE (tem
)) <= info
->bitsize
)
4090 = build_nonstandard_integer_type (info
->bitsize
,
4092 tem
= gimple_convert (&seq
, loc
, bitfield_type
, tem
);
4094 else if ((BYTES_BIG_ENDIAN
? start_gap
: end_gap
) > 0)
4096 const unsigned HOST_WIDE_INT imask
4097 = (HOST_WIDE_INT_1U
<< info
->bitsize
) - 1;
4098 tem
= gimple_build (&seq
, loc
,
4099 BIT_AND_EXPR
, TREE_TYPE (tem
), tem
,
4100 build_int_cst (TREE_TYPE (tem
),
4103 const HOST_WIDE_INT shift
4104 = (BYTES_BIG_ENDIAN
? end_gap
: start_gap
);
4106 tem
= gimple_build (&seq
, loc
,
4107 RSHIFT_EXPR
, TREE_TYPE (tem
), tem
,
4108 build_int_cst (NULL_TREE
, -shift
));
4109 tem
= gimple_convert (&seq
, loc
, int_type
, tem
);
4111 tem
= gimple_build (&seq
, loc
,
4112 LSHIFT_EXPR
, int_type
, tem
,
4113 build_int_cst (NULL_TREE
, shift
));
4114 src
= gimple_build (&seq
, loc
,
4115 BIT_IOR_EXPR
, int_type
, tem
, src
);
4118 if (!integer_zerop (mask
))
4120 tree tem
= make_ssa_name (int_type
);
4121 tree load_src
= unshare_expr (dest
);
4122 /* The load might load some or all bits uninitialized,
4123 avoid -W*uninitialized warnings in that case.
4124 As optimization, it would be nice if all the bits are
4125 provably uninitialized (no stores at all yet or previous
4126 store a CLOBBER) we'd optimize away the load and replace
4128 TREE_NO_WARNING (load_src
) = 1;
4129 stmt
= gimple_build_assign (tem
, load_src
);
4130 gimple_set_location (stmt
, loc
);
4131 gimple_set_vuse (stmt
, new_vuse
);
4132 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4134 /* FIXME: If there is a single chunk of zero bits in mask,
4135 perhaps use BIT_INSERT_EXPR instead? */
4136 stmt
= gimple_build_assign (make_ssa_name (int_type
),
4137 BIT_AND_EXPR
, tem
, mask
);
4138 gimple_set_location (stmt
, loc
);
4139 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4140 tem
= gimple_assign_lhs (stmt
);
4142 if (TREE_CODE (src
) == INTEGER_CST
)
4143 src
= wide_int_to_tree (int_type
,
4144 wi::bit_and_not (wi::to_wide (src
),
4145 wi::to_wide (mask
)));
4149 = wide_int_to_tree (int_type
,
4150 wi::bit_not (wi::to_wide (mask
)));
4151 stmt
= gimple_build_assign (make_ssa_name (int_type
),
4152 BIT_AND_EXPR
, src
, nmask
);
4153 gimple_set_location (stmt
, loc
);
4154 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4155 src
= gimple_assign_lhs (stmt
);
4157 stmt
= gimple_build_assign (make_ssa_name (int_type
),
4158 BIT_IOR_EXPR
, tem
, src
);
4159 gimple_set_location (stmt
, loc
);
4160 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4161 src
= gimple_assign_lhs (stmt
);
4165 stmt
= gimple_build_assign (dest
, src
);
4166 gimple_set_location (stmt
, loc
);
4167 gimple_set_vuse (stmt
, new_vuse
);
4168 gimple_seq_add_stmt_without_update (&seq
, stmt
);
4170 if (group
->lp_nr
&& stmt_could_throw_p (cfun
, stmt
))
4171 add_stmt_to_eh_lp (stmt
, group
->lp_nr
);
4174 if (i
< split_stores
.length () - 1)
4175 new_vdef
= make_ssa_name (gimple_vop (cfun
), stmt
);
4177 new_vdef
= last_vdef
;
4179 gimple_set_vdef (stmt
, new_vdef
);
4180 SSA_NAME_DEF_STMT (new_vdef
) = stmt
;
4181 new_vuse
= new_vdef
;
4184 FOR_EACH_VEC_ELT (split_stores
, i
, split_store
)
4191 "New sequence of %u stores to replace old one of %u stores\n",
4192 split_stores
.length (), orig_num_stmts
);
4193 if (dump_flags
& TDF_DETAILS
)
4194 print_gimple_seq (dump_file
, seq
, 0, TDF_VOPS
| TDF_MEMSYMS
);
4197 if (group
->lp_nr
> 0)
4199 /* We're going to insert a sequence of (potentially) throwing stores
4200 into an active EH region. This means that we're going to create
4201 new basic blocks with EH edges pointing to the post landing pad
4202 and, therefore, to have to update its PHI nodes, if any. For the
4203 virtual PHI node, we're going to use the VDEFs created above, but
4204 for the other nodes, we need to record the original reaching defs. */
4205 eh_landing_pad lp
= get_eh_landing_pad_from_number (group
->lp_nr
);
4206 basic_block lp_bb
= label_to_block (cfun
, lp
->post_landing_pad
);
4207 basic_block last_bb
= gimple_bb (group
->last_stmt
);
4208 edge last_edge
= find_edge (last_bb
, lp_bb
);
4209 auto_vec
<tree
, 16> last_defs
;
4211 for (gpi
= gsi_start_phis (lp_bb
); !gsi_end_p (gpi
); gsi_next (&gpi
))
4213 gphi
*phi
= gpi
.phi ();
4215 if (virtual_operand_p (gimple_phi_result (phi
)))
4216 last_def
= NULL_TREE
;
4218 last_def
= gimple_phi_arg_def (phi
, last_edge
->dest_idx
);
4219 last_defs
.safe_push (last_def
);
4222 /* Do the insertion. Then, if new basic blocks have been created in the
4223 process, rewind the chain of VDEFs create above to walk the new basic
4224 blocks and update the corresponding arguments of the PHI nodes. */
4225 update_modified_stmts (seq
);
4226 if (gimple_find_sub_bbs (seq
, &last_gsi
))
4227 while (last_vdef
!= gimple_vuse (group
->last_stmt
))
4229 gimple
*stmt
= SSA_NAME_DEF_STMT (last_vdef
);
4230 if (stmt_could_throw_p (cfun
, stmt
))
4232 edge new_edge
= find_edge (gimple_bb (stmt
), lp_bb
);
4234 for (gpi
= gsi_start_phis (lp_bb
), i
= 0;
4236 gsi_next (&gpi
), i
++)
4238 gphi
*phi
= gpi
.phi ();
4240 if (virtual_operand_p (gimple_phi_result (phi
)))
4241 new_def
= last_vdef
;
4243 new_def
= last_defs
[i
];
4244 add_phi_arg (phi
, new_def
, new_edge
, UNKNOWN_LOCATION
);
4247 last_vdef
= gimple_vuse (stmt
);
4251 gsi_insert_seq_after (&last_gsi
, seq
, GSI_SAME_STMT
);
4253 for (int j
= 0; j
< 2; ++j
)
4255 gsi_insert_seq_after (&load_gsi
[j
], load_seq
[j
], GSI_SAME_STMT
);
4260 /* Process the merged_store_group objects created in the coalescing phase.
4261 The stores are all against the base object BASE.
4262 Try to output the widened stores and delete the original statements if
4263 successful. Return true iff any changes were made. */
4266 imm_store_chain_info::output_merged_stores ()
4269 merged_store_group
*merged_store
;
4271 FOR_EACH_VEC_ELT (m_merged_store_groups
, i
, merged_store
)
4273 if (dbg_cnt (store_merging
)
4274 && output_merged_store (merged_store
))
4277 store_immediate_info
*store
;
4278 FOR_EACH_VEC_ELT (merged_store
->stores
, j
, store
)
4280 gimple
*stmt
= store
->stmt
;
4281 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4282 gsi_remove (&gsi
, true);
4284 remove_stmt_from_eh_lp (stmt
);
4285 if (stmt
!= merged_store
->last_stmt
)
4287 unlink_stmt_vdef (stmt
);
4288 release_defs (stmt
);
4294 if (ret
&& dump_file
)
4295 fprintf (dump_file
, "Merging successful!\n");
4300 /* Coalesce the store_immediate_info objects recorded against the base object
4301 BASE in the first phase and output them.
4302 Delete the allocated structures.
4303 Return true if any changes were made. */
4306 imm_store_chain_info::terminate_and_process_chain ()
4308 /* Process store chain. */
4310 if (m_store_info
.length () > 1)
4312 ret
= coalesce_immediate_stores ();
4314 ret
= output_merged_stores ();
4317 /* Delete all the entries we allocated ourselves. */
4318 store_immediate_info
*info
;
4320 FOR_EACH_VEC_ELT (m_store_info
, i
, info
)
4323 merged_store_group
*merged_info
;
4324 FOR_EACH_VEC_ELT (m_merged_store_groups
, i
, merged_info
)
4330 /* Return true iff LHS is a destination potentially interesting for
4331 store merging. In practice these are the codes that get_inner_reference
4335 lhs_valid_for_store_merging_p (tree lhs
)
4340 switch (TREE_CODE (lhs
))
4343 case ARRAY_RANGE_REF
:
4355 /* Return true if the tree RHS is a constant we want to consider
4356 during store merging. In practice accept all codes that
4357 native_encode_expr accepts. */
4360 rhs_valid_for_store_merging_p (tree rhs
)
4362 unsigned HOST_WIDE_INT size
;
4363 if (TREE_CODE (rhs
) == CONSTRUCTOR
4364 && !TREE_CLOBBER_P (rhs
)
4365 && CONSTRUCTOR_NELTS (rhs
) == 0
4366 && TYPE_SIZE_UNIT (TREE_TYPE (rhs
))
4367 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs
))))
4369 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs
))).is_constant (&size
)
4370 && native_encode_expr (rhs
, NULL
, size
) != 0);
4373 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4374 and return true on success or false on failure. */
4377 adjust_bit_pos (poly_offset_int byte_off
,
4378 poly_int64
*pbitpos
,
4379 poly_uint64
*pbitregion_start
,
4380 poly_uint64
*pbitregion_end
)
4382 poly_offset_int bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
4383 bit_off
+= *pbitpos
;
4385 if (known_ge (bit_off
, 0) && bit_off
.to_shwi (pbitpos
))
4387 if (maybe_ne (*pbitregion_end
, 0U))
4389 bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
4390 bit_off
+= *pbitregion_start
;
4391 if (bit_off
.to_uhwi (pbitregion_start
))
4393 bit_off
= byte_off
<< LOG2_BITS_PER_UNIT
;
4394 bit_off
+= *pbitregion_end
;
4395 if (!bit_off
.to_uhwi (pbitregion_end
))
4396 *pbitregion_end
= 0;
4399 *pbitregion_end
= 0;
4407 /* If MEM is a memory reference usable for store merging (either as
4408 store destination or for loads), return the non-NULL base_addr
4409 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4410 Otherwise return NULL, *PBITPOS should be still valid even for that
4414 mem_valid_for_store_merging (tree mem
, poly_uint64
*pbitsize
,
4415 poly_uint64
*pbitpos
,
4416 poly_uint64
*pbitregion_start
,
4417 poly_uint64
*pbitregion_end
)
4419 poly_int64 bitsize
, bitpos
;
4420 poly_uint64 bitregion_start
= 0, bitregion_end
= 0;
4422 int unsignedp
= 0, reversep
= 0, volatilep
= 0;
4424 tree base_addr
= get_inner_reference (mem
, &bitsize
, &bitpos
, &offset
, &mode
,
4425 &unsignedp
, &reversep
, &volatilep
);
4426 *pbitsize
= bitsize
;
4427 if (known_eq (bitsize
, 0))
4430 if (TREE_CODE (mem
) == COMPONENT_REF
4431 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem
, 1)))
4433 get_bit_range (&bitregion_start
, &bitregion_end
, mem
, &bitpos
, &offset
);
4434 if (maybe_ne (bitregion_end
, 0U))
4441 /* We do not want to rewrite TARGET_MEM_REFs. */
4442 if (TREE_CODE (base_addr
) == TARGET_MEM_REF
)
4444 /* In some cases get_inner_reference may return a
4445 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4446 canonicalize the base_addr to MEM_REF [ptr] and take
4447 byteoffset into account in the bitpos. This occurs in
4448 PR 23684 and this way we can catch more chains. */
4449 else if (TREE_CODE (base_addr
) == MEM_REF
)
4451 if (!adjust_bit_pos (mem_ref_offset (base_addr
), &bitpos
,
4452 &bitregion_start
, &bitregion_end
))
4454 base_addr
= TREE_OPERAND (base_addr
, 0);
4456 /* get_inner_reference returns the base object, get at its
4460 if (maybe_lt (bitpos
, 0))
4462 base_addr
= build_fold_addr_expr (base_addr
);
4467 /* If the access is variable offset then a base decl has to be
4468 address-taken to be able to emit pointer-based stores to it.
4469 ??? We might be able to get away with re-using the original
4470 base up to the first variable part and then wrapping that inside
4472 tree base
= get_base_address (base_addr
);
4473 if (!base
|| (DECL_P (base
) && !TREE_ADDRESSABLE (base
)))
4476 /* Similarly to above for the base, remove constant from the offset. */
4477 if (TREE_CODE (offset
) == PLUS_EXPR
4478 && TREE_CODE (TREE_OPERAND (offset
, 1)) == INTEGER_CST
4479 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset
, 1)),
4480 &bitpos
, &bitregion_start
, &bitregion_end
))
4481 offset
= TREE_OPERAND (offset
, 0);
4483 base_addr
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base_addr
),
4487 if (known_eq (bitregion_end
, 0U))
4489 bitregion_start
= round_down_to_byte_boundary (bitpos
);
4490 bitregion_end
= round_up_to_byte_boundary (bitpos
+ bitsize
);
4493 *pbitsize
= bitsize
;
4495 *pbitregion_start
= bitregion_start
;
4496 *pbitregion_end
= bitregion_end
;
4500 /* Return true if STMT is a load that can be used for store merging.
4501 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4502 BITREGION_END are properties of the corresponding store. */
4505 handled_load (gimple
*stmt
, store_operand_info
*op
,
4506 poly_uint64 bitsize
, poly_uint64 bitpos
,
4507 poly_uint64 bitregion_start
, poly_uint64 bitregion_end
)
4509 if (!is_gimple_assign (stmt
))
4511 if (gimple_assign_rhs_code (stmt
) == BIT_NOT_EXPR
)
4513 tree rhs1
= gimple_assign_rhs1 (stmt
);
4514 if (TREE_CODE (rhs1
) == SSA_NAME
4515 && handled_load (SSA_NAME_DEF_STMT (rhs1
), op
, bitsize
, bitpos
,
4516 bitregion_start
, bitregion_end
))
4518 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
4519 been optimized earlier, but if allowed here, would confuse the
4520 multiple uses counting. */
4523 op
->bit_not_p
= !op
->bit_not_p
;
4528 if (gimple_vuse (stmt
)
4529 && gimple_assign_load_p (stmt
)
4530 && !stmt_can_throw_internal (cfun
, stmt
)
4531 && !gimple_has_volatile_ops (stmt
))
4533 tree mem
= gimple_assign_rhs1 (stmt
);
4535 = mem_valid_for_store_merging (mem
, &op
->bitsize
, &op
->bitpos
,
4536 &op
->bitregion_start
,
4537 &op
->bitregion_end
);
4538 if (op
->base_addr
!= NULL_TREE
4539 && known_eq (op
->bitsize
, bitsize
)
4540 && multiple_p (op
->bitpos
- bitpos
, BITS_PER_UNIT
)
4541 && known_ge (op
->bitpos
- op
->bitregion_start
,
4542 bitpos
- bitregion_start
)
4543 && known_ge (op
->bitregion_end
- op
->bitpos
,
4544 bitregion_end
- bitpos
))
4548 op
->bit_not_p
= false;
4555 /* Return the index number of the landing pad for STMT, if any. */
4558 lp_nr_for_store (gimple
*stmt
)
4560 if (!cfun
->can_throw_non_call_exceptions
|| !cfun
->eh
)
4563 if (!stmt_could_throw_p (cfun
, stmt
))
4566 return lookup_stmt_eh_lp (stmt
);
4569 /* Record the store STMT for store merging optimization if it can be
4570 optimized. Return true if any changes were made. */
4573 pass_store_merging::process_store (gimple
*stmt
)
4575 tree lhs
= gimple_assign_lhs (stmt
);
4576 tree rhs
= gimple_assign_rhs1 (stmt
);
4577 poly_uint64 bitsize
, bitpos
;
4578 poly_uint64 bitregion_start
, bitregion_end
;
4580 = mem_valid_for_store_merging (lhs
, &bitsize
, &bitpos
,
4581 &bitregion_start
, &bitregion_end
);
4582 if (known_eq (bitsize
, 0U))
4585 bool invalid
= (base_addr
== NULL_TREE
4586 || (maybe_gt (bitsize
,
4587 (unsigned int) MAX_BITSIZE_MODE_ANY_INT
)
4588 && TREE_CODE (rhs
) != INTEGER_CST
4589 && (TREE_CODE (rhs
) != CONSTRUCTOR
4590 || CONSTRUCTOR_NELTS (rhs
) != 0)));
4591 enum tree_code rhs_code
= ERROR_MARK
;
4592 bool bit_not_p
= false;
4593 struct symbolic_number n
;
4594 gimple
*ins_stmt
= NULL
;
4595 store_operand_info ops
[2];
4598 else if (rhs_valid_for_store_merging_p (rhs
))
4600 rhs_code
= INTEGER_CST
;
4603 else if (TREE_CODE (rhs
) != SSA_NAME
)
4607 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
), *def_stmt1
, *def_stmt2
;
4608 if (!is_gimple_assign (def_stmt
))
4610 else if (handled_load (def_stmt
, &ops
[0], bitsize
, bitpos
,
4611 bitregion_start
, bitregion_end
))
4613 else if (gimple_assign_rhs_code (def_stmt
) == BIT_NOT_EXPR
)
4615 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
4616 if (TREE_CODE (rhs1
) == SSA_NAME
4617 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1
)))
4620 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
4624 if (rhs_code
== ERROR_MARK
&& !invalid
)
4625 switch ((rhs_code
= gimple_assign_rhs_code (def_stmt
)))
4631 rhs1
= gimple_assign_rhs1 (def_stmt
);
4632 rhs2
= gimple_assign_rhs2 (def_stmt
);
4634 if (TREE_CODE (rhs1
) != SSA_NAME
)
4636 def_stmt1
= SSA_NAME_DEF_STMT (rhs1
);
4637 if (!is_gimple_assign (def_stmt1
)
4638 || !handled_load (def_stmt1
, &ops
[0], bitsize
, bitpos
,
4639 bitregion_start
, bitregion_end
))
4641 if (rhs_valid_for_store_merging_p (rhs2
))
4643 else if (TREE_CODE (rhs2
) != SSA_NAME
)
4647 def_stmt2
= SSA_NAME_DEF_STMT (rhs2
);
4648 if (!is_gimple_assign (def_stmt2
))
4650 else if (!handled_load (def_stmt2
, &ops
[1], bitsize
, bitpos
,
4651 bitregion_start
, bitregion_end
))
4661 unsigned HOST_WIDE_INT const_bitsize
;
4662 if (bitsize
.is_constant (&const_bitsize
)
4663 && (const_bitsize
% BITS_PER_UNIT
) == 0
4664 && const_bitsize
<= 64
4665 && multiple_p (bitpos
, BITS_PER_UNIT
))
4667 ins_stmt
= find_bswap_or_nop_1 (def_stmt
, &n
, 12);
4671 for (unsigned HOST_WIDE_INT i
= 0;
4673 i
+= BITS_PER_UNIT
, nn
>>= BITS_PER_MARKER
)
4674 if ((nn
& MARKER_MASK
) == 0
4675 || (nn
& MARKER_MASK
) == MARKER_BYTE_UNKNOWN
)
4684 rhs_code
= LROTATE_EXPR
;
4685 ops
[0].base_addr
= NULL_TREE
;
4686 ops
[1].base_addr
= NULL_TREE
;
4694 && bitsize
.is_constant (&const_bitsize
)
4695 && ((const_bitsize
% BITS_PER_UNIT
) != 0
4696 || !multiple_p (bitpos
, BITS_PER_UNIT
))
4697 && const_bitsize
<= 64)
4699 /* Bypass a conversion to the bit-field type. */
4701 && is_gimple_assign (def_stmt
)
4702 && CONVERT_EXPR_CODE_P (rhs_code
))
4704 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
4705 if (TREE_CODE (rhs1
) == SSA_NAME
4706 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
)))
4709 rhs_code
= BIT_INSERT_EXPR
;
4712 ops
[0].base_addr
= NULL_TREE
;
4713 ops
[1].base_addr
= NULL_TREE
;
4718 unsigned HOST_WIDE_INT const_bitsize
, const_bitpos
;
4719 unsigned HOST_WIDE_INT const_bitregion_start
, const_bitregion_end
;
4721 || !bitsize
.is_constant (&const_bitsize
)
4722 || !bitpos
.is_constant (&const_bitpos
)
4723 || !bitregion_start
.is_constant (&const_bitregion_start
)
4724 || !bitregion_end
.is_constant (&const_bitregion_end
))
4725 return terminate_all_aliasing_chains (NULL
, stmt
);
4728 memset (&n
, 0, sizeof (n
));
4730 class imm_store_chain_info
**chain_info
= NULL
;
4733 chain_info
= m_stores
.get (base_addr
);
4735 store_immediate_info
*info
;
4738 unsigned int ord
= (*chain_info
)->m_store_info
.length ();
4739 info
= new store_immediate_info (const_bitsize
, const_bitpos
,
4740 const_bitregion_start
,
4741 const_bitregion_end
,
4742 stmt
, ord
, rhs_code
, n
, ins_stmt
,
4743 bit_not_p
, lp_nr_for_store (stmt
),
4745 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4747 fprintf (dump_file
, "Recording immediate store from stmt:\n");
4748 print_gimple_stmt (dump_file
, stmt
, 0);
4750 (*chain_info
)->m_store_info
.safe_push (info
);
4751 ret
|= terminate_all_aliasing_chains (chain_info
, stmt
);
4752 /* If we reach the limit of stores to merge in a chain terminate and
4753 process the chain now. */
4754 if ((*chain_info
)->m_store_info
.length ()
4755 == (unsigned int) PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE
))
4757 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4759 "Reached maximum number of statements to merge:\n");
4760 ret
|= terminate_and_process_chain (*chain_info
);
4765 /* Store aliases any existing chain? */
4766 ret
|= terminate_all_aliasing_chains (NULL
, stmt
);
4767 /* Start a new chain. */
4768 class imm_store_chain_info
*new_chain
4769 = new imm_store_chain_info (m_stores_head
, base_addr
);
4770 info
= new store_immediate_info (const_bitsize
, const_bitpos
,
4771 const_bitregion_start
,
4772 const_bitregion_end
,
4773 stmt
, 0, rhs_code
, n
, ins_stmt
,
4774 bit_not_p
, lp_nr_for_store (stmt
),
4776 new_chain
->m_store_info
.safe_push (info
);
4777 m_stores
.put (base_addr
, new_chain
);
4778 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4780 fprintf (dump_file
, "Starting new chain with statement:\n");
4781 print_gimple_stmt (dump_file
, stmt
, 0);
4782 fprintf (dump_file
, "The base object is:\n");
4783 print_generic_expr (dump_file
, base_addr
);
4784 fprintf (dump_file
, "\n");
4789 /* Return true if STMT is a store valid for store merging. */
4792 store_valid_for_store_merging_p (gimple
*stmt
)
4794 return gimple_assign_single_p (stmt
)
4795 && gimple_vdef (stmt
)
4796 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt
))
4797 && !gimple_has_volatile_ops (stmt
);
4800 enum basic_block_status
{ BB_INVALID
, BB_VALID
, BB_EXTENDED_VALID
};
4802 /* Return the status of basic block BB wrt store merging. */
4804 static enum basic_block_status
4805 get_status_for_store_merging (basic_block bb
)
4807 unsigned int num_statements
= 0;
4808 gimple_stmt_iterator gsi
;
4811 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4813 gimple
*stmt
= gsi_stmt (gsi
);
4815 if (is_gimple_debug (stmt
))
4818 if (store_valid_for_store_merging_p (stmt
) && ++num_statements
>= 2)
4822 if (num_statements
== 0)
4825 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
4826 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb
)))
4827 && (e
= find_fallthru_edge (bb
->succs
))
4828 && e
->dest
== bb
->next_bb
)
4829 return BB_EXTENDED_VALID
;
4831 return num_statements
>= 2 ? BB_VALID
: BB_INVALID
;
4834 /* Entry point for the pass. Go over each basic block recording chains of
4835 immediate stores. Upon encountering a terminating statement (as defined
4836 by stmt_terminates_chain_p) process the recorded stores and emit the widened
4840 pass_store_merging::execute (function
*fun
)
4843 hash_set
<gimple
*> orig_stmts
;
4844 bool changed
= false, open_chains
= false;
4846 /* If the function can throw and catch non-call exceptions, we'll be trying
4847 to merge stores across different basic blocks so we need to first unsplit
4848 the EH edges in order to streamline the CFG of the function. */
4849 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
)
4850 unsplit_eh_edges ();
4852 calculate_dominance_info (CDI_DOMINATORS
);
4854 FOR_EACH_BB_FN (bb
, fun
)
4856 const basic_block_status bb_status
= get_status_for_store_merging (bb
);
4857 gimple_stmt_iterator gsi
;
4859 if (open_chains
&& (bb_status
== BB_INVALID
|| !single_pred_p (bb
)))
4861 changed
|= terminate_and_process_all_chains ();
4862 open_chains
= false;
4865 if (bb_status
== BB_INVALID
)
4868 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4869 fprintf (dump_file
, "Processing basic block <%d>:\n", bb
->index
);
4871 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4873 gimple
*stmt
= gsi_stmt (gsi
);
4875 if (is_gimple_debug (stmt
))
4878 if (gimple_has_volatile_ops (stmt
))
4880 /* Terminate all chains. */
4881 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4882 fprintf (dump_file
, "Volatile access terminates "
4884 changed
|= terminate_and_process_all_chains ();
4885 open_chains
= false;
4889 if (store_valid_for_store_merging_p (stmt
))
4890 changed
|= process_store (stmt
);
4892 changed
|= terminate_all_aliasing_chains (NULL
, stmt
);
4895 if (bb_status
== BB_EXTENDED_VALID
)
4899 changed
|= terminate_and_process_all_chains ();
4900 open_chains
= false;
4905 changed
|= terminate_and_process_all_chains ();
4907 /* If the function can throw and catch non-call exceptions and something
4908 changed during the pass, then the CFG has (very likely) changed too. */
4909 if (cfun
->can_throw_non_call_exceptions
&& cfun
->eh
&& changed
)
4911 free_dominance_info (CDI_DOMINATORS
);
4912 return TODO_cleanup_cfg
;
4920 /* Construct and return a store merging pass object. */
4923 make_pass_store_merging (gcc::context
*ctxt
)
4925 return new pass_store_merging (ctxt
);
4930 namespace selftest
{
4932 /* Selftests for store merging helpers. */
4934 /* Assert that all elements of the byte arrays X and Y, both of length N
4938 verify_array_eq (unsigned char *x
, unsigned char *y
, unsigned int n
)
4940 for (unsigned int i
= 0; i
< n
; i
++)
4944 fprintf (stderr
, "Arrays do not match. X:\n");
4945 dump_char_array (stderr
, x
, n
);
4946 fprintf (stderr
, "Y:\n");
4947 dump_char_array (stderr
, y
, n
);
4949 ASSERT_EQ (x
[i
], y
[i
]);
4953 /* Test shift_bytes_in_array and that it carries bits across between
4957 verify_shift_bytes_in_array (void)
4960 00011111 | 11100000. */
4961 unsigned char orig
[2] = { 0xe0, 0x1f };
4962 unsigned char in
[2];
4963 memcpy (in
, orig
, sizeof orig
);
4965 unsigned char expected
[2] = { 0x80, 0x7f };
4966 shift_bytes_in_array (in
, sizeof (in
), 2);
4967 verify_array_eq (in
, expected
, sizeof (in
));
4969 memcpy (in
, orig
, sizeof orig
);
4970 memcpy (expected
, orig
, sizeof orig
);
4971 /* Check that shifting by zero doesn't change anything. */
4972 shift_bytes_in_array (in
, sizeof (in
), 0);
4973 verify_array_eq (in
, expected
, sizeof (in
));
4977 /* Test shift_bytes_in_array_right and that it carries bits across between
4981 verify_shift_bytes_in_array_right (void)
4984 00011111 | 11100000. */
4985 unsigned char orig
[2] = { 0x1f, 0xe0};
4986 unsigned char in
[2];
4987 memcpy (in
, orig
, sizeof orig
);
4988 unsigned char expected
[2] = { 0x07, 0xf8};
4989 shift_bytes_in_array_right (in
, sizeof (in
), 2);
4990 verify_array_eq (in
, expected
, sizeof (in
));
4992 memcpy (in
, orig
, sizeof orig
);
4993 memcpy (expected
, orig
, sizeof orig
);
4994 /* Check that shifting by zero doesn't change anything. */
4995 shift_bytes_in_array_right (in
, sizeof (in
), 0);
4996 verify_array_eq (in
, expected
, sizeof (in
));
4999 /* Test clear_bit_region that it clears exactly the bits asked and
5003 verify_clear_bit_region (void)
5005 /* Start with all bits set and test clearing various patterns in them. */
5006 unsigned char orig
[3] = { 0xff, 0xff, 0xff};
5007 unsigned char in
[3];
5008 unsigned char expected
[3];
5009 memcpy (in
, orig
, sizeof in
);
5011 /* Check zeroing out all the bits. */
5012 clear_bit_region (in
, 0, 3 * BITS_PER_UNIT
);
5013 expected
[0] = expected
[1] = expected
[2] = 0;
5014 verify_array_eq (in
, expected
, sizeof in
);
5016 memcpy (in
, orig
, sizeof in
);
5017 /* Leave the first and last bits intact. */
5018 clear_bit_region (in
, 1, 3 * BITS_PER_UNIT
- 2);
5022 verify_array_eq (in
, expected
, sizeof in
);
5025 /* Test verify_clear_bit_region_be that it clears exactly the bits asked and
5029 verify_clear_bit_region_be (void)
5031 /* Start with all bits set and test clearing various patterns in them. */
5032 unsigned char orig
[3] = { 0xff, 0xff, 0xff};
5033 unsigned char in
[3];
5034 unsigned char expected
[3];
5035 memcpy (in
, orig
, sizeof in
);
5037 /* Check zeroing out all the bits. */
5038 clear_bit_region_be (in
, BITS_PER_UNIT
- 1, 3 * BITS_PER_UNIT
);
5039 expected
[0] = expected
[1] = expected
[2] = 0;
5040 verify_array_eq (in
, expected
, sizeof in
);
5042 memcpy (in
, orig
, sizeof in
);
5043 /* Leave the first and last bits intact. */
5044 clear_bit_region_be (in
, BITS_PER_UNIT
- 2, 3 * BITS_PER_UNIT
- 2);
5048 verify_array_eq (in
, expected
, sizeof in
);
5052 /* Run all of the selftests within this file. */
5055 store_merging_c_tests (void)
5057 verify_shift_bytes_in_array ();
5058 verify_shift_bytes_in_array_right ();
5059 verify_clear_bit_region ();
5060 verify_clear_bit_region_be ();
5063 } // namespace selftest
5064 #endif /* CHECKING_P. */