2020-01-10 Richard Biener <rguenther@suse.de>
[gcc.git] / gcc / gimple-ssa-store-merging.c
1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2020 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
24
25 For example, if we have a sequence peforming four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
39 if there is no overlap can be transformed into a single 4-byte
40 load followed by single 4-byte store.
41
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55
56 The algorithm is applied to each basic block in three phases:
57
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be a either constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
74
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 following storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
83
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
94
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
103
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
110
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
120
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
125
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
129
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
136
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
140
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
173
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
181
182 namespace {
183
184 struct bswap_stat
185 {
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
188
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
191
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194 } nop_stats, bswap_stats;
195
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where does each
199 byte come from in the source:
200
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
204
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
214
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 time a range of 1.
220
221 Note 2: for non-memory sources, range holds the same value as size.
222
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
224
225 struct symbolic_number {
226 uint64_t n;
227 tree type;
228 tree base_addr;
229 tree offset;
230 poly_int64_pod bytepos;
231 tree src;
232 tree alias_set;
233 tree vuse;
234 unsigned HOST_WIDE_INT range;
235 int n_ops;
236 };
237
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
249
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
255
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
259
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
264 {
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 unsigned head_marker;
267
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278
279 switch (code)
280 {
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
301 }
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
306 }
307
308 /* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
310
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313 {
314 tree lhs_type;
315
316 lhs_type = gimple_expr_type (stmt);
317
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
319 return false;
320
321 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
322 return false;
323
324 return true;
325 }
326
327 /* Initialize the symbolic number N for the bswap pass from the base element
328 SRC manipulated by the bitwise OR expression. */
329
330 bool
331 init_symbolic_number (struct symbolic_number *n, tree src)
332 {
333 int size;
334
335 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
336 return false;
337
338 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
339 n->src = src;
340
341 /* Set up the symbolic number N by setting each byte to a value between 1 and
342 the byte size of rhs1. The highest order byte is set to n->size and the
343 lowest order byte to 1. */
344 n->type = TREE_TYPE (src);
345 size = TYPE_PRECISION (n->type);
346 if (size % BITS_PER_UNIT != 0)
347 return false;
348 size /= BITS_PER_UNIT;
349 if (size > 64 / BITS_PER_MARKER)
350 return false;
351 n->range = size;
352 n->n = CMPNOP;
353 n->n_ops = 1;
354
355 if (size < 64 / BITS_PER_MARKER)
356 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
357
358 return true;
359 }
360
361 /* Check if STMT might be a byte swap or a nop from a memory source and returns
362 the answer. If so, REF is that memory source and the base of the memory area
363 accessed and the offset of the access from that base are recorded in N. */
364
365 bool
366 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
367 {
368 /* Leaf node is an array or component ref. Memorize its base and
369 offset from base to compare to other such leaf node. */
370 poly_int64 bitsize, bitpos, bytepos;
371 machine_mode mode;
372 int unsignedp, reversep, volatilep;
373 tree offset, base_addr;
374
375 /* Not prepared to handle PDP endian. */
376 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
377 return false;
378
379 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
380 return false;
381
382 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
383 &unsignedp, &reversep, &volatilep);
384
385 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
386 /* Do not rewrite TARGET_MEM_REF. */
387 return false;
388 else if (TREE_CODE (base_addr) == MEM_REF)
389 {
390 poly_offset_int bit_offset = 0;
391 tree off = TREE_OPERAND (base_addr, 1);
392
393 if (!integer_zerop (off))
394 {
395 poly_offset_int boff = mem_ref_offset (base_addr);
396 boff <<= LOG2_BITS_PER_UNIT;
397 bit_offset += boff;
398 }
399
400 base_addr = TREE_OPERAND (base_addr, 0);
401
402 /* Avoid returning a negative bitpos as this may wreak havoc later. */
403 if (maybe_lt (bit_offset, 0))
404 {
405 tree byte_offset = wide_int_to_tree
406 (sizetype, bits_to_bytes_round_down (bit_offset));
407 bit_offset = num_trailing_bits (bit_offset);
408 if (offset)
409 offset = size_binop (PLUS_EXPR, offset, byte_offset);
410 else
411 offset = byte_offset;
412 }
413
414 bitpos += bit_offset.force_shwi ();
415 }
416 else
417 base_addr = build_fold_addr_expr (base_addr);
418
419 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
420 return false;
421 if (!multiple_p (bitsize, BITS_PER_UNIT))
422 return false;
423 if (reversep)
424 return false;
425
426 if (!init_symbolic_number (n, ref))
427 return false;
428 n->base_addr = base_addr;
429 n->offset = offset;
430 n->bytepos = bytepos;
431 n->alias_set = reference_alias_ptr_type (ref);
432 n->vuse = gimple_vuse (stmt);
433 return true;
434 }
435
436 /* Compute the symbolic number N representing the result of a bitwise OR on 2
437 symbolic number N1 and N2 whose source statements are respectively
438 SOURCE_STMT1 and SOURCE_STMT2. */
439
440 gimple *
441 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
442 gimple *source_stmt2, struct symbolic_number *n2,
443 struct symbolic_number *n)
444 {
445 int i, size;
446 uint64_t mask;
447 gimple *source_stmt;
448 struct symbolic_number *n_start;
449
450 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
451 if (TREE_CODE (rhs1) == BIT_FIELD_REF
452 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
453 rhs1 = TREE_OPERAND (rhs1, 0);
454 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
455 if (TREE_CODE (rhs2) == BIT_FIELD_REF
456 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
457 rhs2 = TREE_OPERAND (rhs2, 0);
458
459 /* Sources are different, cancel bswap if they are not memory location with
460 the same base (array, structure, ...). */
461 if (rhs1 != rhs2)
462 {
463 uint64_t inc;
464 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
465 struct symbolic_number *toinc_n_ptr, *n_end;
466 basic_block bb1, bb2;
467
468 if (!n1->base_addr || !n2->base_addr
469 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
470 return NULL;
471
472 if (!n1->offset != !n2->offset
473 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
474 return NULL;
475
476 start1 = 0;
477 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
478 return NULL;
479
480 if (start1 < start2)
481 {
482 n_start = n1;
483 start_sub = start2 - start1;
484 }
485 else
486 {
487 n_start = n2;
488 start_sub = start1 - start2;
489 }
490
491 bb1 = gimple_bb (source_stmt1);
492 bb2 = gimple_bb (source_stmt2);
493 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
494 source_stmt = source_stmt1;
495 else
496 source_stmt = source_stmt2;
497
498 /* Find the highest address at which a load is performed and
499 compute related info. */
500 end1 = start1 + (n1->range - 1);
501 end2 = start2 + (n2->range - 1);
502 if (end1 < end2)
503 {
504 end = end2;
505 end_sub = end2 - end1;
506 }
507 else
508 {
509 end = end1;
510 end_sub = end1 - end2;
511 }
512 n_end = (end2 > end1) ? n2 : n1;
513
514 /* Find symbolic number whose lsb is the most significant. */
515 if (BYTES_BIG_ENDIAN)
516 toinc_n_ptr = (n_end == n1) ? n2 : n1;
517 else
518 toinc_n_ptr = (n_start == n1) ? n2 : n1;
519
520 n->range = end - MIN (start1, start2) + 1;
521
522 /* Check that the range of memory covered can be represented by
523 a symbolic number. */
524 if (n->range > 64 / BITS_PER_MARKER)
525 return NULL;
526
527 /* Reinterpret byte marks in symbolic number holding the value of
528 bigger weight according to target endianness. */
529 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
530 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
531 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
532 {
533 unsigned marker
534 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
535 if (marker && marker != MARKER_BYTE_UNKNOWN)
536 toinc_n_ptr->n += inc;
537 }
538 }
539 else
540 {
541 n->range = n1->range;
542 n_start = n1;
543 source_stmt = source_stmt1;
544 }
545
546 if (!n1->alias_set
547 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
548 n->alias_set = n1->alias_set;
549 else
550 n->alias_set = ptr_type_node;
551 n->vuse = n_start->vuse;
552 n->base_addr = n_start->base_addr;
553 n->offset = n_start->offset;
554 n->src = n_start->src;
555 n->bytepos = n_start->bytepos;
556 n->type = n_start->type;
557 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
558
559 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
560 {
561 uint64_t masked1, masked2;
562
563 masked1 = n1->n & mask;
564 masked2 = n2->n & mask;
565 if (masked1 && masked2 && masked1 != masked2)
566 return NULL;
567 }
568 n->n = n1->n | n2->n;
569 n->n_ops = n1->n_ops + n2->n_ops;
570
571 return source_stmt;
572 }
573
574 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
575 the operation given by the rhs of STMT on the result. If the operation
576 could successfully be executed the function returns a gimple stmt whose
577 rhs's first tree is the expression of the source operand and NULL
578 otherwise. */
579
580 gimple *
581 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
582 {
583 enum tree_code code;
584 tree rhs1, rhs2 = NULL;
585 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
586 enum gimple_rhs_class rhs_class;
587
588 if (!limit || !is_gimple_assign (stmt))
589 return NULL;
590
591 rhs1 = gimple_assign_rhs1 (stmt);
592
593 if (find_bswap_or_nop_load (stmt, rhs1, n))
594 return stmt;
595
596 /* Handle BIT_FIELD_REF. */
597 if (TREE_CODE (rhs1) == BIT_FIELD_REF
598 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
599 {
600 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
601 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
602 if (bitpos % BITS_PER_UNIT == 0
603 && bitsize % BITS_PER_UNIT == 0
604 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
605 {
606 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
607 if (BYTES_BIG_ENDIAN)
608 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
609
610 /* Shift. */
611 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
612 return NULL;
613
614 /* Mask. */
615 uint64_t mask = 0;
616 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
617 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
618 i++, tmp <<= BITS_PER_UNIT)
619 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
620 n->n &= mask;
621
622 /* Convert. */
623 n->type = TREE_TYPE (rhs1);
624 if (!n->base_addr)
625 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
626
627 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
628 }
629
630 return NULL;
631 }
632
633 if (TREE_CODE (rhs1) != SSA_NAME)
634 return NULL;
635
636 code = gimple_assign_rhs_code (stmt);
637 rhs_class = gimple_assign_rhs_class (stmt);
638 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
639
640 if (rhs_class == GIMPLE_BINARY_RHS)
641 rhs2 = gimple_assign_rhs2 (stmt);
642
643 /* Handle unary rhs and binary rhs with integer constants as second
644 operand. */
645
646 if (rhs_class == GIMPLE_UNARY_RHS
647 || (rhs_class == GIMPLE_BINARY_RHS
648 && TREE_CODE (rhs2) == INTEGER_CST))
649 {
650 if (code != BIT_AND_EXPR
651 && code != LSHIFT_EXPR
652 && code != RSHIFT_EXPR
653 && code != LROTATE_EXPR
654 && code != RROTATE_EXPR
655 && !CONVERT_EXPR_CODE_P (code))
656 return NULL;
657
658 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
659
660 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
661 we have to initialize the symbolic number. */
662 if (!source_stmt1)
663 {
664 if (gimple_assign_load_p (stmt)
665 || !init_symbolic_number (n, rhs1))
666 return NULL;
667 source_stmt1 = stmt;
668 }
669
670 switch (code)
671 {
672 case BIT_AND_EXPR:
673 {
674 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
675 uint64_t val = int_cst_value (rhs2), mask = 0;
676 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
677
678 /* Only constants masking full bytes are allowed. */
679 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
680 if ((val & tmp) != 0 && (val & tmp) != tmp)
681 return NULL;
682 else if (val & tmp)
683 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
684
685 n->n &= mask;
686 }
687 break;
688 case LSHIFT_EXPR:
689 case RSHIFT_EXPR:
690 case LROTATE_EXPR:
691 case RROTATE_EXPR:
692 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
693 return NULL;
694 break;
695 CASE_CONVERT:
696 {
697 int i, type_size, old_type_size;
698 tree type;
699
700 type = gimple_expr_type (stmt);
701 type_size = TYPE_PRECISION (type);
702 if (type_size % BITS_PER_UNIT != 0)
703 return NULL;
704 type_size /= BITS_PER_UNIT;
705 if (type_size > 64 / BITS_PER_MARKER)
706 return NULL;
707
708 /* Sign extension: result is dependent on the value. */
709 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
710 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
711 && HEAD_MARKER (n->n, old_type_size))
712 for (i = 0; i < type_size - old_type_size; i++)
713 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
714 << ((type_size - 1 - i) * BITS_PER_MARKER);
715
716 if (type_size < 64 / BITS_PER_MARKER)
717 {
718 /* If STMT casts to a smaller type mask out the bits not
719 belonging to the target type. */
720 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
721 }
722 n->type = type;
723 if (!n->base_addr)
724 n->range = type_size;
725 }
726 break;
727 default:
728 return NULL;
729 };
730 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
731 }
732
733 /* Handle binary rhs. */
734
735 if (rhs_class == GIMPLE_BINARY_RHS)
736 {
737 struct symbolic_number n1, n2;
738 gimple *source_stmt, *source_stmt2;
739
740 if (code != BIT_IOR_EXPR)
741 return NULL;
742
743 if (TREE_CODE (rhs2) != SSA_NAME)
744 return NULL;
745
746 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
747
748 switch (code)
749 {
750 case BIT_IOR_EXPR:
751 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
752
753 if (!source_stmt1)
754 return NULL;
755
756 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
757
758 if (!source_stmt2)
759 return NULL;
760
761 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
762 return NULL;
763
764 if (n1.vuse != n2.vuse)
765 return NULL;
766
767 source_stmt
768 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
769
770 if (!source_stmt)
771 return NULL;
772
773 if (!verify_symbolic_number_p (n, stmt))
774 return NULL;
775
776 break;
777 default:
778 return NULL;
779 }
780 return source_stmt;
781 }
782 return NULL;
783 }
784
785 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
786 *CMPXCHG, *CMPNOP and adjust *N. */
787
788 void
789 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
790 uint64_t *cmpnop)
791 {
792 unsigned rsize;
793 uint64_t tmpn, mask;
794
795 /* The number which the find_bswap_or_nop_1 result should match in order
796 to have a full byte swap. The number is shifted to the right
797 according to the size of the symbolic number before using it. */
798 *cmpxchg = CMPXCHG;
799 *cmpnop = CMPNOP;
800
801 /* Find real size of result (highest non-zero byte). */
802 if (n->base_addr)
803 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
804 else
805 rsize = n->range;
806
807 /* Zero out the bits corresponding to untouched bytes in original gimple
808 expression. */
809 if (n->range < (int) sizeof (int64_t))
810 {
811 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
812 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
813 *cmpnop &= mask;
814 }
815
816 /* Zero out the bits corresponding to unused bytes in the result of the
817 gimple expression. */
818 if (rsize < n->range)
819 {
820 if (BYTES_BIG_ENDIAN)
821 {
822 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
823 *cmpxchg &= mask;
824 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
825 }
826 else
827 {
828 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
829 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
830 *cmpnop &= mask;
831 }
832 n->range = rsize;
833 }
834
835 n->range *= BITS_PER_UNIT;
836 }
837
838 /* Check if STMT completes a bswap implementation or a read in a given
839 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
840 accordingly. It also sets N to represent the kind of operations
841 performed: size of the resulting expression and whether it works on
842 a memory source, and if so alias-set and vuse. At last, the
843 function returns a stmt whose rhs's first tree is the source
844 expression. */
845
846 gimple *
847 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
848 {
849 /* The last parameter determines the depth search limit. It usually
850 correlates directly to the number n of bytes to be touched. We
851 increase that number by 2 * (log2(n) + 1) here in order to also
852 cover signed -> unsigned conversions of the src operand as can be seen
853 in libgcc, and for initial shift/and operation of the src operand. */
854 int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
855 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
856 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
857
858 if (!ins_stmt)
859 return NULL;
860
861 uint64_t cmpxchg, cmpnop;
862 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
863
864 /* A complete byte swap should make the symbolic number to start with
865 the largest digit in the highest order byte. Unchanged symbolic
866 number indicates a read with same endianness as target architecture. */
867 if (n->n == cmpnop)
868 *bswap = false;
869 else if (n->n == cmpxchg)
870 *bswap = true;
871 else
872 return NULL;
873
874 /* Useless bit manipulation performed by code. */
875 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
876 return NULL;
877
878 return ins_stmt;
879 }
880
881 const pass_data pass_data_optimize_bswap =
882 {
883 GIMPLE_PASS, /* type */
884 "bswap", /* name */
885 OPTGROUP_NONE, /* optinfo_flags */
886 TV_NONE, /* tv_id */
887 PROP_ssa, /* properties_required */
888 0, /* properties_provided */
889 0, /* properties_destroyed */
890 0, /* todo_flags_start */
891 0, /* todo_flags_finish */
892 };
893
894 class pass_optimize_bswap : public gimple_opt_pass
895 {
896 public:
897 pass_optimize_bswap (gcc::context *ctxt)
898 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
899 {}
900
901 /* opt_pass methods: */
902 virtual bool gate (function *)
903 {
904 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
905 }
906
907 virtual unsigned int execute (function *);
908
909 }; // class pass_optimize_bswap
910
911 /* Perform the bswap optimization: replace the expression computed in the rhs
912 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
913 bswap, load or load + bswap expression.
914 Which of these alternatives replace the rhs is given by N->base_addr (non
915 null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
916 load to perform are also given in N while the builtin bswap invoke is given
917 in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the
918 load statements involved to construct the rhs in gsi_stmt (GSI) and
919 N->range gives the size of the rhs expression for maintaining some
920 statistics.
921
922 Note that if the replacement involve a load and if gsi_stmt (GSI) is
923 non-NULL, that stmt is moved just after INS_STMT to do the load with the
924 same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
925
926 tree
927 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
928 tree bswap_type, tree load_type, struct symbolic_number *n,
929 bool bswap)
930 {
931 tree src, tmp, tgt = NULL_TREE;
932 gimple *bswap_stmt;
933
934 gimple *cur_stmt = gsi_stmt (gsi);
935 src = n->src;
936 if (cur_stmt)
937 tgt = gimple_assign_lhs (cur_stmt);
938
939 /* Need to load the value from memory first. */
940 if (n->base_addr)
941 {
942 gimple_stmt_iterator gsi_ins = gsi;
943 if (ins_stmt)
944 gsi_ins = gsi_for_stmt (ins_stmt);
945 tree addr_expr, addr_tmp, val_expr, val_tmp;
946 tree load_offset_ptr, aligned_load_type;
947 gimple *load_stmt;
948 unsigned align = get_object_alignment (src);
949 poly_int64 load_offset = 0;
950
951 if (cur_stmt)
952 {
953 basic_block ins_bb = gimple_bb (ins_stmt);
954 basic_block cur_bb = gimple_bb (cur_stmt);
955 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
956 return NULL_TREE;
957
958 /* Move cur_stmt just before one of the load of the original
959 to ensure it has the same VUSE. See PR61517 for what could
960 go wrong. */
961 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
962 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
963 gsi_move_before (&gsi, &gsi_ins);
964 gsi = gsi_for_stmt (cur_stmt);
965 }
966 else
967 gsi = gsi_ins;
968
969 /* Compute address to load from and cast according to the size
970 of the load. */
971 addr_expr = build_fold_addr_expr (src);
972 if (is_gimple_mem_ref_addr (addr_expr))
973 addr_tmp = unshare_expr (addr_expr);
974 else
975 {
976 addr_tmp = unshare_expr (n->base_addr);
977 if (!is_gimple_mem_ref_addr (addr_tmp))
978 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
979 is_gimple_mem_ref_addr,
980 NULL_TREE, true,
981 GSI_SAME_STMT);
982 load_offset = n->bytepos;
983 if (n->offset)
984 {
985 tree off
986 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
987 true, NULL_TREE, true,
988 GSI_SAME_STMT);
989 gimple *stmt
990 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
991 POINTER_PLUS_EXPR, addr_tmp, off);
992 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
993 addr_tmp = gimple_assign_lhs (stmt);
994 }
995 }
996
997 /* Perform the load. */
998 aligned_load_type = load_type;
999 if (align < TYPE_ALIGN (load_type))
1000 aligned_load_type = build_aligned_type (load_type, align);
1001 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1002 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1003 load_offset_ptr);
1004
1005 if (!bswap)
1006 {
1007 if (n->range == 16)
1008 nop_stats.found_16bit++;
1009 else if (n->range == 32)
1010 nop_stats.found_32bit++;
1011 else
1012 {
1013 gcc_assert (n->range == 64);
1014 nop_stats.found_64bit++;
1015 }
1016
1017 /* Convert the result of load if necessary. */
1018 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1019 {
1020 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1021 "load_dst");
1022 load_stmt = gimple_build_assign (val_tmp, val_expr);
1023 gimple_set_vuse (load_stmt, n->vuse);
1024 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1025 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
1026 update_stmt (cur_stmt);
1027 }
1028 else if (cur_stmt)
1029 {
1030 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1031 gimple_set_vuse (cur_stmt, n->vuse);
1032 update_stmt (cur_stmt);
1033 }
1034 else
1035 {
1036 tgt = make_ssa_name (load_type);
1037 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1038 gimple_set_vuse (cur_stmt, n->vuse);
1039 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1040 }
1041
1042 if (dump_file)
1043 {
1044 fprintf (dump_file,
1045 "%d bit load in target endianness found at: ",
1046 (int) n->range);
1047 print_gimple_stmt (dump_file, cur_stmt, 0);
1048 }
1049 return tgt;
1050 }
1051 else
1052 {
1053 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1054 load_stmt = gimple_build_assign (val_tmp, val_expr);
1055 gimple_set_vuse (load_stmt, n->vuse);
1056 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1057 }
1058 src = val_tmp;
1059 }
1060 else if (!bswap)
1061 {
1062 gimple *g = NULL;
1063 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1064 {
1065 if (!is_gimple_val (src))
1066 return NULL_TREE;
1067 g = gimple_build_assign (tgt, NOP_EXPR, src);
1068 }
1069 else if (cur_stmt)
1070 g = gimple_build_assign (tgt, src);
1071 else
1072 tgt = src;
1073 if (n->range == 16)
1074 nop_stats.found_16bit++;
1075 else if (n->range == 32)
1076 nop_stats.found_32bit++;
1077 else
1078 {
1079 gcc_assert (n->range == 64);
1080 nop_stats.found_64bit++;
1081 }
1082 if (dump_file)
1083 {
1084 fprintf (dump_file,
1085 "%d bit reshuffle in target endianness found at: ",
1086 (int) n->range);
1087 if (cur_stmt)
1088 print_gimple_stmt (dump_file, cur_stmt, 0);
1089 else
1090 {
1091 print_generic_expr (dump_file, tgt, TDF_NONE);
1092 fprintf (dump_file, "\n");
1093 }
1094 }
1095 if (cur_stmt)
1096 gsi_replace (&gsi, g, true);
1097 return tgt;
1098 }
1099 else if (TREE_CODE (src) == BIT_FIELD_REF)
1100 src = TREE_OPERAND (src, 0);
1101
1102 if (n->range == 16)
1103 bswap_stats.found_16bit++;
1104 else if (n->range == 32)
1105 bswap_stats.found_32bit++;
1106 else
1107 {
1108 gcc_assert (n->range == 64);
1109 bswap_stats.found_64bit++;
1110 }
1111
1112 tmp = src;
1113
1114 /* Convert the src expression if necessary. */
1115 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1116 {
1117 gimple *convert_stmt;
1118
1119 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1120 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1121 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1122 }
1123
1124 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1125 are considered as rotation of 2N bit values by N bits is generally not
1126 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1127 gives 0x03040102 while a bswap for that value is 0x04030201. */
1128 if (bswap && n->range == 16)
1129 {
1130 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1131 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1132 bswap_stmt = gimple_build_assign (NULL, src);
1133 }
1134 else
1135 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1136
1137 if (tgt == NULL_TREE)
1138 tgt = make_ssa_name (bswap_type);
1139 tmp = tgt;
1140
1141 /* Convert the result if necessary. */
1142 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1143 {
1144 gimple *convert_stmt;
1145
1146 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1147 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1148 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1149 }
1150
1151 gimple_set_lhs (bswap_stmt, tmp);
1152
1153 if (dump_file)
1154 {
1155 fprintf (dump_file, "%d bit bswap implementation found at: ",
1156 (int) n->range);
1157 if (cur_stmt)
1158 print_gimple_stmt (dump_file, cur_stmt, 0);
1159 else
1160 {
1161 print_generic_expr (dump_file, tgt, TDF_NONE);
1162 fprintf (dump_file, "\n");
1163 }
1164 }
1165
1166 if (cur_stmt)
1167 {
1168 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1169 gsi_remove (&gsi, true);
1170 }
1171 else
1172 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1173 return tgt;
1174 }
1175
1176 /* Find manual byte swap implementations as well as load in a given
1177 endianness. Byte swaps are turned into a bswap builtin invokation
1178 while endian loads are converted to bswap builtin invokation or
1179 simple load according to the target endianness. */
1180
1181 unsigned int
1182 pass_optimize_bswap::execute (function *fun)
1183 {
1184 basic_block bb;
1185 bool bswap32_p, bswap64_p;
1186 bool changed = false;
1187 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1188
1189 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1190 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1191 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1192 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1193 || (bswap32_p && word_mode == SImode)));
1194
1195 /* Determine the argument type of the builtins. The code later on
1196 assumes that the return and argument type are the same. */
1197 if (bswap32_p)
1198 {
1199 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1200 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1201 }
1202
1203 if (bswap64_p)
1204 {
1205 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1206 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1207 }
1208
1209 memset (&nop_stats, 0, sizeof (nop_stats));
1210 memset (&bswap_stats, 0, sizeof (bswap_stats));
1211 calculate_dominance_info (CDI_DOMINATORS);
1212
1213 FOR_EACH_BB_FN (bb, fun)
1214 {
1215 gimple_stmt_iterator gsi;
1216
1217 /* We do a reverse scan for bswap patterns to make sure we get the
1218 widest match. As bswap pattern matching doesn't handle previously
1219 inserted smaller bswap replacements as sub-patterns, the wider
1220 variant wouldn't be detected. */
1221 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1222 {
1223 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1224 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1225 enum tree_code code;
1226 struct symbolic_number n;
1227 bool bswap;
1228
1229 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1230 might be moved to a different basic block by bswap_replace and gsi
1231 must not points to it if that's the case. Moving the gsi_prev
1232 there make sure that gsi points to the statement previous to
1233 cur_stmt while still making sure that all statements are
1234 considered in this basic block. */
1235 gsi_prev (&gsi);
1236
1237 if (!is_gimple_assign (cur_stmt))
1238 continue;
1239
1240 code = gimple_assign_rhs_code (cur_stmt);
1241 switch (code)
1242 {
1243 case LROTATE_EXPR:
1244 case RROTATE_EXPR:
1245 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1246 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1247 % BITS_PER_UNIT)
1248 continue;
1249 /* Fall through. */
1250 case BIT_IOR_EXPR:
1251 break;
1252 default:
1253 continue;
1254 }
1255
1256 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1257
1258 if (!ins_stmt)
1259 continue;
1260
1261 switch (n.range)
1262 {
1263 case 16:
1264 /* Already in canonical form, nothing to do. */
1265 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1266 continue;
1267 load_type = bswap_type = uint16_type_node;
1268 break;
1269 case 32:
1270 load_type = uint32_type_node;
1271 if (bswap32_p)
1272 {
1273 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1274 bswap_type = bswap32_type;
1275 }
1276 break;
1277 case 64:
1278 load_type = uint64_type_node;
1279 if (bswap64_p)
1280 {
1281 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1282 bswap_type = bswap64_type;
1283 }
1284 break;
1285 default:
1286 continue;
1287 }
1288
1289 if (bswap && !fndecl && n.range != 16)
1290 continue;
1291
1292 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1293 bswap_type, load_type, &n, bswap))
1294 changed = true;
1295 }
1296 }
1297
1298 statistics_counter_event (fun, "16-bit nop implementations found",
1299 nop_stats.found_16bit);
1300 statistics_counter_event (fun, "32-bit nop implementations found",
1301 nop_stats.found_32bit);
1302 statistics_counter_event (fun, "64-bit nop implementations found",
1303 nop_stats.found_64bit);
1304 statistics_counter_event (fun, "16-bit bswap implementations found",
1305 bswap_stats.found_16bit);
1306 statistics_counter_event (fun, "32-bit bswap implementations found",
1307 bswap_stats.found_32bit);
1308 statistics_counter_event (fun, "64-bit bswap implementations found",
1309 bswap_stats.found_64bit);
1310
1311 return (changed ? TODO_update_ssa : 0);
1312 }
1313
1314 } // anon namespace
1315
1316 gimple_opt_pass *
1317 make_pass_optimize_bswap (gcc::context *ctxt)
1318 {
1319 return new pass_optimize_bswap (ctxt);
1320 }
1321
1322 namespace {
1323
1324 /* Struct recording one operand for the store, which is either a constant,
1325 then VAL represents the constant and all the other fields are zero, or
1326 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1327 and the other fields also reflect the memory load, or an SSA name, then
1328 VAL represents the SSA name and all the other fields are zero, */
1329
1330 class store_operand_info
1331 {
1332 public:
1333 tree val;
1334 tree base_addr;
1335 poly_uint64 bitsize;
1336 poly_uint64 bitpos;
1337 poly_uint64 bitregion_start;
1338 poly_uint64 bitregion_end;
1339 gimple *stmt;
1340 bool bit_not_p;
1341 store_operand_info ();
1342 };
1343
1344 store_operand_info::store_operand_info ()
1345 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1346 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1347 {
1348 }
1349
1350 /* Struct recording the information about a single store of an immediate
1351 to memory. These are created in the first phase and coalesced into
1352 merged_store_group objects in the second phase. */
1353
1354 class store_immediate_info
1355 {
1356 public:
1357 unsigned HOST_WIDE_INT bitsize;
1358 unsigned HOST_WIDE_INT bitpos;
1359 unsigned HOST_WIDE_INT bitregion_start;
1360 /* This is one past the last bit of the bit region. */
1361 unsigned HOST_WIDE_INT bitregion_end;
1362 gimple *stmt;
1363 unsigned int order;
1364 /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1365 BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1366 for bit insertion.
1367 LROTATE_EXPR if it can be only bswap optimized and
1368 ops are not really meaningful.
1369 NOP_EXPR if bswap optimization detected identity, ops
1370 are not meaningful. */
1371 enum tree_code rhs_code;
1372 /* Two fields for bswap optimization purposes. */
1373 struct symbolic_number n;
1374 gimple *ins_stmt;
1375 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1376 bool bit_not_p;
1377 /* True if ops have been swapped and thus ops[1] represents
1378 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1379 bool ops_swapped_p;
1380 /* The index number of the landing pad, or 0 if there is none. */
1381 int lp_nr;
1382 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1383 just the first one. */
1384 store_operand_info ops[2];
1385 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1386 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1387 gimple *, unsigned int, enum tree_code,
1388 struct symbolic_number &, gimple *, bool, int,
1389 const store_operand_info &,
1390 const store_operand_info &);
1391 };
1392
1393 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1394 unsigned HOST_WIDE_INT bp,
1395 unsigned HOST_WIDE_INT brs,
1396 unsigned HOST_WIDE_INT bre,
1397 gimple *st,
1398 unsigned int ord,
1399 enum tree_code rhscode,
1400 struct symbolic_number &nr,
1401 gimple *ins_stmtp,
1402 bool bitnotp,
1403 int nr2,
1404 const store_operand_info &op0r,
1405 const store_operand_info &op1r)
1406 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1407 stmt (st), order (ord), rhs_code (rhscode), n (nr),
1408 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1409 lp_nr (nr2)
1410 #if __cplusplus >= 201103L
1411 , ops { op0r, op1r }
1412 {
1413 }
1414 #else
1415 {
1416 ops[0] = op0r;
1417 ops[1] = op1r;
1418 }
1419 #endif
1420
1421 /* Struct representing a group of stores to contiguous memory locations.
1422 These are produced by the second phase (coalescing) and consumed in the
1423 third phase that outputs the widened stores. */
1424
1425 class merged_store_group
1426 {
1427 public:
1428 unsigned HOST_WIDE_INT start;
1429 unsigned HOST_WIDE_INT width;
1430 unsigned HOST_WIDE_INT bitregion_start;
1431 unsigned HOST_WIDE_INT bitregion_end;
1432 /* The size of the allocated memory for val and mask. */
1433 unsigned HOST_WIDE_INT buf_size;
1434 unsigned HOST_WIDE_INT align_base;
1435 poly_uint64 load_align_base[2];
1436
1437 unsigned int align;
1438 unsigned int load_align[2];
1439 unsigned int first_order;
1440 unsigned int last_order;
1441 bool bit_insertion;
1442 bool only_constants;
1443 unsigned int first_nonmergeable_order;
1444 int lp_nr;
1445
1446 auto_vec<store_immediate_info *> stores;
1447 /* We record the first and last original statements in the sequence because
1448 we'll need their vuse/vdef and replacement position. It's easier to keep
1449 track of them separately as 'stores' is reordered by apply_stores. */
1450 gimple *last_stmt;
1451 gimple *first_stmt;
1452 unsigned char *val;
1453 unsigned char *mask;
1454
1455 merged_store_group (store_immediate_info *);
1456 ~merged_store_group ();
1457 bool can_be_merged_into (store_immediate_info *);
1458 void merge_into (store_immediate_info *);
1459 void merge_overlapping (store_immediate_info *);
1460 bool apply_stores ();
1461 private:
1462 void do_merge (store_immediate_info *);
1463 };
1464
1465 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1466
1467 static void
1468 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1469 {
1470 if (!fd)
1471 return;
1472
1473 for (unsigned int i = 0; i < len; i++)
1474 fprintf (fd, "%02x ", ptr[i]);
1475 fprintf (fd, "\n");
1476 }
1477
1478 /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
1479 bits between adjacent elements. AMNT should be within
1480 [0, BITS_PER_UNIT).
1481 Example, AMNT = 2:
1482 00011111|11100000 << 2 = 01111111|10000000
1483 PTR[1] | PTR[0] PTR[1] | PTR[0]. */
1484
1485 static void
1486 shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
1487 {
1488 if (amnt == 0)
1489 return;
1490
1491 unsigned char carry_over = 0U;
1492 unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
1493 unsigned char clear_mask = (~0U) << amnt;
1494
1495 for (unsigned int i = 0; i < sz; i++)
1496 {
1497 unsigned prev_carry_over = carry_over;
1498 carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
1499
1500 ptr[i] <<= amnt;
1501 if (i != 0)
1502 {
1503 ptr[i] &= clear_mask;
1504 ptr[i] |= prev_carry_over;
1505 }
1506 }
1507 }
1508
1509 /* Like shift_bytes_in_array but for big-endian.
1510 Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
1511 bits between adjacent elements. AMNT should be within
1512 [0, BITS_PER_UNIT).
1513 Example, AMNT = 2:
1514 00011111|11100000 >> 2 = 00000111|11111000
1515 PTR[0] | PTR[1] PTR[0] | PTR[1]. */
1516
1517 static void
1518 shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
1519 unsigned int amnt)
1520 {
1521 if (amnt == 0)
1522 return;
1523
1524 unsigned char carry_over = 0U;
1525 unsigned char carry_mask = ~(~0U << amnt);
1526
1527 for (unsigned int i = 0; i < sz; i++)
1528 {
1529 unsigned prev_carry_over = carry_over;
1530 carry_over = ptr[i] & carry_mask;
1531
1532 carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
1533 ptr[i] >>= amnt;
1534 ptr[i] |= prev_carry_over;
1535 }
1536 }
1537
1538 /* Clear out LEN bits starting from bit START in the byte array
1539 PTR. This clears the bits to the *right* from START.
1540 START must be within [0, BITS_PER_UNIT) and counts starting from
1541 the least significant bit. */
1542
1543 static void
1544 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1545 unsigned int len)
1546 {
1547 if (len == 0)
1548 return;
1549 /* Clear len bits to the right of start. */
1550 else if (len <= start + 1)
1551 {
1552 unsigned char mask = (~(~0U << len));
1553 mask = mask << (start + 1U - len);
1554 ptr[0] &= ~mask;
1555 }
1556 else if (start != BITS_PER_UNIT - 1)
1557 {
1558 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1559 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1560 len - (start % BITS_PER_UNIT) - 1);
1561 }
1562 else if (start == BITS_PER_UNIT - 1
1563 && len > BITS_PER_UNIT)
1564 {
1565 unsigned int nbytes = len / BITS_PER_UNIT;
1566 memset (ptr, 0, nbytes);
1567 if (len % BITS_PER_UNIT != 0)
1568 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1569 len % BITS_PER_UNIT);
1570 }
1571 else
1572 gcc_unreachable ();
1573 }
1574
1575 /* In the byte array PTR clear the bit region starting at bit
1576 START and is LEN bits wide.
1577 For regions spanning multiple bytes do this recursively until we reach
1578 zero LEN or a region contained within a single byte. */
1579
1580 static void
1581 clear_bit_region (unsigned char *ptr, unsigned int start,
1582 unsigned int len)
1583 {
1584 /* Degenerate base case. */
1585 if (len == 0)
1586 return;
1587 else if (start >= BITS_PER_UNIT)
1588 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1589 /* Second base case. */
1590 else if ((start + len) <= BITS_PER_UNIT)
1591 {
1592 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1593 mask >>= BITS_PER_UNIT - (start + len);
1594
1595 ptr[0] &= ~mask;
1596
1597 return;
1598 }
1599 /* Clear most significant bits in a byte and proceed with the next byte. */
1600 else if (start != 0)
1601 {
1602 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1603 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1604 }
1605 /* Whole bytes need to be cleared. */
1606 else if (start == 0 && len > BITS_PER_UNIT)
1607 {
1608 unsigned int nbytes = len / BITS_PER_UNIT;
1609 /* We could recurse on each byte but we clear whole bytes, so a simple
1610 memset will do. */
1611 memset (ptr, '\0', nbytes);
1612 /* Clear the remaining sub-byte region if there is one. */
1613 if (len % BITS_PER_UNIT != 0)
1614 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1615 }
1616 else
1617 gcc_unreachable ();
1618 }
1619
1620 /* Write BITLEN bits of EXPR to the byte array PTR at
1621 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1622 Return true if the operation succeeded. */
1623
1624 static bool
1625 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1626 unsigned int total_bytes)
1627 {
1628 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1629 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1630 || (bitpos % BITS_PER_UNIT)
1631 || !int_mode_for_size (bitlen, 0).exists ());
1632 bool empty_ctor_p
1633 = (TREE_CODE (expr) == CONSTRUCTOR
1634 && CONSTRUCTOR_NELTS (expr) == 0
1635 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1636 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1637
1638 if (!sub_byte_op_p)
1639 {
1640 if (first_byte >= total_bytes)
1641 return false;
1642 total_bytes -= first_byte;
1643 if (empty_ctor_p)
1644 {
1645 unsigned HOST_WIDE_INT rhs_bytes
1646 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1647 if (rhs_bytes > total_bytes)
1648 return false;
1649 memset (ptr + first_byte, '\0', rhs_bytes);
1650 return true;
1651 }
1652 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1653 }
1654
1655 /* LITTLE-ENDIAN
1656 We are writing a non byte-sized quantity or at a position that is not
1657 at a byte boundary.
1658 |--------|--------|--------| ptr + first_byte
1659 ^ ^
1660 xxx xxxxxxxx xxx< bp>
1661 |______EXPR____|
1662
1663 First native_encode_expr EXPR into a temporary buffer and shift each
1664 byte in the buffer by 'bp' (carrying the bits over as necessary).
1665 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1666 <------bitlen---->< bp>
1667 Then we clear the destination bits:
1668 |---00000|00000000|000-----| ptr + first_byte
1669 <-------bitlen--->< bp>
1670
1671 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1672 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1673
1674 BIG-ENDIAN
1675 We are writing a non byte-sized quantity or at a position that is not
1676 at a byte boundary.
1677 ptr + first_byte |--------|--------|--------|
1678 ^ ^
1679 <bp >xxx xxxxxxxx xxx
1680 |_____EXPR_____|
1681
1682 First native_encode_expr EXPR into a temporary buffer and shift each
1683 byte in the buffer to the right by (carrying the bits over as necessary).
1684 We shift by as much as needed to align the most significant bit of EXPR
1685 with bitpos:
1686 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1687 <---bitlen----> <bp ><-----bitlen----->
1688 Then we clear the destination bits:
1689 ptr + first_byte |-----000||00000000||00000---|
1690 <bp ><-------bitlen----->
1691
1692 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1693 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1694 The awkwardness comes from the fact that bitpos is counted from the
1695 most significant bit of a byte. */
1696
1697 /* We must be dealing with fixed-size data at this point, since the
1698 total size is also fixed. */
1699 unsigned int byte_size;
1700 if (empty_ctor_p)
1701 {
1702 unsigned HOST_WIDE_INT rhs_bytes
1703 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1704 if (rhs_bytes > total_bytes)
1705 return false;
1706 byte_size = rhs_bytes;
1707 }
1708 else
1709 {
1710 fixed_size_mode mode
1711 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1712 byte_size = GET_MODE_SIZE (mode);
1713 }
1714 /* Allocate an extra byte so that we have space to shift into. */
1715 byte_size++;
1716 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1717 memset (tmpbuf, '\0', byte_size);
1718 /* The store detection code should only have allowed constants that are
1719 accepted by native_encode_expr or empty ctors. */
1720 if (!empty_ctor_p
1721 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1722 gcc_unreachable ();
1723
1724 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1725 bytes to write. This means it can write more than
1726 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1727 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1728 bitlen and zero out the bits that are not relevant as well (that may
1729 contain a sign bit due to sign-extension). */
1730 unsigned int padding
1731 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1732 /* On big-endian the padding is at the 'front' so just skip the initial
1733 bytes. */
1734 if (BYTES_BIG_ENDIAN)
1735 tmpbuf += padding;
1736
1737 byte_size -= padding;
1738
1739 if (bitlen % BITS_PER_UNIT != 0)
1740 {
1741 if (BYTES_BIG_ENDIAN)
1742 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1743 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1744 else
1745 clear_bit_region (tmpbuf, bitlen,
1746 byte_size * BITS_PER_UNIT - bitlen);
1747 }
1748 /* Left shifting relies on the last byte being clear if bitlen is
1749 a multiple of BITS_PER_UNIT, which might not be clear if
1750 there are padding bytes. */
1751 else if (!BYTES_BIG_ENDIAN)
1752 tmpbuf[byte_size - 1] = '\0';
1753
1754 /* Clear the bit region in PTR where the bits from TMPBUF will be
1755 inserted into. */
1756 if (BYTES_BIG_ENDIAN)
1757 clear_bit_region_be (ptr + first_byte,
1758 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1759 else
1760 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1761
1762 int shift_amnt;
1763 int bitlen_mod = bitlen % BITS_PER_UNIT;
1764 int bitpos_mod = bitpos % BITS_PER_UNIT;
1765
1766 bool skip_byte = false;
1767 if (BYTES_BIG_ENDIAN)
1768 {
1769 /* BITPOS and BITLEN are exactly aligned and no shifting
1770 is necessary. */
1771 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1772 || (bitpos_mod == 0 && bitlen_mod == 0))
1773 shift_amnt = 0;
1774 /* |. . . . . . . .|
1775 <bp > <blen >.
1776 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1777 of the value until it aligns with 'bp' in the next byte over. */
1778 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1779 {
1780 shift_amnt = bitlen_mod + bitpos_mod;
1781 skip_byte = bitlen_mod != 0;
1782 }
1783 /* |. . . . . . . .|
1784 <----bp--->
1785 <---blen---->.
1786 Shift the value right within the same byte so it aligns with 'bp'. */
1787 else
1788 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1789 }
1790 else
1791 shift_amnt = bitpos % BITS_PER_UNIT;
1792
1793 /* Create the shifted version of EXPR. */
1794 if (!BYTES_BIG_ENDIAN)
1795 {
1796 shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
1797 if (shift_amnt == 0)
1798 byte_size--;
1799 }
1800 else
1801 {
1802 gcc_assert (BYTES_BIG_ENDIAN);
1803 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1804 /* If shifting right forced us to move into the next byte skip the now
1805 empty byte. */
1806 if (skip_byte)
1807 {
1808 tmpbuf++;
1809 byte_size--;
1810 }
1811 }
1812
1813 /* Insert the bits from TMPBUF. */
1814 for (unsigned int i = 0; i < byte_size; i++)
1815 ptr[first_byte + i] |= tmpbuf[i];
1816
1817 return true;
1818 }
1819
1820 /* Sorting function for store_immediate_info objects.
1821 Sorts them by bitposition. */
1822
1823 static int
1824 sort_by_bitpos (const void *x, const void *y)
1825 {
1826 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1827 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1828
1829 if ((*tmp)->bitpos < (*tmp2)->bitpos)
1830 return -1;
1831 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1832 return 1;
1833 else
1834 /* If they are the same let's use the order which is guaranteed to
1835 be different. */
1836 return (*tmp)->order - (*tmp2)->order;
1837 }
1838
1839 /* Sorting function for store_immediate_info objects.
1840 Sorts them by the order field. */
1841
1842 static int
1843 sort_by_order (const void *x, const void *y)
1844 {
1845 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1846 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1847
1848 if ((*tmp)->order < (*tmp2)->order)
1849 return -1;
1850 else if ((*tmp)->order > (*tmp2)->order)
1851 return 1;
1852
1853 gcc_unreachable ();
1854 }
1855
1856 /* Initialize a merged_store_group object from a store_immediate_info
1857 object. */
1858
1859 merged_store_group::merged_store_group (store_immediate_info *info)
1860 {
1861 start = info->bitpos;
1862 width = info->bitsize;
1863 bitregion_start = info->bitregion_start;
1864 bitregion_end = info->bitregion_end;
1865 /* VAL has memory allocated for it in apply_stores once the group
1866 width has been finalized. */
1867 val = NULL;
1868 mask = NULL;
1869 bit_insertion = false;
1870 only_constants = info->rhs_code == INTEGER_CST;
1871 first_nonmergeable_order = ~0U;
1872 lp_nr = info->lp_nr;
1873 unsigned HOST_WIDE_INT align_bitpos = 0;
1874 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1875 &align, &align_bitpos);
1876 align_base = start - align_bitpos;
1877 for (int i = 0; i < 2; ++i)
1878 {
1879 store_operand_info &op = info->ops[i];
1880 if (op.base_addr == NULL_TREE)
1881 {
1882 load_align[i] = 0;
1883 load_align_base[i] = 0;
1884 }
1885 else
1886 {
1887 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1888 load_align_base[i] = op.bitpos - align_bitpos;
1889 }
1890 }
1891 stores.create (1);
1892 stores.safe_push (info);
1893 last_stmt = info->stmt;
1894 last_order = info->order;
1895 first_stmt = last_stmt;
1896 first_order = last_order;
1897 buf_size = 0;
1898 }
1899
1900 merged_store_group::~merged_store_group ()
1901 {
1902 if (val)
1903 XDELETEVEC (val);
1904 }
1905
1906 /* Return true if the store described by INFO can be merged into the group. */
1907
1908 bool
1909 merged_store_group::can_be_merged_into (store_immediate_info *info)
1910 {
1911 /* Do not merge bswap patterns. */
1912 if (info->rhs_code == LROTATE_EXPR)
1913 return false;
1914
1915 if (info->lp_nr != lp_nr)
1916 return false;
1917
1918 /* The canonical case. */
1919 if (info->rhs_code == stores[0]->rhs_code)
1920 return true;
1921
1922 /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */
1923 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1924 return true;
1925
1926 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1927 return true;
1928
1929 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
1930 if (info->rhs_code == MEM_REF
1931 && (stores[0]->rhs_code == INTEGER_CST
1932 || stores[0]->rhs_code == BIT_INSERT_EXPR)
1933 && info->bitregion_start == stores[0]->bitregion_start
1934 && info->bitregion_end == stores[0]->bitregion_end)
1935 return true;
1936
1937 if (stores[0]->rhs_code == MEM_REF
1938 && (info->rhs_code == INTEGER_CST
1939 || info->rhs_code == BIT_INSERT_EXPR)
1940 && info->bitregion_start == stores[0]->bitregion_start
1941 && info->bitregion_end == stores[0]->bitregion_end)
1942 return true;
1943
1944 return false;
1945 }
1946
1947 /* Helper method for merge_into and merge_overlapping to do
1948 the common part. */
1949
1950 void
1951 merged_store_group::do_merge (store_immediate_info *info)
1952 {
1953 bitregion_start = MIN (bitregion_start, info->bitregion_start);
1954 bitregion_end = MAX (bitregion_end, info->bitregion_end);
1955
1956 unsigned int this_align;
1957 unsigned HOST_WIDE_INT align_bitpos = 0;
1958 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1959 &this_align, &align_bitpos);
1960 if (this_align > align)
1961 {
1962 align = this_align;
1963 align_base = info->bitpos - align_bitpos;
1964 }
1965 for (int i = 0; i < 2; ++i)
1966 {
1967 store_operand_info &op = info->ops[i];
1968 if (!op.base_addr)
1969 continue;
1970
1971 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1972 if (this_align > load_align[i])
1973 {
1974 load_align[i] = this_align;
1975 load_align_base[i] = op.bitpos - align_bitpos;
1976 }
1977 }
1978
1979 gimple *stmt = info->stmt;
1980 stores.safe_push (info);
1981 if (info->order > last_order)
1982 {
1983 last_order = info->order;
1984 last_stmt = stmt;
1985 }
1986 else if (info->order < first_order)
1987 {
1988 first_order = info->order;
1989 first_stmt = stmt;
1990 }
1991 if (info->rhs_code != INTEGER_CST)
1992 only_constants = false;
1993 }
1994
1995 /* Merge a store recorded by INFO into this merged store.
1996 The store is not overlapping with the existing recorded
1997 stores. */
1998
1999 void
2000 merged_store_group::merge_into (store_immediate_info *info)
2001 {
2002 /* Make sure we're inserting in the position we think we're inserting. */
2003 gcc_assert (info->bitpos >= start + width
2004 && info->bitregion_start <= bitregion_end);
2005
2006 width = info->bitpos + info->bitsize - start;
2007 do_merge (info);
2008 }
2009
2010 /* Merge a store described by INFO into this merged store.
2011 INFO overlaps in some way with the current store (i.e. it's not contiguous
2012 which is handled by merged_store_group::merge_into). */
2013
2014 void
2015 merged_store_group::merge_overlapping (store_immediate_info *info)
2016 {
2017 /* If the store extends the size of the group, extend the width. */
2018 if (info->bitpos + info->bitsize > start + width)
2019 width = info->bitpos + info->bitsize - start;
2020
2021 do_merge (info);
2022 }
2023
2024 /* Go through all the recorded stores in this group in program order and
2025 apply their values to the VAL byte array to create the final merged
2026 value. Return true if the operation succeeded. */
2027
2028 bool
2029 merged_store_group::apply_stores ()
2030 {
2031 /* Make sure we have more than one store in the group, otherwise we cannot
2032 merge anything. */
2033 if (bitregion_start % BITS_PER_UNIT != 0
2034 || bitregion_end % BITS_PER_UNIT != 0
2035 || stores.length () == 1)
2036 return false;
2037
2038 stores.qsort (sort_by_order);
2039 store_immediate_info *info;
2040 unsigned int i;
2041 /* Create a power-of-2-sized buffer for native_encode_expr. */
2042 buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
2043 val = XNEWVEC (unsigned char, 2 * buf_size);
2044 mask = val + buf_size;
2045 memset (val, 0, buf_size);
2046 memset (mask, ~0U, buf_size);
2047
2048 FOR_EACH_VEC_ELT (stores, i, info)
2049 {
2050 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2051 tree cst;
2052 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2053 cst = info->ops[0].val;
2054 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2055 cst = info->ops[1].val;
2056 else
2057 cst = NULL_TREE;
2058 bool ret = true;
2059 if (cst)
2060 {
2061 if (info->rhs_code == BIT_INSERT_EXPR)
2062 bit_insertion = true;
2063 else
2064 ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2065 pos_in_buffer, buf_size);
2066 }
2067 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2068 if (BYTES_BIG_ENDIAN)
2069 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2070 - (pos_in_buffer % BITS_PER_UNIT)),
2071 info->bitsize);
2072 else
2073 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2074 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2075 {
2076 if (ret)
2077 {
2078 fputs ("After writing ", dump_file);
2079 print_generic_expr (dump_file, cst, TDF_NONE);
2080 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2081 " at position %d\n", info->bitsize, pos_in_buffer);
2082 fputs (" the merged value contains ", dump_file);
2083 dump_char_array (dump_file, val, buf_size);
2084 fputs (" the merged mask contains ", dump_file);
2085 dump_char_array (dump_file, mask, buf_size);
2086 if (bit_insertion)
2087 fputs (" bit insertion is required\n", dump_file);
2088 }
2089 else
2090 fprintf (dump_file, "Failed to merge stores\n");
2091 }
2092 if (!ret)
2093 return false;
2094 }
2095 stores.qsort (sort_by_bitpos);
2096 return true;
2097 }
2098
2099 /* Structure describing the store chain. */
2100
2101 class imm_store_chain_info
2102 {
2103 public:
2104 /* Doubly-linked list that imposes an order on chain processing.
2105 PNXP (prev's next pointer) points to the head of a list, or to
2106 the next field in the previous chain in the list.
2107 See pass_store_merging::m_stores_head for more rationale. */
2108 imm_store_chain_info *next, **pnxp;
2109 tree base_addr;
2110 auto_vec<store_immediate_info *> m_store_info;
2111 auto_vec<merged_store_group *> m_merged_store_groups;
2112
2113 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2114 : next (inspt), pnxp (&inspt), base_addr (b_a)
2115 {
2116 inspt = this;
2117 if (next)
2118 {
2119 gcc_checking_assert (pnxp == next->pnxp);
2120 next->pnxp = &next;
2121 }
2122 }
2123 ~imm_store_chain_info ()
2124 {
2125 *pnxp = next;
2126 if (next)
2127 {
2128 gcc_checking_assert (&next == next->pnxp);
2129 next->pnxp = pnxp;
2130 }
2131 }
2132 bool terminate_and_process_chain ();
2133 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
2134 bool coalesce_immediate_stores ();
2135 bool output_merged_store (merged_store_group *);
2136 bool output_merged_stores ();
2137 };
2138
2139 const pass_data pass_data_tree_store_merging = {
2140 GIMPLE_PASS, /* type */
2141 "store-merging", /* name */
2142 OPTGROUP_NONE, /* optinfo_flags */
2143 TV_GIMPLE_STORE_MERGING, /* tv_id */
2144 PROP_ssa, /* properties_required */
2145 0, /* properties_provided */
2146 0, /* properties_destroyed */
2147 0, /* todo_flags_start */
2148 TODO_update_ssa, /* todo_flags_finish */
2149 };
2150
2151 class pass_store_merging : public gimple_opt_pass
2152 {
2153 public:
2154 pass_store_merging (gcc::context *ctxt)
2155 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2156 {
2157 }
2158
2159 /* Pass not supported for PDP-endian, nor for insane hosts or
2160 target character sizes where native_{encode,interpret}_expr
2161 doesn't work properly. */
2162 virtual bool
2163 gate (function *)
2164 {
2165 return flag_store_merging
2166 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2167 && CHAR_BIT == 8
2168 && BITS_PER_UNIT == 8;
2169 }
2170
2171 virtual unsigned int execute (function *);
2172
2173 private:
2174 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2175
2176 /* Form a doubly-linked stack of the elements of m_stores, so that
2177 we can iterate over them in a predictable way. Using this order
2178 avoids extraneous differences in the compiler output just because
2179 of tree pointer variations (e.g. different chains end up in
2180 different positions of m_stores, so they are handled in different
2181 orders, so they allocate or release SSA names in different
2182 orders, and when they get reused, subsequent passes end up
2183 getting different SSA names, which may ultimately change
2184 decisions when going out of SSA). */
2185 imm_store_chain_info *m_stores_head;
2186
2187 bool process_store (gimple *);
2188 bool terminate_and_process_chain (imm_store_chain_info *);
2189 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2190 bool terminate_and_process_all_chains ();
2191 }; // class pass_store_merging
2192
2193 /* Terminate and process all recorded chains. Return true if any changes
2194 were made. */
2195
2196 bool
2197 pass_store_merging::terminate_and_process_all_chains ()
2198 {
2199 bool ret = false;
2200 while (m_stores_head)
2201 ret |= terminate_and_process_chain (m_stores_head);
2202 gcc_assert (m_stores.is_empty ());
2203 return ret;
2204 }
2205
2206 /* Terminate all chains that are affected by the statement STMT.
2207 CHAIN_INFO is the chain we should ignore from the checks if
2208 non-NULL. Return true if any changes were made. */
2209
2210 bool
2211 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2212 **chain_info,
2213 gimple *stmt)
2214 {
2215 bool ret = false;
2216
2217 /* If the statement doesn't touch memory it can't alias. */
2218 if (!gimple_vuse (stmt))
2219 return false;
2220
2221 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2222 ao_ref store_lhs_ref;
2223 ao_ref_init (&store_lhs_ref, store_lhs);
2224 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2225 {
2226 next = cur->next;
2227
2228 /* We already checked all the stores in chain_info and terminated the
2229 chain if necessary. Skip it here. */
2230 if (chain_info && *chain_info == cur)
2231 continue;
2232
2233 store_immediate_info *info;
2234 unsigned int i;
2235 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2236 {
2237 tree lhs = gimple_assign_lhs (info->stmt);
2238 ao_ref lhs_ref;
2239 ao_ref_init (&lhs_ref, lhs);
2240 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2241 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2242 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2243 &lhs_ref, false)))
2244 {
2245 if (dump_file && (dump_flags & TDF_DETAILS))
2246 {
2247 fprintf (dump_file, "stmt causes chain termination:\n");
2248 print_gimple_stmt (dump_file, stmt, 0);
2249 }
2250 ret |= terminate_and_process_chain (cur);
2251 break;
2252 }
2253 }
2254 }
2255
2256 return ret;
2257 }
2258
2259 /* Helper function. Terminate the recorded chain storing to base object
2260 BASE. Return true if the merging and output was successful. The m_stores
2261 entry is removed after the processing in any case. */
2262
2263 bool
2264 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2265 {
2266 bool ret = chain_info->terminate_and_process_chain ();
2267 m_stores.remove (chain_info->base_addr);
2268 delete chain_info;
2269 return ret;
2270 }
2271
2272 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2273 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2274 be able to sink load of REF across stores between FIRST and LAST, up
2275 to right before LAST. */
2276
2277 bool
2278 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2279 {
2280 ao_ref r;
2281 ao_ref_init (&r, ref);
2282 unsigned int count = 0;
2283 tree vop = gimple_vdef (last);
2284 gimple *stmt;
2285
2286 /* Return true conservatively if the basic blocks are different. */
2287 if (gimple_bb (first) != gimple_bb (last))
2288 return true;
2289
2290 do
2291 {
2292 stmt = SSA_NAME_DEF_STMT (vop);
2293 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2294 return true;
2295 if (gimple_store_p (stmt)
2296 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2297 return true;
2298 /* Avoid quadratic compile time by bounding the number of checks
2299 we perform. */
2300 if (++count > MAX_STORE_ALIAS_CHECKS)
2301 return true;
2302 vop = gimple_vuse (stmt);
2303 }
2304 while (stmt != first);
2305
2306 return false;
2307 }
2308
2309 /* Return true if INFO->ops[IDX] is mergeable with the
2310 corresponding loads already in MERGED_STORE group.
2311 BASE_ADDR is the base address of the whole store group. */
2312
2313 bool
2314 compatible_load_p (merged_store_group *merged_store,
2315 store_immediate_info *info,
2316 tree base_addr, int idx)
2317 {
2318 store_immediate_info *infof = merged_store->stores[0];
2319 if (!info->ops[idx].base_addr
2320 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2321 info->bitpos - infof->bitpos)
2322 || !operand_equal_p (info->ops[idx].base_addr,
2323 infof->ops[idx].base_addr, 0))
2324 return false;
2325
2326 store_immediate_info *infol = merged_store->stores.last ();
2327 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2328 /* In this case all vuses should be the same, e.g.
2329 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2330 or
2331 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2332 and we can emit the coalesced load next to any of those loads. */
2333 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2334 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2335 return true;
2336
2337 /* Otherwise, at least for now require that the load has the same
2338 vuse as the store. See following examples. */
2339 if (gimple_vuse (info->stmt) != load_vuse)
2340 return false;
2341
2342 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2343 || (infof != infol
2344 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2345 return false;
2346
2347 /* If the load is from the same location as the store, already
2348 the construction of the immediate chain info guarantees no intervening
2349 stores, so no further checks are needed. Example:
2350 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2351 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2352 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2353 return true;
2354
2355 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2356 of the stores in the group, or any other stores in between those.
2357 Previous calls to compatible_load_p ensured that for all the
2358 merged_store->stores IDX loads, no stmts starting with
2359 merged_store->first_stmt and ending right before merged_store->last_stmt
2360 clobbers those loads. */
2361 gimple *first = merged_store->first_stmt;
2362 gimple *last = merged_store->last_stmt;
2363 unsigned int i;
2364 store_immediate_info *infoc;
2365 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2366 comes before the so far first load, we'll be changing
2367 merged_store->first_stmt. In that case we need to give up if
2368 any of the earlier processed loads clobber with the stmts in the new
2369 range. */
2370 if (info->order < merged_store->first_order)
2371 {
2372 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2373 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2374 return false;
2375 first = info->stmt;
2376 }
2377 /* Similarly, we could change merged_store->last_stmt, so ensure
2378 in that case no stmts in the new range clobber any of the earlier
2379 processed loads. */
2380 else if (info->order > merged_store->last_order)
2381 {
2382 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2383 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2384 return false;
2385 last = info->stmt;
2386 }
2387 /* And finally, we'd be adding a new load to the set, ensure it isn't
2388 clobbered in the new range. */
2389 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2390 return false;
2391
2392 /* Otherwise, we are looking for:
2393 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2394 or
2395 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2396 return true;
2397 }
2398
2399 /* Add all refs loaded to compute VAL to REFS vector. */
2400
2401 void
2402 gather_bswap_load_refs (vec<tree> *refs, tree val)
2403 {
2404 if (TREE_CODE (val) != SSA_NAME)
2405 return;
2406
2407 gimple *stmt = SSA_NAME_DEF_STMT (val);
2408 if (!is_gimple_assign (stmt))
2409 return;
2410
2411 if (gimple_assign_load_p (stmt))
2412 {
2413 refs->safe_push (gimple_assign_rhs1 (stmt));
2414 return;
2415 }
2416
2417 switch (gimple_assign_rhs_class (stmt))
2418 {
2419 case GIMPLE_BINARY_RHS:
2420 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2421 /* FALLTHRU */
2422 case GIMPLE_UNARY_RHS:
2423 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2424 break;
2425 default:
2426 gcc_unreachable ();
2427 }
2428 }
2429
2430 /* Check if there are any stores in M_STORE_INFO after index I
2431 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2432 a potential group ending with END that have their order
2433 smaller than LAST_ORDER. RHS_CODE is the kind of store in the
2434 group. Return true if there are no such stores.
2435 Consider:
2436 MEM[(long long int *)p_28] = 0;
2437 MEM[(long long int *)p_28 + 8B] = 0;
2438 MEM[(long long int *)p_28 + 16B] = 0;
2439 MEM[(long long int *)p_28 + 24B] = 0;
2440 _129 = (int) _130;
2441 MEM[(int *)p_28 + 8B] = _129;
2442 MEM[(int *)p_28].a = -1;
2443 We already have
2444 MEM[(long long int *)p_28] = 0;
2445 MEM[(int *)p_28].a = -1;
2446 stmts in the current group and need to consider if it is safe to
2447 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2448 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2449 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2450 into the group and merging of those 3 stores is successful, merged
2451 stmts will be emitted at the latest store from that group, i.e.
2452 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2453 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2454 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2455 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2456 into the group. That way it will be its own store group and will
2457 not be touched. If RHS_CODE is INTEGER_CST and there are overlapping
2458 INTEGER_CST stores, those are mergeable using merge_overlapping,
2459 so don't return false for those. */
2460
2461 static bool
2462 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2463 enum tree_code rhs_code, unsigned int last_order,
2464 unsigned HOST_WIDE_INT end)
2465 {
2466 unsigned int len = m_store_info.length ();
2467 for (++i; i < len; ++i)
2468 {
2469 store_immediate_info *info = m_store_info[i];
2470 if (info->bitpos >= end)
2471 break;
2472 if (info->order < last_order
2473 && (rhs_code != INTEGER_CST || info->rhs_code != INTEGER_CST))
2474 return false;
2475 }
2476 return true;
2477 }
2478
2479 /* Return true if m_store_info[first] and at least one following store
2480 form a group which store try_size bitsize value which is byte swapped
2481 from a memory load or some value, or identity from some value.
2482 This uses the bswap pass APIs. */
2483
2484 bool
2485 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2486 unsigned int first,
2487 unsigned int try_size)
2488 {
2489 unsigned int len = m_store_info.length (), last = first;
2490 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2491 if (width >= try_size)
2492 return false;
2493 for (unsigned int i = first + 1; i < len; ++i)
2494 {
2495 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2496 || m_store_info[i]->ins_stmt == NULL)
2497 return false;
2498 width += m_store_info[i]->bitsize;
2499 if (width >= try_size)
2500 {
2501 last = i;
2502 break;
2503 }
2504 }
2505 if (width != try_size)
2506 return false;
2507
2508 bool allow_unaligned
2509 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2510 /* Punt if the combined store would not be aligned and we need alignment. */
2511 if (!allow_unaligned)
2512 {
2513 unsigned int align = merged_store->align;
2514 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2515 for (unsigned int i = first + 1; i <= last; ++i)
2516 {
2517 unsigned int this_align;
2518 unsigned HOST_WIDE_INT align_bitpos = 0;
2519 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2520 &this_align, &align_bitpos);
2521 if (this_align > align)
2522 {
2523 align = this_align;
2524 align_base = m_store_info[i]->bitpos - align_bitpos;
2525 }
2526 }
2527 unsigned HOST_WIDE_INT align_bitpos
2528 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2529 if (align_bitpos)
2530 align = least_bit_hwi (align_bitpos);
2531 if (align < try_size)
2532 return false;
2533 }
2534
2535 tree type;
2536 switch (try_size)
2537 {
2538 case 16: type = uint16_type_node; break;
2539 case 32: type = uint32_type_node; break;
2540 case 64: type = uint64_type_node; break;
2541 default: gcc_unreachable ();
2542 }
2543 struct symbolic_number n;
2544 gimple *ins_stmt = NULL;
2545 int vuse_store = -1;
2546 unsigned int first_order = merged_store->first_order;
2547 unsigned int last_order = merged_store->last_order;
2548 gimple *first_stmt = merged_store->first_stmt;
2549 gimple *last_stmt = merged_store->last_stmt;
2550 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2551 store_immediate_info *infof = m_store_info[first];
2552
2553 for (unsigned int i = first; i <= last; ++i)
2554 {
2555 store_immediate_info *info = m_store_info[i];
2556 struct symbolic_number this_n = info->n;
2557 this_n.type = type;
2558 if (!this_n.base_addr)
2559 this_n.range = try_size / BITS_PER_UNIT;
2560 else
2561 /* Update vuse in case it has changed by output_merged_stores. */
2562 this_n.vuse = gimple_vuse (info->ins_stmt);
2563 unsigned int bitpos = info->bitpos - infof->bitpos;
2564 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2565 BYTES_BIG_ENDIAN
2566 ? try_size - info->bitsize - bitpos
2567 : bitpos))
2568 return false;
2569 if (this_n.base_addr && vuse_store)
2570 {
2571 unsigned int j;
2572 for (j = first; j <= last; ++j)
2573 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2574 break;
2575 if (j > last)
2576 {
2577 if (vuse_store == 1)
2578 return false;
2579 vuse_store = 0;
2580 }
2581 }
2582 if (i == first)
2583 {
2584 n = this_n;
2585 ins_stmt = info->ins_stmt;
2586 }
2587 else
2588 {
2589 if (n.base_addr && n.vuse != this_n.vuse)
2590 {
2591 if (vuse_store == 0)
2592 return false;
2593 vuse_store = 1;
2594 }
2595 if (info->order > last_order)
2596 {
2597 last_order = info->order;
2598 last_stmt = info->stmt;
2599 }
2600 else if (info->order < first_order)
2601 {
2602 first_order = info->order;
2603 first_stmt = info->stmt;
2604 }
2605 end = MAX (end, info->bitpos + info->bitsize);
2606
2607 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2608 &this_n, &n);
2609 if (ins_stmt == NULL)
2610 return false;
2611 }
2612 }
2613
2614 uint64_t cmpxchg, cmpnop;
2615 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2616
2617 /* A complete byte swap should make the symbolic number to start with
2618 the largest digit in the highest order byte. Unchanged symbolic
2619 number indicates a read with same endianness as target architecture. */
2620 if (n.n != cmpnop && n.n != cmpxchg)
2621 return false;
2622
2623 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2624 return false;
2625
2626 if (!check_no_overlap (m_store_info, last, LROTATE_EXPR, last_order, end))
2627 return false;
2628
2629 /* Don't handle memory copy this way if normal non-bswap processing
2630 would handle it too. */
2631 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2632 {
2633 unsigned int i;
2634 for (i = first; i <= last; ++i)
2635 if (m_store_info[i]->rhs_code != MEM_REF)
2636 break;
2637 if (i == last + 1)
2638 return false;
2639 }
2640
2641 if (n.n == cmpxchg)
2642 switch (try_size)
2643 {
2644 case 16:
2645 /* Will emit LROTATE_EXPR. */
2646 break;
2647 case 32:
2648 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2649 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2650 break;
2651 return false;
2652 case 64:
2653 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2654 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2655 break;
2656 return false;
2657 default:
2658 gcc_unreachable ();
2659 }
2660
2661 if (!allow_unaligned && n.base_addr)
2662 {
2663 unsigned int align = get_object_alignment (n.src);
2664 if (align < try_size)
2665 return false;
2666 }
2667
2668 /* If each load has vuse of the corresponding store, need to verify
2669 the loads can be sunk right before the last store. */
2670 if (vuse_store == 1)
2671 {
2672 auto_vec<tree, 64> refs;
2673 for (unsigned int i = first; i <= last; ++i)
2674 gather_bswap_load_refs (&refs,
2675 gimple_assign_rhs1 (m_store_info[i]->stmt));
2676
2677 unsigned int i;
2678 tree ref;
2679 FOR_EACH_VEC_ELT (refs, i, ref)
2680 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2681 return false;
2682 n.vuse = NULL_TREE;
2683 }
2684
2685 infof->n = n;
2686 infof->ins_stmt = ins_stmt;
2687 for (unsigned int i = first; i <= last; ++i)
2688 {
2689 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2690 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2691 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2692 if (i != first)
2693 merged_store->merge_into (m_store_info[i]);
2694 }
2695
2696 return true;
2697 }
2698
2699 /* Go through the candidate stores recorded in m_store_info and merge them
2700 into merged_store_group objects recorded into m_merged_store_groups
2701 representing the widened stores. Return true if coalescing was successful
2702 and the number of widened stores is fewer than the original number
2703 of stores. */
2704
2705 bool
2706 imm_store_chain_info::coalesce_immediate_stores ()
2707 {
2708 /* Anything less can't be processed. */
2709 if (m_store_info.length () < 2)
2710 return false;
2711
2712 if (dump_file && (dump_flags & TDF_DETAILS))
2713 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2714 m_store_info.length ());
2715
2716 store_immediate_info *info;
2717 unsigned int i, ignore = 0;
2718
2719 /* Order the stores by the bitposition they write to. */
2720 m_store_info.qsort (sort_by_bitpos);
2721
2722 info = m_store_info[0];
2723 merged_store_group *merged_store = new merged_store_group (info);
2724 if (dump_file && (dump_flags & TDF_DETAILS))
2725 fputs ("New store group\n", dump_file);
2726
2727 FOR_EACH_VEC_ELT (m_store_info, i, info)
2728 {
2729 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2730
2731 if (i <= ignore)
2732 goto done;
2733
2734 /* First try to handle group of stores like:
2735 p[0] = data >> 24;
2736 p[1] = data >> 16;
2737 p[2] = data >> 8;
2738 p[3] = data;
2739 using the bswap framework. */
2740 if (info->bitpos == merged_store->start + merged_store->width
2741 && merged_store->stores.length () == 1
2742 && merged_store->stores[0]->ins_stmt != NULL
2743 && info->ins_stmt != NULL)
2744 {
2745 unsigned int try_size;
2746 for (try_size = 64; try_size >= 16; try_size >>= 1)
2747 if (try_coalesce_bswap (merged_store, i - 1, try_size))
2748 break;
2749
2750 if (try_size >= 16)
2751 {
2752 ignore = i + merged_store->stores.length () - 1;
2753 m_merged_store_groups.safe_push (merged_store);
2754 if (ignore < m_store_info.length ())
2755 merged_store = new merged_store_group (m_store_info[ignore]);
2756 else
2757 merged_store = NULL;
2758 goto done;
2759 }
2760 }
2761
2762 new_bitregion_start
2763 = MIN (merged_store->bitregion_start, info->bitregion_start);
2764 new_bitregion_end
2765 = MAX (merged_store->bitregion_end, info->bitregion_end);
2766
2767 if (info->order >= merged_store->first_nonmergeable_order
2768 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
2769 > (unsigned) param_store_merging_max_size))
2770 ;
2771
2772 /* |---store 1---|
2773 |---store 2---|
2774 Overlapping stores. */
2775 else if (IN_RANGE (info->bitpos, merged_store->start,
2776 merged_store->start + merged_store->width - 1))
2777 {
2778 /* Only allow overlapping stores of constants. */
2779 if (info->rhs_code == INTEGER_CST
2780 && merged_store->only_constants
2781 && info->lp_nr == merged_store->lp_nr)
2782 {
2783 unsigned int last_order
2784 = MAX (merged_store->last_order, info->order);
2785 unsigned HOST_WIDE_INT end
2786 = MAX (merged_store->start + merged_store->width,
2787 info->bitpos + info->bitsize);
2788 if (check_no_overlap (m_store_info, i, INTEGER_CST,
2789 last_order, end))
2790 {
2791 /* check_no_overlap call above made sure there are no
2792 overlapping stores with non-INTEGER_CST rhs_code
2793 in between the first and last of the stores we've
2794 just merged. If there are any INTEGER_CST rhs_code
2795 stores in between, we need to merge_overlapping them
2796 even if in the sort_by_bitpos order there are other
2797 overlapping stores in between. Keep those stores as is.
2798 Example:
2799 MEM[(int *)p_28] = 0;
2800 MEM[(char *)p_28 + 3B] = 1;
2801 MEM[(char *)p_28 + 1B] = 2;
2802 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2803 We can't merge the zero store with the store of two and
2804 not merge anything else, because the store of one is
2805 in the original order in between those two, but in
2806 store_by_bitpos order it comes after the last store that
2807 we can't merge with them. We can merge the first 3 stores
2808 and keep the last store as is though. */
2809 unsigned int len = m_store_info.length ();
2810 unsigned int try_order = last_order;
2811 unsigned int first_nonmergeable_order;
2812 unsigned int k;
2813 bool last_iter = false;
2814 int attempts = 0;
2815 do
2816 {
2817 unsigned int max_order = 0;
2818 unsigned first_nonmergeable_int_order = ~0U;
2819 unsigned HOST_WIDE_INT this_end = end;
2820 k = i;
2821 first_nonmergeable_order = ~0U;
2822 for (unsigned int j = i + 1; j < len; ++j)
2823 {
2824 store_immediate_info *info2 = m_store_info[j];
2825 if (info2->bitpos >= this_end)
2826 break;
2827 if (info2->order < try_order)
2828 {
2829 if (info2->rhs_code != INTEGER_CST)
2830 {
2831 /* Normally check_no_overlap makes sure this
2832 doesn't happen, but if end grows below,
2833 then we need to process more stores than
2834 check_no_overlap verified. Example:
2835 MEM[(int *)p_5] = 0;
2836 MEM[(short *)p_5 + 3B] = 1;
2837 MEM[(char *)p_5 + 4B] = _9;
2838 MEM[(char *)p_5 + 2B] = 2; */
2839 k = 0;
2840 break;
2841 }
2842 k = j;
2843 this_end = MAX (this_end,
2844 info2->bitpos + info2->bitsize);
2845 }
2846 else if (info2->rhs_code == INTEGER_CST
2847 && !last_iter)
2848 {
2849 max_order = MAX (max_order, info2->order + 1);
2850 first_nonmergeable_int_order
2851 = MIN (first_nonmergeable_int_order,
2852 info2->order);
2853 }
2854 else
2855 first_nonmergeable_order
2856 = MIN (first_nonmergeable_order, info2->order);
2857 }
2858 if (k == 0)
2859 {
2860 if (last_order == try_order)
2861 break;
2862 /* If this failed, but only because we grew
2863 try_order, retry with the last working one,
2864 so that we merge at least something. */
2865 try_order = last_order;
2866 last_iter = true;
2867 continue;
2868 }
2869 last_order = try_order;
2870 /* Retry with a larger try_order to see if we could
2871 merge some further INTEGER_CST stores. */
2872 if (max_order
2873 && (first_nonmergeable_int_order
2874 < first_nonmergeable_order))
2875 {
2876 try_order = MIN (max_order,
2877 first_nonmergeable_order);
2878 try_order
2879 = MIN (try_order,
2880 merged_store->first_nonmergeable_order);
2881 if (try_order > last_order && ++attempts < 16)
2882 continue;
2883 }
2884 first_nonmergeable_order
2885 = MIN (first_nonmergeable_order,
2886 first_nonmergeable_int_order);
2887 end = this_end;
2888 break;
2889 }
2890 while (1);
2891
2892 if (k != 0)
2893 {
2894 merged_store->merge_overlapping (info);
2895
2896 merged_store->first_nonmergeable_order
2897 = MIN (merged_store->first_nonmergeable_order,
2898 first_nonmergeable_order);
2899
2900 for (unsigned int j = i + 1; j <= k; j++)
2901 {
2902 store_immediate_info *info2 = m_store_info[j];
2903 gcc_assert (info2->bitpos < end);
2904 if (info2->order < last_order)
2905 {
2906 gcc_assert (info2->rhs_code == INTEGER_CST);
2907 if (info != info2)
2908 merged_store->merge_overlapping (info2);
2909 }
2910 /* Other stores are kept and not merged in any
2911 way. */
2912 }
2913 ignore = k;
2914 goto done;
2915 }
2916 }
2917 }
2918 }
2919 /* |---store 1---||---store 2---|
2920 This store is consecutive to the previous one.
2921 Merge it into the current store group. There can be gaps in between
2922 the stores, but there can't be gaps in between bitregions. */
2923 else if (info->bitregion_start <= merged_store->bitregion_end
2924 && merged_store->can_be_merged_into (info))
2925 {
2926 store_immediate_info *infof = merged_store->stores[0];
2927
2928 /* All the rhs_code ops that take 2 operands are commutative,
2929 swap the operands if it could make the operands compatible. */
2930 if (infof->ops[0].base_addr
2931 && infof->ops[1].base_addr
2932 && info->ops[0].base_addr
2933 && info->ops[1].base_addr
2934 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2935 info->bitpos - infof->bitpos)
2936 && operand_equal_p (info->ops[1].base_addr,
2937 infof->ops[0].base_addr, 0))
2938 {
2939 std::swap (info->ops[0], info->ops[1]);
2940 info->ops_swapped_p = true;
2941 }
2942 if (check_no_overlap (m_store_info, i, info->rhs_code,
2943 MAX (merged_store->last_order, info->order),
2944 MAX (merged_store->start + merged_store->width,
2945 info->bitpos + info->bitsize)))
2946 {
2947 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
2948 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2949 {
2950 info->rhs_code = BIT_INSERT_EXPR;
2951 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2952 info->ops[0].base_addr = NULL_TREE;
2953 }
2954 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2955 {
2956 store_immediate_info *infoj;
2957 unsigned int j;
2958 FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2959 {
2960 infoj->rhs_code = BIT_INSERT_EXPR;
2961 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2962 infoj->ops[0].base_addr = NULL_TREE;
2963 }
2964 }
2965 if ((infof->ops[0].base_addr
2966 ? compatible_load_p (merged_store, info, base_addr, 0)
2967 : !info->ops[0].base_addr)
2968 && (infof->ops[1].base_addr
2969 ? compatible_load_p (merged_store, info, base_addr, 1)
2970 : !info->ops[1].base_addr))
2971 {
2972 merged_store->merge_into (info);
2973 goto done;
2974 }
2975 }
2976 }
2977
2978 /* |---store 1---| <gap> |---store 2---|.
2979 Gap between stores or the rhs not compatible. Start a new group. */
2980
2981 /* Try to apply all the stores recorded for the group to determine
2982 the bitpattern they write and discard it if that fails.
2983 This will also reject single-store groups. */
2984 if (merged_store->apply_stores ())
2985 m_merged_store_groups.safe_push (merged_store);
2986 else
2987 delete merged_store;
2988
2989 merged_store = new merged_store_group (info);
2990 if (dump_file && (dump_flags & TDF_DETAILS))
2991 fputs ("New store group\n", dump_file);
2992
2993 done:
2994 if (dump_file && (dump_flags & TDF_DETAILS))
2995 {
2996 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2997 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
2998 i, info->bitsize, info->bitpos);
2999 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3000 fputc ('\n', dump_file);
3001 }
3002 }
3003
3004 /* Record or discard the last store group. */
3005 if (merged_store)
3006 {
3007 if (merged_store->apply_stores ())
3008 m_merged_store_groups.safe_push (merged_store);
3009 else
3010 delete merged_store;
3011 }
3012
3013 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3014
3015 bool success
3016 = !m_merged_store_groups.is_empty ()
3017 && m_merged_store_groups.length () < m_store_info.length ();
3018
3019 if (success && dump_file)
3020 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3021 m_merged_store_groups.length ());
3022
3023 return success;
3024 }
3025
3026 /* Return the type to use for the merged stores or loads described by STMTS.
3027 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3028 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3029 of the MEM_REFs if any. */
3030
3031 static tree
3032 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3033 unsigned short *cliquep, unsigned short *basep)
3034 {
3035 gimple *stmt;
3036 unsigned int i;
3037 tree type = NULL_TREE;
3038 tree ret = NULL_TREE;
3039 *cliquep = 0;
3040 *basep = 0;
3041
3042 FOR_EACH_VEC_ELT (stmts, i, stmt)
3043 {
3044 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3045 : gimple_assign_lhs (stmt);
3046 tree type1 = reference_alias_ptr_type (ref);
3047 tree base = get_base_address (ref);
3048
3049 if (i == 0)
3050 {
3051 if (TREE_CODE (base) == MEM_REF)
3052 {
3053 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3054 *basep = MR_DEPENDENCE_BASE (base);
3055 }
3056 ret = type = type1;
3057 continue;
3058 }
3059 if (!alias_ptr_types_compatible_p (type, type1))
3060 ret = ptr_type_node;
3061 if (TREE_CODE (base) != MEM_REF
3062 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3063 || *basep != MR_DEPENDENCE_BASE (base))
3064 {
3065 *cliquep = 0;
3066 *basep = 0;
3067 }
3068 }
3069 return ret;
3070 }
3071
3072 /* Return the location_t information we can find among the statements
3073 in STMTS. */
3074
3075 static location_t
3076 get_location_for_stmts (vec<gimple *> &stmts)
3077 {
3078 gimple *stmt;
3079 unsigned int i;
3080
3081 FOR_EACH_VEC_ELT (stmts, i, stmt)
3082 if (gimple_has_location (stmt))
3083 return gimple_location (stmt);
3084
3085 return UNKNOWN_LOCATION;
3086 }
3087
3088 /* Used to decribe a store resulting from splitting a wide store in smaller
3089 regularly-sized stores in split_group. */
3090
3091 class split_store
3092 {
3093 public:
3094 unsigned HOST_WIDE_INT bytepos;
3095 unsigned HOST_WIDE_INT size;
3096 unsigned HOST_WIDE_INT align;
3097 auto_vec<store_immediate_info *> orig_stores;
3098 /* True if there is a single orig stmt covering the whole split store. */
3099 bool orig;
3100 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3101 unsigned HOST_WIDE_INT);
3102 };
3103
3104 /* Simple constructor. */
3105
3106 split_store::split_store (unsigned HOST_WIDE_INT bp,
3107 unsigned HOST_WIDE_INT sz,
3108 unsigned HOST_WIDE_INT al)
3109 : bytepos (bp), size (sz), align (al), orig (false)
3110 {
3111 orig_stores.create (0);
3112 }
3113
3114 /* Record all stores in GROUP that write to the region starting at BITPOS and
3115 is of size BITSIZE. Record infos for such statements in STORES if
3116 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3117 if there is exactly one original store in the range (in that case ignore
3118 clobber stmts, unless there are only clobber stmts). */
3119
3120 static store_immediate_info *
3121 find_constituent_stores (class merged_store_group *group,
3122 vec<store_immediate_info *> *stores,
3123 unsigned int *first,
3124 unsigned HOST_WIDE_INT bitpos,
3125 unsigned HOST_WIDE_INT bitsize)
3126 {
3127 store_immediate_info *info, *ret = NULL;
3128 unsigned int i;
3129 bool second = false;
3130 bool update_first = true;
3131 unsigned HOST_WIDE_INT end = bitpos + bitsize;
3132 for (i = *first; group->stores.iterate (i, &info); ++i)
3133 {
3134 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3135 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3136 if (stmt_end <= bitpos)
3137 {
3138 /* BITPOS passed to this function never decreases from within the
3139 same split_group call, so optimize and don't scan info records
3140 which are known to end before or at BITPOS next time.
3141 Only do it if all stores before this one also pass this. */
3142 if (update_first)
3143 *first = i + 1;
3144 continue;
3145 }
3146 else
3147 update_first = false;
3148
3149 /* The stores in GROUP are ordered by bitposition so if we're past
3150 the region for this group return early. */
3151 if (stmt_start >= end)
3152 return ret;
3153
3154 if (gimple_clobber_p (info->stmt))
3155 {
3156 if (stores)
3157 stores->safe_push (info);
3158 if (ret == NULL)
3159 ret = info;
3160 continue;
3161 }
3162 if (stores)
3163 {
3164 stores->safe_push (info);
3165 if (ret && !gimple_clobber_p (ret->stmt))
3166 {
3167 ret = NULL;
3168 second = true;
3169 }
3170 }
3171 else if (ret && !gimple_clobber_p (ret->stmt))
3172 return NULL;
3173 if (!second)
3174 ret = info;
3175 }
3176 return ret;
3177 }
3178
3179 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3180 store have multiple uses. If any SSA_NAME has multiple uses, also
3181 count statements needed to compute it. */
3182
3183 static unsigned
3184 count_multiple_uses (store_immediate_info *info)
3185 {
3186 gimple *stmt = info->stmt;
3187 unsigned ret = 0;
3188 switch (info->rhs_code)
3189 {
3190 case INTEGER_CST:
3191 return 0;
3192 case BIT_AND_EXPR:
3193 case BIT_IOR_EXPR:
3194 case BIT_XOR_EXPR:
3195 if (info->bit_not_p)
3196 {
3197 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3198 ret = 1; /* Fall through below to return
3199 the BIT_NOT_EXPR stmt and then
3200 BIT_{AND,IOR,XOR}_EXPR and anything it
3201 uses. */
3202 else
3203 /* stmt is after this the BIT_NOT_EXPR. */
3204 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3205 }
3206 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3207 {
3208 ret += 1 + info->ops[0].bit_not_p;
3209 if (info->ops[1].base_addr)
3210 ret += 1 + info->ops[1].bit_not_p;
3211 return ret + 1;
3212 }
3213 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3214 /* stmt is now the BIT_*_EXPR. */
3215 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3216 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3217 else if (info->ops[info->ops_swapped_p].bit_not_p)
3218 {
3219 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3220 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3221 ++ret;
3222 }
3223 if (info->ops[1].base_addr == NULL_TREE)
3224 {
3225 gcc_checking_assert (!info->ops_swapped_p);
3226 return ret;
3227 }
3228 if (!has_single_use (gimple_assign_rhs2 (stmt)))
3229 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3230 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3231 {
3232 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3233 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3234 ++ret;
3235 }
3236 return ret;
3237 case MEM_REF:
3238 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3239 return 1 + info->ops[0].bit_not_p;
3240 else if (info->ops[0].bit_not_p)
3241 {
3242 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3243 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3244 return 1;
3245 }
3246 return 0;
3247 case BIT_INSERT_EXPR:
3248 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3249 default:
3250 gcc_unreachable ();
3251 }
3252 }
3253
3254 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3255 vector (if non-NULL) with split_store structs describing the byte offset
3256 (from the base), the bit size and alignment of each store as well as the
3257 original statements involved in each such split group.
3258 This is to separate the splitting strategy from the statement
3259 building/emission/linking done in output_merged_store.
3260 Return number of new stores.
3261 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3262 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3263 BZERO_FIRST may be true only when the first store covers the whole group
3264 and clears it; if BZERO_FIRST is true, keep that first store in the set
3265 unmodified and emit further stores for the overrides only.
3266 If SPLIT_STORES is NULL, it is just a dry run to count number of
3267 new stores. */
3268
3269 static unsigned int
3270 split_group (merged_store_group *group, bool allow_unaligned_store,
3271 bool allow_unaligned_load, bool bzero_first,
3272 vec<split_store *> *split_stores,
3273 unsigned *total_orig,
3274 unsigned *total_new)
3275 {
3276 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3277 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3278 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3279 unsigned HOST_WIDE_INT group_align = group->align;
3280 unsigned HOST_WIDE_INT align_base = group->align_base;
3281 unsigned HOST_WIDE_INT group_load_align = group_align;
3282 bool any_orig = false;
3283
3284 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3285
3286 if (group->stores[0]->rhs_code == LROTATE_EXPR
3287 || group->stores[0]->rhs_code == NOP_EXPR)
3288 {
3289 gcc_assert (!bzero_first);
3290 /* For bswap framework using sets of stores, all the checking
3291 has been done earlier in try_coalesce_bswap and needs to be
3292 emitted as a single store. */
3293 if (total_orig)
3294 {
3295 /* Avoid the old/new stmt count heuristics. It should be
3296 always beneficial. */
3297 total_new[0] = 1;
3298 total_orig[0] = 2;
3299 }
3300
3301 if (split_stores)
3302 {
3303 unsigned HOST_WIDE_INT align_bitpos
3304 = (group->start - align_base) & (group_align - 1);
3305 unsigned HOST_WIDE_INT align = group_align;
3306 if (align_bitpos)
3307 align = least_bit_hwi (align_bitpos);
3308 bytepos = group->start / BITS_PER_UNIT;
3309 split_store *store
3310 = new split_store (bytepos, group->width, align);
3311 unsigned int first = 0;
3312 find_constituent_stores (group, &store->orig_stores,
3313 &first, group->start, group->width);
3314 split_stores->safe_push (store);
3315 }
3316
3317 return 1;
3318 }
3319
3320 unsigned int ret = 0, first = 0;
3321 unsigned HOST_WIDE_INT try_pos = bytepos;
3322
3323 if (total_orig)
3324 {
3325 unsigned int i;
3326 store_immediate_info *info = group->stores[0];
3327
3328 total_new[0] = 0;
3329 total_orig[0] = 1; /* The orig store. */
3330 info = group->stores[0];
3331 if (info->ops[0].base_addr)
3332 total_orig[0]++;
3333 if (info->ops[1].base_addr)
3334 total_orig[0]++;
3335 switch (info->rhs_code)
3336 {
3337 case BIT_AND_EXPR:
3338 case BIT_IOR_EXPR:
3339 case BIT_XOR_EXPR:
3340 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3341 break;
3342 default:
3343 break;
3344 }
3345 total_orig[0] *= group->stores.length ();
3346
3347 FOR_EACH_VEC_ELT (group->stores, i, info)
3348 {
3349 total_new[0] += count_multiple_uses (info);
3350 total_orig[0] += (info->bit_not_p
3351 + info->ops[0].bit_not_p
3352 + info->ops[1].bit_not_p);
3353 }
3354 }
3355
3356 if (!allow_unaligned_load)
3357 for (int i = 0; i < 2; ++i)
3358 if (group->load_align[i])
3359 group_load_align = MIN (group_load_align, group->load_align[i]);
3360
3361 if (bzero_first)
3362 {
3363 store_immediate_info *gstore;
3364 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3365 if (!gimple_clobber_p (gstore->stmt))
3366 break;
3367 ++first;
3368 ret = 1;
3369 if (split_stores)
3370 {
3371 split_store *store
3372 = new split_store (bytepos, gstore->bitsize, align_base);
3373 store->orig_stores.safe_push (gstore);
3374 store->orig = true;
3375 any_orig = true;
3376 split_stores->safe_push (store);
3377 }
3378 }
3379
3380 while (size > 0)
3381 {
3382 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3383 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3384 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3385 {
3386 /* Skip padding bytes. */
3387 ++try_pos;
3388 size -= BITS_PER_UNIT;
3389 continue;
3390 }
3391
3392 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3393 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3394 unsigned HOST_WIDE_INT align_bitpos
3395 = (try_bitpos - align_base) & (group_align - 1);
3396 unsigned HOST_WIDE_INT align = group_align;
3397 bool found_orig = false;
3398 if (align_bitpos)
3399 align = least_bit_hwi (align_bitpos);
3400 if (!allow_unaligned_store)
3401 try_size = MIN (try_size, align);
3402 if (!allow_unaligned_load)
3403 {
3404 /* If we can't do or don't want to do unaligned stores
3405 as well as loads, we need to take the loads into account
3406 as well. */
3407 unsigned HOST_WIDE_INT load_align = group_load_align;
3408 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3409 if (align_bitpos)
3410 load_align = least_bit_hwi (align_bitpos);
3411 for (int i = 0; i < 2; ++i)
3412 if (group->load_align[i])
3413 {
3414 align_bitpos
3415 = known_alignment (try_bitpos
3416 - group->stores[0]->bitpos
3417 + group->stores[0]->ops[i].bitpos
3418 - group->load_align_base[i]);
3419 if (align_bitpos & (group_load_align - 1))
3420 {
3421 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3422 load_align = MIN (load_align, a);
3423 }
3424 }
3425 try_size = MIN (try_size, load_align);
3426 }
3427 store_immediate_info *info
3428 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3429 if (info && !gimple_clobber_p (info->stmt))
3430 {
3431 /* If there is just one original statement for the range, see if
3432 we can just reuse the original store which could be even larger
3433 than try_size. */
3434 unsigned HOST_WIDE_INT stmt_end
3435 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3436 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3437 stmt_end - try_bitpos);
3438 if (info && info->bitpos >= try_bitpos)
3439 {
3440 store_immediate_info *info2 = NULL;
3441 unsigned int first_copy = first;
3442 if (info->bitpos > try_bitpos
3443 && stmt_end - try_bitpos <= try_size)
3444 {
3445 info2 = find_constituent_stores (group, NULL, &first_copy,
3446 try_bitpos,
3447 info->bitpos - try_bitpos);
3448 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3449 }
3450 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3451 {
3452 info2 = find_constituent_stores (group, NULL, &first_copy,
3453 stmt_end,
3454 (try_bitpos + try_size)
3455 - stmt_end);
3456 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3457 }
3458 if (info2 == NULL)
3459 {
3460 try_size = stmt_end - try_bitpos;
3461 found_orig = true;
3462 goto found;
3463 }
3464 }
3465 }
3466
3467 /* Approximate store bitsize for the case when there are no padding
3468 bits. */
3469 while (try_size > size)
3470 try_size /= 2;
3471 /* Now look for whole padding bytes at the end of that bitsize. */
3472 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3473 if (group->mask[try_pos - bytepos + nonmasked - 1]
3474 != (unsigned char) ~0U
3475 && (!bzero_first
3476 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3477 break;
3478 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3479 {
3480 /* If entire try_size range is padding, skip it. */
3481 try_pos += try_size / BITS_PER_UNIT;
3482 size -= try_size;
3483 continue;
3484 }
3485 /* Otherwise try to decrease try_size if second half, last 3 quarters
3486 etc. are padding. */
3487 nonmasked *= BITS_PER_UNIT;
3488 while (nonmasked <= try_size / 2)
3489 try_size /= 2;
3490 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3491 {
3492 /* Now look for whole padding bytes at the start of that bitsize. */
3493 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3494 for (masked = 0; masked < try_bytesize; ++masked)
3495 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3496 && (!bzero_first
3497 || group->val[try_pos - bytepos + masked] != 0))
3498 break;
3499 masked *= BITS_PER_UNIT;
3500 gcc_assert (masked < try_size);
3501 if (masked >= try_size / 2)
3502 {
3503 while (masked >= try_size / 2)
3504 {
3505 try_size /= 2;
3506 try_pos += try_size / BITS_PER_UNIT;
3507 size -= try_size;
3508 masked -= try_size;
3509 }
3510 /* Need to recompute the alignment, so just retry at the new
3511 position. */
3512 continue;
3513 }
3514 }
3515
3516 found:
3517 ++ret;
3518
3519 if (split_stores)
3520 {
3521 split_store *store
3522 = new split_store (try_pos, try_size, align);
3523 info = find_constituent_stores (group, &store->orig_stores,
3524 &first, try_bitpos, try_size);
3525 if (info
3526 && !gimple_clobber_p (info->stmt)
3527 && info->bitpos >= try_bitpos
3528 && info->bitpos + info->bitsize <= try_bitpos + try_size
3529 && (store->orig_stores.length () == 1
3530 || found_orig
3531 || (info->bitpos == try_bitpos
3532 && (info->bitpos + info->bitsize
3533 == try_bitpos + try_size))))
3534 {
3535 store->orig = true;
3536 any_orig = true;
3537 }
3538 split_stores->safe_push (store);
3539 }
3540
3541 try_pos += try_size / BITS_PER_UNIT;
3542 size -= try_size;
3543 }
3544
3545 if (total_orig)
3546 {
3547 unsigned int i;
3548 split_store *store;
3549 /* If we are reusing some original stores and any of the
3550 original SSA_NAMEs had multiple uses, we need to subtract
3551 those now before we add the new ones. */
3552 if (total_new[0] && any_orig)
3553 {
3554 FOR_EACH_VEC_ELT (*split_stores, i, store)
3555 if (store->orig)
3556 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3557 }
3558 total_new[0] += ret; /* The new store. */
3559 store_immediate_info *info = group->stores[0];
3560 if (info->ops[0].base_addr)
3561 total_new[0] += ret;
3562 if (info->ops[1].base_addr)
3563 total_new[0] += ret;
3564 switch (info->rhs_code)
3565 {
3566 case BIT_AND_EXPR:
3567 case BIT_IOR_EXPR:
3568 case BIT_XOR_EXPR:
3569 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3570 break;
3571 default:
3572 break;
3573 }
3574 FOR_EACH_VEC_ELT (*split_stores, i, store)
3575 {
3576 unsigned int j;
3577 bool bit_not_p[3] = { false, false, false };
3578 /* If all orig_stores have certain bit_not_p set, then
3579 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3580 If some orig_stores have certain bit_not_p set, then
3581 we'd use a BIT_XOR_EXPR with a mask and need to account for
3582 it. */
3583 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3584 {
3585 if (info->ops[0].bit_not_p)
3586 bit_not_p[0] = true;
3587 if (info->ops[1].bit_not_p)
3588 bit_not_p[1] = true;
3589 if (info->bit_not_p)
3590 bit_not_p[2] = true;
3591 }
3592 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3593 }
3594
3595 }
3596
3597 return ret;
3598 }
3599
3600 /* Return the operation through which the operand IDX (if < 2) or
3601 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3602 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3603 the bits should be xored with mask. */
3604
3605 static enum tree_code
3606 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3607 {
3608 unsigned int i;
3609 store_immediate_info *info;
3610 unsigned int cnt = 0;
3611 bool any_paddings = false;
3612 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3613 {
3614 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3615 if (bit_not_p)
3616 {
3617 ++cnt;
3618 tree lhs = gimple_assign_lhs (info->stmt);
3619 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3620 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3621 any_paddings = true;
3622 }
3623 }
3624 mask = NULL_TREE;
3625 if (cnt == 0)
3626 return NOP_EXPR;
3627 if (cnt == split_store->orig_stores.length () && !any_paddings)
3628 return BIT_NOT_EXPR;
3629
3630 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3631 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3632 unsigned char *buf
3633 = XALLOCAVEC (unsigned char, buf_size);
3634 memset (buf, ~0U, buf_size);
3635 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3636 {
3637 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3638 if (!bit_not_p)
3639 continue;
3640 /* Clear regions with bit_not_p and invert afterwards, rather than
3641 clear regions with !bit_not_p, so that gaps in between stores aren't
3642 set in the mask. */
3643 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3644 unsigned HOST_WIDE_INT prec = bitsize;
3645 unsigned int pos_in_buffer = 0;
3646 if (any_paddings)
3647 {
3648 tree lhs = gimple_assign_lhs (info->stmt);
3649 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3650 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3651 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3652 }
3653 if (info->bitpos < try_bitpos)
3654 {
3655 gcc_assert (info->bitpos + bitsize > try_bitpos);
3656 if (!BYTES_BIG_ENDIAN)
3657 {
3658 if (prec <= try_bitpos - info->bitpos)
3659 continue;
3660 prec -= try_bitpos - info->bitpos;
3661 }
3662 bitsize -= try_bitpos - info->bitpos;
3663 if (BYTES_BIG_ENDIAN && prec > bitsize)
3664 prec = bitsize;
3665 }
3666 else
3667 pos_in_buffer = info->bitpos - try_bitpos;
3668 if (prec < bitsize)
3669 {
3670 /* If this is a bool inversion, invert just the least significant
3671 prec bits rather than all bits of it. */
3672 if (BYTES_BIG_ENDIAN)
3673 {
3674 pos_in_buffer += bitsize - prec;
3675 if (pos_in_buffer >= split_store->size)
3676 continue;
3677 }
3678 bitsize = prec;
3679 }
3680 if (pos_in_buffer + bitsize > split_store->size)
3681 bitsize = split_store->size - pos_in_buffer;
3682 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3683 if (BYTES_BIG_ENDIAN)
3684 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3685 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3686 else
3687 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3688 }
3689 for (unsigned int i = 0; i < buf_size; ++i)
3690 buf[i] = ~buf[i];
3691 mask = native_interpret_expr (int_type, buf, buf_size);
3692 return BIT_XOR_EXPR;
3693 }
3694
3695 /* Given a merged store group GROUP output the widened version of it.
3696 The store chain is against the base object BASE.
3697 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3698 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3699 Make sure that the number of statements output is less than the number of
3700 original statements. If a better sequence is possible emit it and
3701 return true. */
3702
3703 bool
3704 imm_store_chain_info::output_merged_store (merged_store_group *group)
3705 {
3706 split_store *split_store;
3707 unsigned int i;
3708 unsigned HOST_WIDE_INT start_byte_pos
3709 = group->bitregion_start / BITS_PER_UNIT;
3710
3711 unsigned int orig_num_stmts = group->stores.length ();
3712 if (orig_num_stmts < 2)
3713 return false;
3714
3715 auto_vec<class split_store *, 32> split_stores;
3716 bool allow_unaligned_store
3717 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
3718 bool allow_unaligned_load = allow_unaligned_store;
3719 bool bzero_first = false;
3720 store_immediate_info *store;
3721 unsigned int num_clobber_stmts = 0;
3722 if (group->stores[0]->rhs_code == INTEGER_CST)
3723 {
3724 FOR_EACH_VEC_ELT (group->stores, i, store)
3725 if (gimple_clobber_p (store->stmt))
3726 num_clobber_stmts++;
3727 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3728 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3729 && group->start == store->bitpos
3730 && group->width == store->bitsize
3731 && (group->start % BITS_PER_UNIT) == 0
3732 && (group->width % BITS_PER_UNIT) == 0)
3733 {
3734 bzero_first = true;
3735 break;
3736 }
3737 else
3738 break;
3739 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3740 if (gimple_clobber_p (store->stmt))
3741 num_clobber_stmts++;
3742 if (num_clobber_stmts == orig_num_stmts)
3743 return false;
3744 orig_num_stmts -= num_clobber_stmts;
3745 }
3746 if (allow_unaligned_store || bzero_first)
3747 {
3748 /* If unaligned stores are allowed, see how many stores we'd emit
3749 for unaligned and how many stores we'd emit for aligned stores.
3750 Only use unaligned stores if it allows fewer stores than aligned.
3751 Similarly, if there is a whole region clear first, prefer expanding
3752 it together compared to expanding clear first followed by merged
3753 further stores. */
3754 unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3755 int pass_min = 0;
3756 for (int pass = 0; pass < 4; ++pass)
3757 {
3758 if (!allow_unaligned_store && (pass & 1) != 0)
3759 continue;
3760 if (!bzero_first && (pass & 2) != 0)
3761 continue;
3762 cnt[pass] = split_group (group, (pass & 1) != 0,
3763 allow_unaligned_load, (pass & 2) != 0,
3764 NULL, NULL, NULL);
3765 if (cnt[pass] < cnt[pass_min])
3766 pass_min = pass;
3767 }
3768 if ((pass_min & 1) == 0)
3769 allow_unaligned_store = false;
3770 if ((pass_min & 2) == 0)
3771 bzero_first = false;
3772 }
3773 unsigned total_orig, total_new;
3774 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
3775 &split_stores, &total_orig, &total_new);
3776
3777 /* Determine if there is a clobber covering the whole group at the start,
3778 followed by proposed split stores that cover the whole group. In that
3779 case, prefer the transformation even if
3780 split_stores.length () == orig_num_stmts. */
3781 bool clobber_first = false;
3782 if (num_clobber_stmts
3783 && gimple_clobber_p (group->stores[0]->stmt)
3784 && group->start == group->stores[0]->bitpos
3785 && group->width == group->stores[0]->bitsize
3786 && (group->start % BITS_PER_UNIT) == 0
3787 && (group->width % BITS_PER_UNIT) == 0)
3788 {
3789 clobber_first = true;
3790 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3791 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3792 if (split_store->bytepos != pos)
3793 {
3794 clobber_first = false;
3795 break;
3796 }
3797 else
3798 pos += split_store->size / BITS_PER_UNIT;
3799 if (pos != (group->start + group->width) / BITS_PER_UNIT)
3800 clobber_first = false;
3801 }
3802
3803 if (split_stores.length () >= orig_num_stmts + clobber_first)
3804 {
3805
3806 /* We didn't manage to reduce the number of statements. Bail out. */
3807 if (dump_file && (dump_flags & TDF_DETAILS))
3808 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3809 " Not profitable to emit new sequence.\n",
3810 orig_num_stmts);
3811 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3812 delete split_store;
3813 return false;
3814 }
3815 if (total_orig <= total_new)
3816 {
3817 /* If number of estimated new statements is above estimated original
3818 statements, bail out too. */
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3820 fprintf (dump_file, "Estimated number of original stmts (%u)"
3821 " not larger than estimated number of new"
3822 " stmts (%u).\n",
3823 total_orig, total_new);
3824 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3825 delete split_store;
3826 return false;
3827 }
3828 if (group->stores[0]->rhs_code == INTEGER_CST)
3829 {
3830 bool all_orig = true;
3831 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3832 if (!split_store->orig)
3833 {
3834 all_orig = false;
3835 break;
3836 }
3837 if (all_orig)
3838 {
3839 unsigned int cnt = split_stores.length ();
3840 store_immediate_info *store;
3841 FOR_EACH_VEC_ELT (group->stores, i, store)
3842 if (gimple_clobber_p (store->stmt))
3843 ++cnt;
3844 /* Punt if we wouldn't make any real changes, i.e. keep all
3845 orig stmts + all clobbers. */
3846 if (cnt == group->stores.length ())
3847 {
3848 if (dump_file && (dump_flags & TDF_DETAILS))
3849 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3850 " Not profitable to emit new sequence.\n",
3851 orig_num_stmts);
3852 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3853 delete split_store;
3854 return false;
3855 }
3856 }
3857 }
3858
3859 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3860 gimple_seq seq = NULL;
3861 tree last_vdef, new_vuse;
3862 last_vdef = gimple_vdef (group->last_stmt);
3863 new_vuse = gimple_vuse (group->last_stmt);
3864 tree bswap_res = NULL_TREE;
3865
3866 /* Clobbers are not removed. */
3867 if (gimple_clobber_p (group->last_stmt))
3868 {
3869 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
3870 gimple_set_vdef (group->last_stmt, new_vuse);
3871 }
3872
3873 if (group->stores[0]->rhs_code == LROTATE_EXPR
3874 || group->stores[0]->rhs_code == NOP_EXPR)
3875 {
3876 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3877 gimple *ins_stmt = group->stores[0]->ins_stmt;
3878 struct symbolic_number *n = &group->stores[0]->n;
3879 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3880
3881 switch (n->range)
3882 {
3883 case 16:
3884 load_type = bswap_type = uint16_type_node;
3885 break;
3886 case 32:
3887 load_type = uint32_type_node;
3888 if (bswap)
3889 {
3890 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3891 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3892 }
3893 break;
3894 case 64:
3895 load_type = uint64_type_node;
3896 if (bswap)
3897 {
3898 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3899 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3900 }
3901 break;
3902 default:
3903 gcc_unreachable ();
3904 }
3905
3906 /* If the loads have each vuse of the corresponding store,
3907 we've checked the aliasing already in try_coalesce_bswap and
3908 we want to sink the need load into seq. So need to use new_vuse
3909 on the load. */
3910 if (n->base_addr)
3911 {
3912 if (n->vuse == NULL)
3913 {
3914 n->vuse = new_vuse;
3915 ins_stmt = NULL;
3916 }
3917 else
3918 /* Update vuse in case it has changed by output_merged_stores. */
3919 n->vuse = gimple_vuse (ins_stmt);
3920 }
3921 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3922 bswap_type, load_type, n, bswap);
3923 gcc_assert (bswap_res);
3924 }
3925
3926 gimple *stmt = NULL;
3927 auto_vec<gimple *, 32> orig_stmts;
3928 gimple_seq this_seq;
3929 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3930 is_gimple_mem_ref_addr, NULL_TREE);
3931 gimple_seq_add_seq_without_update (&seq, this_seq);
3932
3933 tree load_addr[2] = { NULL_TREE, NULL_TREE };
3934 gimple_seq load_seq[2] = { NULL, NULL };
3935 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3936 for (int j = 0; j < 2; ++j)
3937 {
3938 store_operand_info &op = group->stores[0]->ops[j];
3939 if (op.base_addr == NULL_TREE)
3940 continue;
3941
3942 store_immediate_info *infol = group->stores.last ();
3943 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3944 {
3945 /* We can't pick the location randomly; while we've verified
3946 all the loads have the same vuse, they can be still in different
3947 basic blocks and we need to pick the one from the last bb:
3948 int x = q[0];
3949 if (x == N) return;
3950 int y = q[1];
3951 p[0] = x;
3952 p[1] = y;
3953 otherwise if we put the wider load at the q[0] load, we might
3954 segfault if q[1] is not mapped. */
3955 basic_block bb = gimple_bb (op.stmt);
3956 gimple *ostmt = op.stmt;
3957 store_immediate_info *info;
3958 FOR_EACH_VEC_ELT (group->stores, i, info)
3959 {
3960 gimple *tstmt = info->ops[j].stmt;
3961 basic_block tbb = gimple_bb (tstmt);
3962 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3963 {
3964 ostmt = tstmt;
3965 bb = tbb;
3966 }
3967 }
3968 load_gsi[j] = gsi_for_stmt (ostmt);
3969 load_addr[j]
3970 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3971 &load_seq[j], is_gimple_mem_ref_addr,
3972 NULL_TREE);
3973 }
3974 else if (operand_equal_p (base_addr, op.base_addr, 0))
3975 load_addr[j] = addr;
3976 else
3977 {
3978 load_addr[j]
3979 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3980 &this_seq, is_gimple_mem_ref_addr,
3981 NULL_TREE);
3982 gimple_seq_add_seq_without_update (&seq, this_seq);
3983 }
3984 }
3985
3986 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3987 {
3988 unsigned HOST_WIDE_INT try_size = split_store->size;
3989 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
3990 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3991 unsigned HOST_WIDE_INT align = split_store->align;
3992 tree dest, src;
3993 location_t loc;
3994 if (split_store->orig)
3995 {
3996 /* If there is just a single non-clobber constituent store
3997 which covers the whole area, just reuse the lhs and rhs. */
3998 gimple *orig_stmt = NULL;
3999 store_immediate_info *store;
4000 unsigned int j;
4001 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4002 if (!gimple_clobber_p (store->stmt))
4003 {
4004 orig_stmt = store->stmt;
4005 break;
4006 }
4007 dest = gimple_assign_lhs (orig_stmt);
4008 src = gimple_assign_rhs1 (orig_stmt);
4009 loc = gimple_location (orig_stmt);
4010 }
4011 else
4012 {
4013 store_immediate_info *info;
4014 unsigned short clique, base;
4015 unsigned int k;
4016 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4017 orig_stmts.safe_push (info->stmt);
4018 tree offset_type
4019 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4020 loc = get_location_for_stmts (orig_stmts);
4021 orig_stmts.truncate (0);
4022
4023 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4024 int_type = build_aligned_type (int_type, align);
4025 dest = fold_build2 (MEM_REF, int_type, addr,
4026 build_int_cst (offset_type, try_pos));
4027 if (TREE_CODE (dest) == MEM_REF)
4028 {
4029 MR_DEPENDENCE_CLIQUE (dest) = clique;
4030 MR_DEPENDENCE_BASE (dest) = base;
4031 }
4032
4033 tree mask;
4034 if (bswap_res)
4035 mask = integer_zero_node;
4036 else
4037 mask = native_interpret_expr (int_type,
4038 group->mask + try_pos
4039 - start_byte_pos,
4040 group->buf_size);
4041
4042 tree ops[2];
4043 for (int j = 0;
4044 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4045 ++j)
4046 {
4047 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4048 if (bswap_res)
4049 ops[j] = bswap_res;
4050 else if (op.base_addr)
4051 {
4052 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4053 orig_stmts.safe_push (info->ops[j].stmt);
4054
4055 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4056 &clique, &base);
4057 location_t load_loc = get_location_for_stmts (orig_stmts);
4058 orig_stmts.truncate (0);
4059
4060 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4061 unsigned HOST_WIDE_INT align_bitpos
4062 = known_alignment (try_bitpos
4063 - split_store->orig_stores[0]->bitpos
4064 + op.bitpos);
4065 if (align_bitpos & (load_align - 1))
4066 load_align = least_bit_hwi (align_bitpos);
4067
4068 tree load_int_type
4069 = build_nonstandard_integer_type (try_size, UNSIGNED);
4070 load_int_type
4071 = build_aligned_type (load_int_type, load_align);
4072
4073 poly_uint64 load_pos
4074 = exact_div (try_bitpos
4075 - split_store->orig_stores[0]->bitpos
4076 + op.bitpos,
4077 BITS_PER_UNIT);
4078 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4079 build_int_cst (offset_type, load_pos));
4080 if (TREE_CODE (ops[j]) == MEM_REF)
4081 {
4082 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4083 MR_DEPENDENCE_BASE (ops[j]) = base;
4084 }
4085 if (!integer_zerop (mask))
4086 /* The load might load some bits (that will be masked off
4087 later on) uninitialized, avoid -W*uninitialized
4088 warnings in that case. */
4089 TREE_NO_WARNING (ops[j]) = 1;
4090
4091 stmt = gimple_build_assign (make_ssa_name (int_type),
4092 ops[j]);
4093 gimple_set_location (stmt, load_loc);
4094 if (gsi_bb (load_gsi[j]))
4095 {
4096 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4097 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4098 }
4099 else
4100 {
4101 gimple_set_vuse (stmt, new_vuse);
4102 gimple_seq_add_stmt_without_update (&seq, stmt);
4103 }
4104 ops[j] = gimple_assign_lhs (stmt);
4105 tree xor_mask;
4106 enum tree_code inv_op
4107 = invert_op (split_store, j, int_type, xor_mask);
4108 if (inv_op != NOP_EXPR)
4109 {
4110 stmt = gimple_build_assign (make_ssa_name (int_type),
4111 inv_op, ops[j], xor_mask);
4112 gimple_set_location (stmt, load_loc);
4113 ops[j] = gimple_assign_lhs (stmt);
4114
4115 if (gsi_bb (load_gsi[j]))
4116 gimple_seq_add_stmt_without_update (&load_seq[j],
4117 stmt);
4118 else
4119 gimple_seq_add_stmt_without_update (&seq, stmt);
4120 }
4121 }
4122 else
4123 ops[j] = native_interpret_expr (int_type,
4124 group->val + try_pos
4125 - start_byte_pos,
4126 group->buf_size);
4127 }
4128
4129 switch (split_store->orig_stores[0]->rhs_code)
4130 {
4131 case BIT_AND_EXPR:
4132 case BIT_IOR_EXPR:
4133 case BIT_XOR_EXPR:
4134 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4135 {
4136 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4137 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4138 }
4139 location_t bit_loc;
4140 bit_loc = get_location_for_stmts (orig_stmts);
4141 orig_stmts.truncate (0);
4142
4143 stmt
4144 = gimple_build_assign (make_ssa_name (int_type),
4145 split_store->orig_stores[0]->rhs_code,
4146 ops[0], ops[1]);
4147 gimple_set_location (stmt, bit_loc);
4148 /* If there is just one load and there is a separate
4149 load_seq[0], emit the bitwise op right after it. */
4150 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4151 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4152 /* Otherwise, if at least one load is in seq, we need to
4153 emit the bitwise op right before the store. If there
4154 are two loads and are emitted somewhere else, it would
4155 be better to emit the bitwise op as early as possible;
4156 we don't track where that would be possible right now
4157 though. */
4158 else
4159 gimple_seq_add_stmt_without_update (&seq, stmt);
4160 src = gimple_assign_lhs (stmt);
4161 tree xor_mask;
4162 enum tree_code inv_op;
4163 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4164 if (inv_op != NOP_EXPR)
4165 {
4166 stmt = gimple_build_assign (make_ssa_name (int_type),
4167 inv_op, src, xor_mask);
4168 gimple_set_location (stmt, bit_loc);
4169 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4170 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4171 else
4172 gimple_seq_add_stmt_without_update (&seq, stmt);
4173 src = gimple_assign_lhs (stmt);
4174 }
4175 break;
4176 case LROTATE_EXPR:
4177 case NOP_EXPR:
4178 src = ops[0];
4179 if (!is_gimple_val (src))
4180 {
4181 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4182 src);
4183 gimple_seq_add_stmt_without_update (&seq, stmt);
4184 src = gimple_assign_lhs (stmt);
4185 }
4186 if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4187 {
4188 stmt = gimple_build_assign (make_ssa_name (int_type),
4189 NOP_EXPR, src);
4190 gimple_seq_add_stmt_without_update (&seq, stmt);
4191 src = gimple_assign_lhs (stmt);
4192 }
4193 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4194 if (inv_op != NOP_EXPR)
4195 {
4196 stmt = gimple_build_assign (make_ssa_name (int_type),
4197 inv_op, src, xor_mask);
4198 gimple_set_location (stmt, loc);
4199 gimple_seq_add_stmt_without_update (&seq, stmt);
4200 src = gimple_assign_lhs (stmt);
4201 }
4202 break;
4203 default:
4204 src = ops[0];
4205 break;
4206 }
4207
4208 /* If bit insertion is required, we use the source as an accumulator
4209 into which the successive bit-field values are manually inserted.
4210 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4211 if (group->bit_insertion)
4212 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4213 if (info->rhs_code == BIT_INSERT_EXPR
4214 && info->bitpos < try_bitpos + try_size
4215 && info->bitpos + info->bitsize > try_bitpos)
4216 {
4217 /* Mask, truncate, convert to final type, shift and ior into
4218 the accumulator. Note that every step can be a no-op. */
4219 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4220 const HOST_WIDE_INT end_gap
4221 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4222 tree tem = info->ops[0].val;
4223 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4224 {
4225 tree bitfield_type
4226 = build_nonstandard_integer_type (info->bitsize,
4227 UNSIGNED);
4228 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4229 }
4230 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4231 {
4232 const unsigned HOST_WIDE_INT imask
4233 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4234 tem = gimple_build (&seq, loc,
4235 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4236 build_int_cst (TREE_TYPE (tem),
4237 imask));
4238 }
4239 const HOST_WIDE_INT shift
4240 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4241 if (shift < 0)
4242 tem = gimple_build (&seq, loc,
4243 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4244 build_int_cst (NULL_TREE, -shift));
4245 tem = gimple_convert (&seq, loc, int_type, tem);
4246 if (shift > 0)
4247 tem = gimple_build (&seq, loc,
4248 LSHIFT_EXPR, int_type, tem,
4249 build_int_cst (NULL_TREE, shift));
4250 src = gimple_build (&seq, loc,
4251 BIT_IOR_EXPR, int_type, tem, src);
4252 }
4253
4254 if (!integer_zerop (mask))
4255 {
4256 tree tem = make_ssa_name (int_type);
4257 tree load_src = unshare_expr (dest);
4258 /* The load might load some or all bits uninitialized,
4259 avoid -W*uninitialized warnings in that case.
4260 As optimization, it would be nice if all the bits are
4261 provably uninitialized (no stores at all yet or previous
4262 store a CLOBBER) we'd optimize away the load and replace
4263 it e.g. with 0. */
4264 TREE_NO_WARNING (load_src) = 1;
4265 stmt = gimple_build_assign (tem, load_src);
4266 gimple_set_location (stmt, loc);
4267 gimple_set_vuse (stmt, new_vuse);
4268 gimple_seq_add_stmt_without_update (&seq, stmt);
4269
4270 /* FIXME: If there is a single chunk of zero bits in mask,
4271 perhaps use BIT_INSERT_EXPR instead? */
4272 stmt = gimple_build_assign (make_ssa_name (int_type),
4273 BIT_AND_EXPR, tem, mask);
4274 gimple_set_location (stmt, loc);
4275 gimple_seq_add_stmt_without_update (&seq, stmt);
4276 tem = gimple_assign_lhs (stmt);
4277
4278 if (TREE_CODE (src) == INTEGER_CST)
4279 src = wide_int_to_tree (int_type,
4280 wi::bit_and_not (wi::to_wide (src),
4281 wi::to_wide (mask)));
4282 else
4283 {
4284 tree nmask
4285 = wide_int_to_tree (int_type,
4286 wi::bit_not (wi::to_wide (mask)));
4287 stmt = gimple_build_assign (make_ssa_name (int_type),
4288 BIT_AND_EXPR, src, nmask);
4289 gimple_set_location (stmt, loc);
4290 gimple_seq_add_stmt_without_update (&seq, stmt);
4291 src = gimple_assign_lhs (stmt);
4292 }
4293 stmt = gimple_build_assign (make_ssa_name (int_type),
4294 BIT_IOR_EXPR, tem, src);
4295 gimple_set_location (stmt, loc);
4296 gimple_seq_add_stmt_without_update (&seq, stmt);
4297 src = gimple_assign_lhs (stmt);
4298 }
4299 }
4300
4301 stmt = gimple_build_assign (dest, src);
4302 gimple_set_location (stmt, loc);
4303 gimple_set_vuse (stmt, new_vuse);
4304 gimple_seq_add_stmt_without_update (&seq, stmt);
4305
4306 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4307 add_stmt_to_eh_lp (stmt, group->lp_nr);
4308
4309 tree new_vdef;
4310 if (i < split_stores.length () - 1)
4311 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4312 else
4313 new_vdef = last_vdef;
4314
4315 gimple_set_vdef (stmt, new_vdef);
4316 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4317 new_vuse = new_vdef;
4318 }
4319
4320 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4321 delete split_store;
4322
4323 gcc_assert (seq);
4324 if (dump_file)
4325 {
4326 fprintf (dump_file,
4327 "New sequence of %u stores to replace old one of %u stores\n",
4328 split_stores.length (), orig_num_stmts);
4329 if (dump_flags & TDF_DETAILS)
4330 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4331 }
4332
4333 if (gimple_clobber_p (group->last_stmt))
4334 update_stmt (group->last_stmt);
4335
4336 if (group->lp_nr > 0)
4337 {
4338 /* We're going to insert a sequence of (potentially) throwing stores
4339 into an active EH region. This means that we're going to create
4340 new basic blocks with EH edges pointing to the post landing pad
4341 and, therefore, to have to update its PHI nodes, if any. For the
4342 virtual PHI node, we're going to use the VDEFs created above, but
4343 for the other nodes, we need to record the original reaching defs. */
4344 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4345 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4346 basic_block last_bb = gimple_bb (group->last_stmt);
4347 edge last_edge = find_edge (last_bb, lp_bb);
4348 auto_vec<tree, 16> last_defs;
4349 gphi_iterator gpi;
4350 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4351 {
4352 gphi *phi = gpi.phi ();
4353 tree last_def;
4354 if (virtual_operand_p (gimple_phi_result (phi)))
4355 last_def = NULL_TREE;
4356 else
4357 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4358 last_defs.safe_push (last_def);
4359 }
4360
4361 /* Do the insertion. Then, if new basic blocks have been created in the
4362 process, rewind the chain of VDEFs create above to walk the new basic
4363 blocks and update the corresponding arguments of the PHI nodes. */
4364 update_modified_stmts (seq);
4365 if (gimple_find_sub_bbs (seq, &last_gsi))
4366 while (last_vdef != gimple_vuse (group->last_stmt))
4367 {
4368 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4369 if (stmt_could_throw_p (cfun, stmt))
4370 {
4371 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4372 unsigned int i;
4373 for (gpi = gsi_start_phis (lp_bb), i = 0;
4374 !gsi_end_p (gpi);
4375 gsi_next (&gpi), i++)
4376 {
4377 gphi *phi = gpi.phi ();
4378 tree new_def;
4379 if (virtual_operand_p (gimple_phi_result (phi)))
4380 new_def = last_vdef;
4381 else
4382 new_def = last_defs[i];
4383 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4384 }
4385 }
4386 last_vdef = gimple_vuse (stmt);
4387 }
4388 }
4389 else
4390 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4391
4392 for (int j = 0; j < 2; ++j)
4393 if (load_seq[j])
4394 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4395
4396 return true;
4397 }
4398
4399 /* Process the merged_store_group objects created in the coalescing phase.
4400 The stores are all against the base object BASE.
4401 Try to output the widened stores and delete the original statements if
4402 successful. Return true iff any changes were made. */
4403
4404 bool
4405 imm_store_chain_info::output_merged_stores ()
4406 {
4407 unsigned int i;
4408 merged_store_group *merged_store;
4409 bool ret = false;
4410 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4411 {
4412 if (dbg_cnt (store_merging)
4413 && output_merged_store (merged_store))
4414 {
4415 unsigned int j;
4416 store_immediate_info *store;
4417 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4418 {
4419 gimple *stmt = store->stmt;
4420 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4421 /* Don't remove clobbers, they are still useful even if
4422 everything is overwritten afterwards. */
4423 if (gimple_clobber_p (stmt))
4424 continue;
4425 gsi_remove (&gsi, true);
4426 if (store->lp_nr)
4427 remove_stmt_from_eh_lp (stmt);
4428 if (stmt != merged_store->last_stmt)
4429 {
4430 unlink_stmt_vdef (stmt);
4431 release_defs (stmt);
4432 }
4433 }
4434 ret = true;
4435 }
4436 }
4437 if (ret && dump_file)
4438 fprintf (dump_file, "Merging successful!\n");
4439
4440 return ret;
4441 }
4442
4443 /* Coalesce the store_immediate_info objects recorded against the base object
4444 BASE in the first phase and output them.
4445 Delete the allocated structures.
4446 Return true if any changes were made. */
4447
4448 bool
4449 imm_store_chain_info::terminate_and_process_chain ()
4450 {
4451 /* Process store chain. */
4452 bool ret = false;
4453 if (m_store_info.length () > 1)
4454 {
4455 ret = coalesce_immediate_stores ();
4456 if (ret)
4457 ret = output_merged_stores ();
4458 }
4459
4460 /* Delete all the entries we allocated ourselves. */
4461 store_immediate_info *info;
4462 unsigned int i;
4463 FOR_EACH_VEC_ELT (m_store_info, i, info)
4464 delete info;
4465
4466 merged_store_group *merged_info;
4467 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4468 delete merged_info;
4469
4470 return ret;
4471 }
4472
4473 /* Return true iff LHS is a destination potentially interesting for
4474 store merging. In practice these are the codes that get_inner_reference
4475 can process. */
4476
4477 static bool
4478 lhs_valid_for_store_merging_p (tree lhs)
4479 {
4480 if (DECL_P (lhs))
4481 return true;
4482
4483 switch (TREE_CODE (lhs))
4484 {
4485 case ARRAY_REF:
4486 case ARRAY_RANGE_REF:
4487 case BIT_FIELD_REF:
4488 case COMPONENT_REF:
4489 case MEM_REF:
4490 return true;
4491 default:
4492 return false;
4493 }
4494
4495 gcc_unreachable ();
4496 }
4497
4498 /* Return true if the tree RHS is a constant we want to consider
4499 during store merging. In practice accept all codes that
4500 native_encode_expr accepts. */
4501
4502 static bool
4503 rhs_valid_for_store_merging_p (tree rhs)
4504 {
4505 unsigned HOST_WIDE_INT size;
4506 if (TREE_CODE (rhs) == CONSTRUCTOR
4507 && CONSTRUCTOR_NELTS (rhs) == 0
4508 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4509 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4510 return true;
4511 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4512 && native_encode_expr (rhs, NULL, size) != 0);
4513 }
4514
4515 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4516 and return true on success or false on failure. */
4517
4518 static bool
4519 adjust_bit_pos (poly_offset_int byte_off,
4520 poly_int64 *pbitpos,
4521 poly_uint64 *pbitregion_start,
4522 poly_uint64 *pbitregion_end)
4523 {
4524 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4525 bit_off += *pbitpos;
4526
4527 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4528 {
4529 if (maybe_ne (*pbitregion_end, 0U))
4530 {
4531 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4532 bit_off += *pbitregion_start;
4533 if (bit_off.to_uhwi (pbitregion_start))
4534 {
4535 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4536 bit_off += *pbitregion_end;
4537 if (!bit_off.to_uhwi (pbitregion_end))
4538 *pbitregion_end = 0;
4539 }
4540 else
4541 *pbitregion_end = 0;
4542 }
4543 return true;
4544 }
4545 else
4546 return false;
4547 }
4548
4549 /* If MEM is a memory reference usable for store merging (either as
4550 store destination or for loads), return the non-NULL base_addr
4551 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4552 Otherwise return NULL, *PBITPOS should be still valid even for that
4553 case. */
4554
4555 static tree
4556 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4557 poly_uint64 *pbitpos,
4558 poly_uint64 *pbitregion_start,
4559 poly_uint64 *pbitregion_end)
4560 {
4561 poly_int64 bitsize, bitpos;
4562 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4563 machine_mode mode;
4564 int unsignedp = 0, reversep = 0, volatilep = 0;
4565 tree offset;
4566 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4567 &unsignedp, &reversep, &volatilep);
4568 *pbitsize = bitsize;
4569 if (known_eq (bitsize, 0))
4570 return NULL_TREE;
4571
4572 if (TREE_CODE (mem) == COMPONENT_REF
4573 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4574 {
4575 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4576 if (maybe_ne (bitregion_end, 0U))
4577 bitregion_end += 1;
4578 }
4579
4580 if (reversep)
4581 return NULL_TREE;
4582
4583 /* We do not want to rewrite TARGET_MEM_REFs. */
4584 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4585 return NULL_TREE;
4586 /* In some cases get_inner_reference may return a
4587 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4588 canonicalize the base_addr to MEM_REF [ptr] and take
4589 byteoffset into account in the bitpos. This occurs in
4590 PR 23684 and this way we can catch more chains. */
4591 else if (TREE_CODE (base_addr) == MEM_REF)
4592 {
4593 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4594 &bitregion_start, &bitregion_end))
4595 return NULL_TREE;
4596 base_addr = TREE_OPERAND (base_addr, 0);
4597 }
4598 /* get_inner_reference returns the base object, get at its
4599 address now. */
4600 else
4601 {
4602 if (maybe_lt (bitpos, 0))
4603 return NULL_TREE;
4604 base_addr = build_fold_addr_expr (base_addr);
4605 }
4606
4607 if (offset)
4608 {
4609 /* If the access is variable offset then a base decl has to be
4610 address-taken to be able to emit pointer-based stores to it.
4611 ??? We might be able to get away with re-using the original
4612 base up to the first variable part and then wrapping that inside
4613 a BIT_FIELD_REF. */
4614 tree base = get_base_address (base_addr);
4615 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4616 return NULL_TREE;
4617
4618 /* Similarly to above for the base, remove constant from the offset. */
4619 if (TREE_CODE (offset) == PLUS_EXPR
4620 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4621 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4622 &bitpos, &bitregion_start, &bitregion_end))
4623 offset = TREE_OPERAND (offset, 0);
4624
4625 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4626 base_addr, offset);
4627 }
4628
4629 if (known_eq (bitregion_end, 0U))
4630 {
4631 bitregion_start = round_down_to_byte_boundary (bitpos);
4632 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4633 }
4634
4635 *pbitsize = bitsize;
4636 *pbitpos = bitpos;
4637 *pbitregion_start = bitregion_start;
4638 *pbitregion_end = bitregion_end;
4639 return base_addr;
4640 }
4641
4642 /* Return true if STMT is a load that can be used for store merging.
4643 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4644 BITREGION_END are properties of the corresponding store. */
4645
4646 static bool
4647 handled_load (gimple *stmt, store_operand_info *op,
4648 poly_uint64 bitsize, poly_uint64 bitpos,
4649 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4650 {
4651 if (!is_gimple_assign (stmt))
4652 return false;
4653 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4654 {
4655 tree rhs1 = gimple_assign_rhs1 (stmt);
4656 if (TREE_CODE (rhs1) == SSA_NAME
4657 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4658 bitregion_start, bitregion_end))
4659 {
4660 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
4661 been optimized earlier, but if allowed here, would confuse the
4662 multiple uses counting. */
4663 if (op->bit_not_p)
4664 return false;
4665 op->bit_not_p = !op->bit_not_p;
4666 return true;
4667 }
4668 return false;
4669 }
4670 if (gimple_vuse (stmt)
4671 && gimple_assign_load_p (stmt)
4672 && !stmt_can_throw_internal (cfun, stmt)
4673 && !gimple_has_volatile_ops (stmt))
4674 {
4675 tree mem = gimple_assign_rhs1 (stmt);
4676 op->base_addr
4677 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4678 &op->bitregion_start,
4679 &op->bitregion_end);
4680 if (op->base_addr != NULL_TREE
4681 && known_eq (op->bitsize, bitsize)
4682 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4683 && known_ge (op->bitpos - op->bitregion_start,
4684 bitpos - bitregion_start)
4685 && known_ge (op->bitregion_end - op->bitpos,
4686 bitregion_end - bitpos))
4687 {
4688 op->stmt = stmt;
4689 op->val = mem;
4690 op->bit_not_p = false;
4691 return true;
4692 }
4693 }
4694 return false;
4695 }
4696
4697 /* Return the index number of the landing pad for STMT, if any. */
4698
4699 static int
4700 lp_nr_for_store (gimple *stmt)
4701 {
4702 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4703 return 0;
4704
4705 if (!stmt_could_throw_p (cfun, stmt))
4706 return 0;
4707
4708 return lookup_stmt_eh_lp (stmt);
4709 }
4710
4711 /* Record the store STMT for store merging optimization if it can be
4712 optimized. Return true if any changes were made. */
4713
4714 bool
4715 pass_store_merging::process_store (gimple *stmt)
4716 {
4717 tree lhs = gimple_assign_lhs (stmt);
4718 tree rhs = gimple_assign_rhs1 (stmt);
4719 poly_uint64 bitsize, bitpos;
4720 poly_uint64 bitregion_start, bitregion_end;
4721 tree base_addr
4722 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4723 &bitregion_start, &bitregion_end);
4724 if (known_eq (bitsize, 0U))
4725 return false;
4726
4727 bool invalid = (base_addr == NULL_TREE
4728 || (maybe_gt (bitsize,
4729 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4730 && TREE_CODE (rhs) != INTEGER_CST
4731 && (TREE_CODE (rhs) != CONSTRUCTOR
4732 || CONSTRUCTOR_NELTS (rhs) != 0)));
4733 enum tree_code rhs_code = ERROR_MARK;
4734 bool bit_not_p = false;
4735 struct symbolic_number n;
4736 gimple *ins_stmt = NULL;
4737 store_operand_info ops[2];
4738 if (invalid)
4739 ;
4740 else if (rhs_valid_for_store_merging_p (rhs))
4741 {
4742 rhs_code = INTEGER_CST;
4743 ops[0].val = rhs;
4744 }
4745 else if (TREE_CODE (rhs) != SSA_NAME)
4746 invalid = true;
4747 else
4748 {
4749 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4750 if (!is_gimple_assign (def_stmt))
4751 invalid = true;
4752 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4753 bitregion_start, bitregion_end))
4754 rhs_code = MEM_REF;
4755 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4756 {
4757 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4758 if (TREE_CODE (rhs1) == SSA_NAME
4759 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4760 {
4761 bit_not_p = true;
4762 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4763 }
4764 }
4765
4766 if (rhs_code == ERROR_MARK && !invalid)
4767 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4768 {
4769 case BIT_AND_EXPR:
4770 case BIT_IOR_EXPR:
4771 case BIT_XOR_EXPR:
4772 tree rhs1, rhs2;
4773 rhs1 = gimple_assign_rhs1 (def_stmt);
4774 rhs2 = gimple_assign_rhs2 (def_stmt);
4775 invalid = true;
4776 if (TREE_CODE (rhs1) != SSA_NAME)
4777 break;
4778 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4779 if (!is_gimple_assign (def_stmt1)
4780 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4781 bitregion_start, bitregion_end))
4782 break;
4783 if (rhs_valid_for_store_merging_p (rhs2))
4784 ops[1].val = rhs2;
4785 else if (TREE_CODE (rhs2) != SSA_NAME)
4786 break;
4787 else
4788 {
4789 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4790 if (!is_gimple_assign (def_stmt2))
4791 break;
4792 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4793 bitregion_start, bitregion_end))
4794 break;
4795 }
4796 invalid = false;
4797 break;
4798 default:
4799 invalid = true;
4800 break;
4801 }
4802
4803 unsigned HOST_WIDE_INT const_bitsize;
4804 if (bitsize.is_constant (&const_bitsize)
4805 && (const_bitsize % BITS_PER_UNIT) == 0
4806 && const_bitsize <= 64
4807 && multiple_p (bitpos, BITS_PER_UNIT))
4808 {
4809 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4810 if (ins_stmt)
4811 {
4812 uint64_t nn = n.n;
4813 for (unsigned HOST_WIDE_INT i = 0;
4814 i < const_bitsize;
4815 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4816 if ((nn & MARKER_MASK) == 0
4817 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4818 {
4819 ins_stmt = NULL;
4820 break;
4821 }
4822 if (ins_stmt)
4823 {
4824 if (invalid)
4825 {
4826 rhs_code = LROTATE_EXPR;
4827 ops[0].base_addr = NULL_TREE;
4828 ops[1].base_addr = NULL_TREE;
4829 }
4830 invalid = false;
4831 }
4832 }
4833 }
4834
4835 if (invalid
4836 && bitsize.is_constant (&const_bitsize)
4837 && ((const_bitsize % BITS_PER_UNIT) != 0
4838 || !multiple_p (bitpos, BITS_PER_UNIT))
4839 && const_bitsize <= 64)
4840 {
4841 /* Bypass a conversion to the bit-field type. */
4842 if (!bit_not_p
4843 && is_gimple_assign (def_stmt)
4844 && CONVERT_EXPR_CODE_P (rhs_code))
4845 {
4846 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4847 if (TREE_CODE (rhs1) == SSA_NAME
4848 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
4849 rhs = rhs1;
4850 }
4851 rhs_code = BIT_INSERT_EXPR;
4852 bit_not_p = false;
4853 ops[0].val = rhs;
4854 ops[0].base_addr = NULL_TREE;
4855 ops[1].base_addr = NULL_TREE;
4856 invalid = false;
4857 }
4858 }
4859
4860 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4861 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4862 if (invalid
4863 || !bitsize.is_constant (&const_bitsize)
4864 || !bitpos.is_constant (&const_bitpos)
4865 || !bitregion_start.is_constant (&const_bitregion_start)
4866 || !bitregion_end.is_constant (&const_bitregion_end))
4867 return terminate_all_aliasing_chains (NULL, stmt);
4868
4869 if (!ins_stmt)
4870 memset (&n, 0, sizeof (n));
4871
4872 class imm_store_chain_info **chain_info = NULL;
4873 bool ret = false;
4874 if (base_addr)
4875 chain_info = m_stores.get (base_addr);
4876
4877 store_immediate_info *info;
4878 if (chain_info)
4879 {
4880 unsigned int ord = (*chain_info)->m_store_info.length ();
4881 info = new store_immediate_info (const_bitsize, const_bitpos,
4882 const_bitregion_start,
4883 const_bitregion_end,
4884 stmt, ord, rhs_code, n, ins_stmt,
4885 bit_not_p, lp_nr_for_store (stmt),
4886 ops[0], ops[1]);
4887 if (dump_file && (dump_flags & TDF_DETAILS))
4888 {
4889 fprintf (dump_file, "Recording immediate store from stmt:\n");
4890 print_gimple_stmt (dump_file, stmt, 0);
4891 }
4892 (*chain_info)->m_store_info.safe_push (info);
4893 ret |= terminate_all_aliasing_chains (chain_info, stmt);
4894 /* If we reach the limit of stores to merge in a chain terminate and
4895 process the chain now. */
4896 if ((*chain_info)->m_store_info.length ()
4897 == (unsigned int) param_max_stores_to_merge)
4898 {
4899 if (dump_file && (dump_flags & TDF_DETAILS))
4900 fprintf (dump_file,
4901 "Reached maximum number of statements to merge:\n");
4902 ret |= terminate_and_process_chain (*chain_info);
4903 }
4904 return ret;
4905 }
4906
4907 /* Store aliases any existing chain? */
4908 ret |= terminate_all_aliasing_chains (NULL, stmt);
4909 /* Start a new chain. */
4910 class imm_store_chain_info *new_chain
4911 = new imm_store_chain_info (m_stores_head, base_addr);
4912 info = new store_immediate_info (const_bitsize, const_bitpos,
4913 const_bitregion_start,
4914 const_bitregion_end,
4915 stmt, 0, rhs_code, n, ins_stmt,
4916 bit_not_p, lp_nr_for_store (stmt),
4917 ops[0], ops[1]);
4918 new_chain->m_store_info.safe_push (info);
4919 m_stores.put (base_addr, new_chain);
4920 if (dump_file && (dump_flags & TDF_DETAILS))
4921 {
4922 fprintf (dump_file, "Starting new chain with statement:\n");
4923 print_gimple_stmt (dump_file, stmt, 0);
4924 fprintf (dump_file, "The base object is:\n");
4925 print_generic_expr (dump_file, base_addr);
4926 fprintf (dump_file, "\n");
4927 }
4928 return ret;
4929 }
4930
4931 /* Return true if STMT is a store valid for store merging. */
4932
4933 static bool
4934 store_valid_for_store_merging_p (gimple *stmt)
4935 {
4936 return gimple_assign_single_p (stmt)
4937 && gimple_vdef (stmt)
4938 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
4939 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
4940 }
4941
4942 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4943
4944 /* Return the status of basic block BB wrt store merging. */
4945
4946 static enum basic_block_status
4947 get_status_for_store_merging (basic_block bb)
4948 {
4949 unsigned int num_statements = 0;
4950 gimple_stmt_iterator gsi;
4951 edge e;
4952
4953 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4954 {
4955 gimple *stmt = gsi_stmt (gsi);
4956
4957 if (is_gimple_debug (stmt))
4958 continue;
4959
4960 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4961 break;
4962 }
4963
4964 if (num_statements == 0)
4965 return BB_INVALID;
4966
4967 if (cfun->can_throw_non_call_exceptions && cfun->eh
4968 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
4969 && (e = find_fallthru_edge (bb->succs))
4970 && e->dest == bb->next_bb)
4971 return BB_EXTENDED_VALID;
4972
4973 return num_statements >= 2 ? BB_VALID : BB_INVALID;
4974 }
4975
4976 /* Entry point for the pass. Go over each basic block recording chains of
4977 immediate stores. Upon encountering a terminating statement (as defined
4978 by stmt_terminates_chain_p) process the recorded stores and emit the widened
4979 variants. */
4980
4981 unsigned int
4982 pass_store_merging::execute (function *fun)
4983 {
4984 basic_block bb;
4985 hash_set<gimple *> orig_stmts;
4986 bool changed = false, open_chains = false;
4987
4988 /* If the function can throw and catch non-call exceptions, we'll be trying
4989 to merge stores across different basic blocks so we need to first unsplit
4990 the EH edges in order to streamline the CFG of the function. */
4991 if (cfun->can_throw_non_call_exceptions && cfun->eh)
4992 unsplit_eh_edges ();
4993
4994 calculate_dominance_info (CDI_DOMINATORS);
4995
4996 FOR_EACH_BB_FN (bb, fun)
4997 {
4998 const basic_block_status bb_status = get_status_for_store_merging (bb);
4999 gimple_stmt_iterator gsi;
5000
5001 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5002 {
5003 changed |= terminate_and_process_all_chains ();
5004 open_chains = false;
5005 }
5006
5007 if (bb_status == BB_INVALID)
5008 continue;
5009
5010 if (dump_file && (dump_flags & TDF_DETAILS))
5011 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5012
5013 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5014 {
5015 gimple *stmt = gsi_stmt (gsi);
5016
5017 if (is_gimple_debug (stmt))
5018 continue;
5019
5020 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5021 {
5022 /* Terminate all chains. */
5023 if (dump_file && (dump_flags & TDF_DETAILS))
5024 fprintf (dump_file, "Volatile access terminates "
5025 "all chains\n");
5026 changed |= terminate_and_process_all_chains ();
5027 open_chains = false;
5028 continue;
5029 }
5030
5031 if (store_valid_for_store_merging_p (stmt))
5032 changed |= process_store (stmt);
5033 else
5034 changed |= terminate_all_aliasing_chains (NULL, stmt);
5035 }
5036
5037 if (bb_status == BB_EXTENDED_VALID)
5038 open_chains = true;
5039 else
5040 {
5041 changed |= terminate_and_process_all_chains ();
5042 open_chains = false;
5043 }
5044 }
5045
5046 if (open_chains)
5047 changed |= terminate_and_process_all_chains ();
5048
5049 /* If the function can throw and catch non-call exceptions and something
5050 changed during the pass, then the CFG has (very likely) changed too. */
5051 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5052 {
5053 free_dominance_info (CDI_DOMINATORS);
5054 return TODO_cleanup_cfg;
5055 }
5056
5057 return 0;
5058 }
5059
5060 } // anon namespace
5061
5062 /* Construct and return a store merging pass object. */
5063
5064 gimple_opt_pass *
5065 make_pass_store_merging (gcc::context *ctxt)
5066 {
5067 return new pass_store_merging (ctxt);
5068 }
5069
5070 #if CHECKING_P
5071
5072 namespace selftest {
5073
5074 /* Selftests for store merging helpers. */
5075
5076 /* Assert that all elements of the byte arrays X and Y, both of length N
5077 are equal. */
5078
5079 static void
5080 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5081 {
5082 for (unsigned int i = 0; i < n; i++)
5083 {
5084 if (x[i] != y[i])
5085 {
5086 fprintf (stderr, "Arrays do not match. X:\n");
5087 dump_char_array (stderr, x, n);
5088 fprintf (stderr, "Y:\n");
5089 dump_char_array (stderr, y, n);
5090 }
5091 ASSERT_EQ (x[i], y[i]);
5092 }
5093 }
5094
5095 /* Test shift_bytes_in_array and that it carries bits across between
5096 bytes correctly. */
5097
5098 static void
5099 verify_shift_bytes_in_array (void)
5100 {
5101 /* byte 1 | byte 0
5102 00011111 | 11100000. */
5103 unsigned char orig[2] = { 0xe0, 0x1f };
5104 unsigned char in[2];
5105 memcpy (in, orig, sizeof orig);
5106
5107 unsigned char expected[2] = { 0x80, 0x7f };
5108 shift_bytes_in_array (in, sizeof (in), 2);
5109 verify_array_eq (in, expected, sizeof (in));
5110
5111 memcpy (in, orig, sizeof orig);
5112 memcpy (expected, orig, sizeof orig);
5113 /* Check that shifting by zero doesn't change anything. */
5114 shift_bytes_in_array (in, sizeof (in), 0);
5115 verify_array_eq (in, expected, sizeof (in));
5116
5117 }
5118
5119 /* Test shift_bytes_in_array_right and that it carries bits across between
5120 bytes correctly. */
5121
5122 static void
5123 verify_shift_bytes_in_array_right (void)
5124 {
5125 /* byte 1 | byte 0
5126 00011111 | 11100000. */
5127 unsigned char orig[2] = { 0x1f, 0xe0};
5128 unsigned char in[2];
5129 memcpy (in, orig, sizeof orig);
5130 unsigned char expected[2] = { 0x07, 0xf8};
5131 shift_bytes_in_array_right (in, sizeof (in), 2);
5132 verify_array_eq (in, expected, sizeof (in));
5133
5134 memcpy (in, orig, sizeof orig);
5135 memcpy (expected, orig, sizeof orig);
5136 /* Check that shifting by zero doesn't change anything. */
5137 shift_bytes_in_array_right (in, sizeof (in), 0);
5138 verify_array_eq (in, expected, sizeof (in));
5139 }
5140
5141 /* Test clear_bit_region that it clears exactly the bits asked and
5142 nothing more. */
5143
5144 static void
5145 verify_clear_bit_region (void)
5146 {
5147 /* Start with all bits set and test clearing various patterns in them. */
5148 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5149 unsigned char in[3];
5150 unsigned char expected[3];
5151 memcpy (in, orig, sizeof in);
5152
5153 /* Check zeroing out all the bits. */
5154 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5155 expected[0] = expected[1] = expected[2] = 0;
5156 verify_array_eq (in, expected, sizeof in);
5157
5158 memcpy (in, orig, sizeof in);
5159 /* Leave the first and last bits intact. */
5160 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5161 expected[0] = 0x1;
5162 expected[1] = 0;
5163 expected[2] = 0x80;
5164 verify_array_eq (in, expected, sizeof in);
5165 }
5166
5167 /* Test clear_bit_region_be that it clears exactly the bits asked and
5168 nothing more. */
5169
5170 static void
5171 verify_clear_bit_region_be (void)
5172 {
5173 /* Start with all bits set and test clearing various patterns in them. */
5174 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5175 unsigned char in[3];
5176 unsigned char expected[3];
5177 memcpy (in, orig, sizeof in);
5178
5179 /* Check zeroing out all the bits. */
5180 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5181 expected[0] = expected[1] = expected[2] = 0;
5182 verify_array_eq (in, expected, sizeof in);
5183
5184 memcpy (in, orig, sizeof in);
5185 /* Leave the first and last bits intact. */
5186 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5187 expected[0] = 0x80;
5188 expected[1] = 0;
5189 expected[2] = 0x1;
5190 verify_array_eq (in, expected, sizeof in);
5191 }
5192
5193
5194 /* Run all of the selftests within this file. */
5195
5196 void
5197 store_merging_c_tests (void)
5198 {
5199 verify_shift_bytes_in_array ();
5200 verify_shift_bytes_in_array_right ();
5201 verify_clear_bit_region ();
5202 verify_clear_bit_region_be ();
5203 }
5204
5205 } // namespace selftest
5206 #endif /* CHECKING_P. */