1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2019 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
24
25 For example, if we have a sequence performing four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it (see the C sketch right after this comment):
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
39 this can be transformed, if there is no overlap, into a single 4-byte
40 load followed by a single 4-byte store.
41
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 this can be transformed, if there is no overlap, into a single 4-byte
48 load, XORed with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55
56 The algorithm is applied to each basic block in three phases:
57
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be either a constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
74
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't require
78 the stores themselves to be contiguous; only their surrounding bit
79 regions have to be contiguous. If the expression being stored differs
80 between adjacent stores, such as one store storing a constant and the
81 following one storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
83
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
94
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that corresponds to them. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
103
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
110
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
120
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
125
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
129
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
136
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
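
/* Purely as an illustrative sketch (assuming a hypothetical struct and a
   little-endian target that allows an unaligned or suitably aligned 32-bit
   access), the first transformation above corresponds to source code like:

     struct bytes { unsigned char a, b, c, d; };

     void
     init_bytes (struct bytes *p)
     {
       p->a = 1;
       p->b = 2;
       p->c = 3;
       p->d = 4;
     }

   Phase 1 records the four single-byte constant stores, phase 2 coalesces
   them into one merged_store_group, and phase 3 emits a single 32-bit store
   of 0x04030201.  */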
140
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "params.h"
155 #include "print-tree.h"
156 #include "tree-hash-traits.h"
157 #include "gimple-iterator.h"
158 #include "gimplify.h"
159 #include "gimple-fold.h"
160 #include "stor-layout.h"
161 #include "timevar.h"
162 #include "cfganal.h"
163 #include "cfgcleanup.h"
164 #include "tree-cfg.h"
165 #include "except.h"
166 #include "tree-eh.h"
167 #include "target.h"
168 #include "gimplify-me.h"
169 #include "rtl.h"
170 #include "expr.h" /* For get_bit_range. */
171 #include "optabs-tree.h"
172 #include "dbgcnt.h"
173 #include "selftest.h"
174
175 /* The maximum size (in bits) of the stores this pass should generate. */
176 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
177 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178
179 /* Limit to bound the number of aliasing checks for loads with the same
180 vuse as the corresponding store. */
181 #define MAX_STORE_ALIAS_CHECKS 64
182
183 namespace {
184
185 struct bswap_stat
186 {
187 /* Number of hand-written 16-bit nop / bswaps found. */
188 int found_16bit;
189
190 /* Number of hand-written 32-bit nop / bswaps found. */
191 int found_32bit;
192
193 /* Number of hand-written 64-bit nop / bswaps found. */
194 int found_64bit;
195 } nop_stats, bswap_stats;
196
197 /* A symbolic number structure is used to detect byte permutation and selection
198 patterns of a source. To achieve that, its field N contains an artificial
199 number consisting of BITS_PER_MARKER sized markers tracking where each
200 byte comes from in the source:
201
202 0 - target byte has the value 0
203 FF - target byte has an unknown value (e.g. due to sign extension)
204 1..size - marker value is the byte index in the source (0 for lsb).
205
206 To detect permutations on memory sources (arrays and structures), a symbolic
207 number is also associated:
208 - a base address BASE_ADDR and an OFFSET giving the address of the source;
209 - a range which gives the difference between the highest and lowest accessed
210 memory location to make such a symbolic number;
211 - the address SRC of the source element of lowest address as a convenience
212 to easily get BASE_ADDR + offset + lowest bytepos;
213 - the number of expressions N_OPS bitwise ORed together, to represent the
214 approximate cost of the computation.
215
216 Note 1: the range is different from size as size reflects the size of the
217 type of the current expression. For instance, for an array char a[],
218 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
219 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
220 time a range of 1.
221
222 Note 2: for non-memory sources, range holds the same value as size.
223
224 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
225
226 struct symbolic_number {
227 uint64_t n;
228 tree type;
229 tree base_addr;
230 tree offset;
231 poly_int64_pod bytepos;
232 tree src;
233 tree alias_set;
234 tree vuse;
235 unsigned HOST_WIDE_INT range;
236 int n_ops;
237 };
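
/* For illustration only (this just instantiates Note 1 above): for an array
   "unsigned char a[]", the expression (short) a[0] | (short) a[3] is
   described by a symbolic number whose BASE_ADDR is roughly &a, whose
   BYTEPOS is 0, whose SRC refers to the source element of lowest address
   (a[0] here), whose range is 4 (the accessed locations span bytes a[0]
   through a[3]) and whose type is short, i.e. a size of 2; N_OPS is 2
   because two loaded expressions are ORed together.  */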
238
239 #define BITS_PER_MARKER 8
240 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
241 #define MARKER_BYTE_UNKNOWN MARKER_MASK
242 #define HEAD_MARKER(n, size) \
243 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244
245 /* The number which the find_bswap_or_nop_1 result should match in
246 order to have a nop. The number is masked according to the size of
247 the symbolic number before using it. */
248 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
249 (uint64_t)0x08070605 << 32 | 0x04030201)
250
251 /* The number which the find_bswap_or_nop_1 result should match in
252 order to have a byte swap. The number is masked according to the
253 size of the symbolic number before using it. */
254 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
255 (uint64_t)0x01020304 << 32 | 0x05060708)
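
/* As an illustration of how these constants are used (my_bswap32 below is a
   hypothetical example input, assuming a 32-bit unsigned int, not code that
   exists in GCC):

     unsigned int
     my_bswap32 (unsigned int x)
     {
       return (x >> 24)
	      | ((x >> 8) & 0x0000ff00)
	      | ((x << 8) & 0x00ff0000)
	      | (x << 24);
     }

   Starting from the markers 0x04030201 for x (CMPNOP masked down to four
   markers), the shifts, masks and ORs above are tracked symbolically; the
   final symbolic number is 0x01020304, which equals CMPXCHG reduced to four
   markers, so the whole expression is recognized as a 32-bit byte swap.  */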
256
257 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
258 number N. Return false if the requested operation is not permitted
259 on a symbolic number. */
260
261 inline bool
262 do_shift_rotate (enum tree_code code,
263 struct symbolic_number *n,
264 int count)
265 {
266 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
267 unsigned head_marker;
268
269 if (count < 0
270 || count >= TYPE_PRECISION (n->type)
271 || count % BITS_PER_UNIT != 0)
272 return false;
273 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
274
275 /* Zero out the extra bits of N in order to avoid them being shifted
276 into the significant bits. */
277 if (size < 64 / BITS_PER_MARKER)
278 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
279
280 switch (code)
281 {
282 case LSHIFT_EXPR:
283 n->n <<= count;
284 break;
285 case RSHIFT_EXPR:
286 head_marker = HEAD_MARKER (n->n, size);
287 n->n >>= count;
288 /* Arithmetic shift of signed type: result is dependent on the value. */
289 if (!TYPE_UNSIGNED (n->type) && head_marker)
290 for (i = 0; i < count / BITS_PER_MARKER; i++)
291 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
292 << ((size - 1 - i) * BITS_PER_MARKER);
293 break;
294 case LROTATE_EXPR:
295 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
296 break;
297 case RROTATE_EXPR:
298 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
299 break;
300 default:
301 return false;
302 }
303 /* Zero unused bits for size. */
304 if (size < 64 / BITS_PER_MARKER)
305 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
306 return true;
307 }
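
/* For example (purely illustrative): with size == 4 and markers 0x04030201,
   an RSHIFT_EXPR by 8 bits on an unsigned type yields 0x00040302, while on
   a signed type the freed-up top marker becomes MARKER_BYTE_UNKNOWN because
   the shifted-in bits depend on the value, giving 0xff040302.  */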
308
309 /* Perform sanity checking for the symbolic number N and the gimple
310 statement STMT. */
311
312 inline bool
313 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
314 {
315 tree lhs_type;
316
317 lhs_type = gimple_expr_type (stmt);
318
319 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
320 return false;
321
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
324
325 return true;
326 }
327
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
330
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
333 {
334 int size;
335
336 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
337 return false;
338
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
341
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
355
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358
359 return true;
360 }
361
362 /* Check if STMT might be a byte swap or a nop from a memory source and return
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
365
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368 {
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize, bitpos, bytepos;
372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
375
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
379
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
382
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
385
386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
390 {
391 poly_offset_int bit_offset = 0;
392 tree off = TREE_OPERAND (base_addr, 1);
393
394 if (!integer_zerop (off))
395 {
396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
398 bit_offset += boff;
399 }
400
401 base_addr = TREE_OPERAND (base_addr, 0);
402
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset, 0))
405 {
406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
409 if (offset)
410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 else
412 offset = byte_offset;
413 }
414
415 bitpos += bit_offset.force_shwi ();
416 }
417 else
418 base_addr = build_fold_addr_expr (base_addr);
419
420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421 return false;
422 if (!multiple_p (bitsize, BITS_PER_UNIT))
423 return false;
424 if (reversep)
425 return false;
426
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
431 n->bytepos = bytepos;
432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
435 }
436
437 /* Compute the symbolic number N representing the result of a bitwise OR on 2
438 symbolic numbers N1 and N2 whose source statements are respectively
439 SOURCE_STMT1 and SOURCE_STMT2. */
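
/* A worked illustration (assuming a little-endian target and an array
   "unsigned char a[]", not a statement of the only supported shape): for a
   2-byte value built as (uint16_t) a[0] | ((uint16_t) a[1] << 8), the load
   of a[0] has markers 0x01 and bytepos 0 while the shifted load of a[1] has
   markers 0x0100 and bytepos 1; the merge bumps the markers of the
   higher-address source by the byte distance (giving 0x0200) and ORs the
   two, yielding 0x0201 over a 2-byte range, i.e. a plain little-endian
   16-bit load.  */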
440
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
444 struct symbolic_number *n)
445 {
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
450
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
459
460 /* Sources are different, cancel bswap if they are not memory locations with
461 the same base (array, structure, ...). */
462 if (rhs1 != rhs2)
463 {
464 uint64_t inc;
465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
468
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
472
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
476
477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
480
481 if (start1 < start2)
482 {
483 n_start = n1;
484 start_sub = start2 - start1;
485 }
486 else
487 {
488 n_start = n2;
489 start_sub = start1 - start2;
490 }
491
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
498
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
503 if (end1 < end2)
504 {
505 end = end2;
506 end_sub = end2 - end1;
507 }
508 else
509 {
510 end = end1;
511 end_sub = end1 - end2;
512 }
513 n_end = (end2 > end1) ? n2 : n1;
514
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
520
521 n->range = end - MIN (start1, start2) + 1;
522
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
527
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533 {
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
538 }
539 }
540 else
541 {
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
545 }
546
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559
560 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
561 {
562 uint64_t masked1, masked2;
563
564 masked1 = n1->n & mask;
565 masked2 = n2->n & mask;
566 if (masked1 && masked2 && masked1 != masked2)
567 return NULL;
568 }
569 n->n = n1->n | n2->n;
570 n->n_ops = n1->n_ops + n2->n_ops;
571
572 return source_stmt;
573 }
574
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576 the operation given by the rhs of STMT on the result. If the operation
577 could successfully be executed, the function returns a gimple stmt whose
578 rhs's first tree is the expression of the source operand, and NULL
579 otherwise. */
580
581 gimple *
582 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
583 {
584 enum tree_code code;
585 tree rhs1, rhs2 = NULL;
586 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587 enum gimple_rhs_class rhs_class;
588
589 if (!limit || !is_gimple_assign (stmt))
590 return NULL;
591
592 rhs1 = gimple_assign_rhs1 (stmt);
593
594 if (find_bswap_or_nop_load (stmt, rhs1, n))
595 return stmt;
596
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
600 {
601 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
602 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
603 if (bitpos % BITS_PER_UNIT == 0
604 && bitsize % BITS_PER_UNIT == 0
605 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
606 {
607 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
608 if (BYTES_BIG_ENDIAN)
609 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
610
611 /* Shift. */
612 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
613 return NULL;
614
615 /* Mask. */
616 uint64_t mask = 0;
617 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
618 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
619 i++, tmp <<= BITS_PER_UNIT)
620 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
621 n->n &= mask;
622
623 /* Convert. */
624 n->type = TREE_TYPE (rhs1);
625 if (!n->base_addr)
626 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
627
628 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
629 }
630
631 return NULL;
632 }
633
634 if (TREE_CODE (rhs1) != SSA_NAME)
635 return NULL;
636
637 code = gimple_assign_rhs_code (stmt);
638 rhs_class = gimple_assign_rhs_class (stmt);
639 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
640
641 if (rhs_class == GIMPLE_BINARY_RHS)
642 rhs2 = gimple_assign_rhs2 (stmt);
643
644 /* Handle unary rhs and binary rhs with integer constants as second
645 operand. */
646
647 if (rhs_class == GIMPLE_UNARY_RHS
648 || (rhs_class == GIMPLE_BINARY_RHS
649 && TREE_CODE (rhs2) == INTEGER_CST))
650 {
651 if (code != BIT_AND_EXPR
652 && code != LSHIFT_EXPR
653 && code != RSHIFT_EXPR
654 && code != LROTATE_EXPR
655 && code != RROTATE_EXPR
656 && !CONVERT_EXPR_CODE_P (code))
657 return NULL;
658
659 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
660
661 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
662 we have to initialize the symbolic number. */
663 if (!source_stmt1)
664 {
665 if (gimple_assign_load_p (stmt)
666 || !init_symbolic_number (n, rhs1))
667 return NULL;
668 source_stmt1 = stmt;
669 }
670
671 switch (code)
672 {
673 case BIT_AND_EXPR:
674 {
675 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
676 uint64_t val = int_cst_value (rhs2), mask = 0;
677 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
678
679 /* Only constants masking full bytes are allowed. */
680 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
681 if ((val & tmp) != 0 && (val & tmp) != tmp)
682 return NULL;
683 else if (val & tmp)
684 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
685
686 n->n &= mask;
687 }
688 break;
689 case LSHIFT_EXPR:
690 case RSHIFT_EXPR:
691 case LROTATE_EXPR:
692 case RROTATE_EXPR:
693 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
694 return NULL;
695 break;
696 CASE_CONVERT:
697 {
698 int i, type_size, old_type_size;
699 tree type;
700
701 type = gimple_expr_type (stmt);
702 type_size = TYPE_PRECISION (type);
703 if (type_size % BITS_PER_UNIT != 0)
704 return NULL;
705 type_size /= BITS_PER_UNIT;
706 if (type_size > 64 / BITS_PER_MARKER)
707 return NULL;
708
709 /* Sign extension: result is dependent on the value. */
710 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
711 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
712 && HEAD_MARKER (n->n, old_type_size))
713 for (i = 0; i < type_size - old_type_size; i++)
714 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
715 << ((type_size - 1 - i) * BITS_PER_MARKER);
716
717 if (type_size < 64 / BITS_PER_MARKER)
718 {
719 /* If STMT casts to a smaller type mask out the bits not
720 belonging to the target type. */
721 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
722 }
723 n->type = type;
724 if (!n->base_addr)
725 n->range = type_size;
726 }
727 break;
728 default:
729 return NULL;
730 };
731 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
732 }
733
734 /* Handle binary rhs. */
735
736 if (rhs_class == GIMPLE_BINARY_RHS)
737 {
738 struct symbolic_number n1, n2;
739 gimple *source_stmt, *source_stmt2;
740
741 if (code != BIT_IOR_EXPR)
742 return NULL;
743
744 if (TREE_CODE (rhs2) != SSA_NAME)
745 return NULL;
746
747 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
748
749 switch (code)
750 {
751 case BIT_IOR_EXPR:
752 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
753
754 if (!source_stmt1)
755 return NULL;
756
757 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
758
759 if (!source_stmt2)
760 return NULL;
761
762 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
763 return NULL;
764
765 if (n1.vuse != n2.vuse)
766 return NULL;
767
768 source_stmt
769 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
770
771 if (!source_stmt)
772 return NULL;
773
774 if (!verify_symbolic_number_p (n, stmt))
775 return NULL;
776
777 break;
778 default:
779 return NULL;
780 }
781 return source_stmt;
782 }
783 return NULL;
784 }
785
786 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
787 *CMPXCHG, *CMPNOP and adjust *N. */
788
789 void
790 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
791 uint64_t *cmpnop)
792 {
793 unsigned rsize;
794 uint64_t tmpn, mask;
795
796 /* The number which the find_bswap_or_nop_1 result should match in order
797 to have a full byte swap. The number is shifted to the right
798 according to the size of the symbolic number before using it. */
799 *cmpxchg = CMPXCHG;
800 *cmpnop = CMPNOP;
801
802 /* Find real size of result (highest non-zero byte). */
803 if (n->base_addr)
804 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
805 else
806 rsize = n->range;
807
808 /* Zero out the bits corresponding to untouched bytes in original gimple
809 expression. */
810 if (n->range < (int) sizeof (int64_t))
811 {
812 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
813 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
814 *cmpnop &= mask;
815 }
816
817 /* Zero out the bits corresponding to unused bytes in the result of the
818 gimple expression. */
819 if (rsize < n->range)
820 {
821 if (BYTES_BIG_ENDIAN)
822 {
823 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
824 *cmpxchg &= mask;
825 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
826 }
827 else
828 {
829 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
830 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
831 *cmpnop &= mask;
832 }
833 n->range = rsize;
834 }
835
836 n->range *= BITS_PER_UNIT;
837 }
838
839 /* Check if STMT completes a bswap implementation or a read in a given
840 endianness consisting of ORs, SHIFTs and ANDs, and set *BSWAP
841 accordingly. It also sets N to represent the kind of operations
842 performed: size of the resulting expression and whether it works on
843 a memory source, and if so its alias-set and vuse. Finally, the
844 function returns a stmt whose rhs's first tree is the source
845 expression. */
846
847 gimple *
848 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
849 {
850 /* The last parameter determines the search depth limit. It usually
851 correlates directly to the number n of bytes to be touched. We
852 increase that number by log2(n) + 1 here in order to also
853 cover signed -> unsigned conversions of the src operand as can be seen
854 in libgcc, and for initial shift/and operation of the src operand. */
855 int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
856 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
857 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
858
859 if (!ins_stmt)
860 return NULL;
861
862 uint64_t cmpxchg, cmpnop;
863 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
864
865 /* A complete byte swap should make the symbolic number start with
866 the largest digit in the highest order byte. An unchanged symbolic
867 number indicates a read with the same endianness as the target. */
868 if (n->n == cmpnop)
869 *bswap = false;
870 else if (n->n == cmpxchg)
871 *bswap = true;
872 else
873 return NULL;
874
875 /* Useless bit manipulation performed by code. */
876 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
877 return NULL;
878
879 return ins_stmt;
880 }
881
882 const pass_data pass_data_optimize_bswap =
883 {
884 GIMPLE_PASS, /* type */
885 "bswap", /* name */
886 OPTGROUP_NONE, /* optinfo_flags */
887 TV_NONE, /* tv_id */
888 PROP_ssa, /* properties_required */
889 0, /* properties_provided */
890 0, /* properties_destroyed */
891 0, /* todo_flags_start */
892 0, /* todo_flags_finish */
893 };
894
895 class pass_optimize_bswap : public gimple_opt_pass
896 {
897 public:
898 pass_optimize_bswap (gcc::context *ctxt)
899 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
900 {}
901
902 /* opt_pass methods: */
903 virtual bool gate (function *)
904 {
905 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
906 }
907
908 virtual unsigned int execute (function *);
909
910 }; // class pass_optimize_bswap
911
912 /* Perform the bswap optimization: replace the expression computed in the rhs
913 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
914 bswap, load or load + bswap expression.
915 Which of these alternatives replaces the rhs is given by N->base_addr (non
916 null if a load is needed) and BSWAP. The type, VUSE and alias set of the
917 load to perform are also given in N, while the builtin bswap invocation is
918 given in FNDECL. Finally, if a load is involved, INS_STMT refers to one of the
919 load statements involved to construct the rhs in gsi_stmt (GSI) and
920 N->range gives the size of the rhs expression for maintaining some
921 statistics.
922
923 Note that if the replacement involves a load and if gsi_stmt (GSI) is
924 non-NULL, that stmt is moved just after INS_STMT to do the load with the
925 same VUSE, which can lead to gsi_stmt (GSI) changing basic block. */
926
927 tree
928 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
929 tree bswap_type, tree load_type, struct symbolic_number *n,
930 bool bswap)
931 {
932 tree src, tmp, tgt = NULL_TREE;
933 gimple *bswap_stmt;
934
935 gimple *cur_stmt = gsi_stmt (gsi);
936 src = n->src;
937 if (cur_stmt)
938 tgt = gimple_assign_lhs (cur_stmt);
939
940 /* Need to load the value from memory first. */
941 if (n->base_addr)
942 {
943 gimple_stmt_iterator gsi_ins = gsi;
944 if (ins_stmt)
945 gsi_ins = gsi_for_stmt (ins_stmt);
946 tree addr_expr, addr_tmp, val_expr, val_tmp;
947 tree load_offset_ptr, aligned_load_type;
948 gimple *load_stmt;
949 unsigned align = get_object_alignment (src);
950 poly_int64 load_offset = 0;
951
952 if (cur_stmt)
953 {
954 basic_block ins_bb = gimple_bb (ins_stmt);
955 basic_block cur_bb = gimple_bb (cur_stmt);
956 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
957 return NULL_TREE;
958
959 /* Move cur_stmt just before one of the load of the original
960 to ensure it has the same VUSE. See PR61517 for what could
961 go wrong. */
962 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
963 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
964 gsi_move_before (&gsi, &gsi_ins);
965 gsi = gsi_for_stmt (cur_stmt);
966 }
967 else
968 gsi = gsi_ins;
969
970 /* Compute address to load from and cast according to the size
971 of the load. */
972 addr_expr = build_fold_addr_expr (src);
973 if (is_gimple_mem_ref_addr (addr_expr))
974 addr_tmp = unshare_expr (addr_expr);
975 else
976 {
977 addr_tmp = unshare_expr (n->base_addr);
978 if (!is_gimple_mem_ref_addr (addr_tmp))
979 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
980 is_gimple_mem_ref_addr,
981 NULL_TREE, true,
982 GSI_SAME_STMT);
983 load_offset = n->bytepos;
984 if (n->offset)
985 {
986 tree off
987 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
988 true, NULL_TREE, true,
989 GSI_SAME_STMT);
990 gimple *stmt
991 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
992 POINTER_PLUS_EXPR, addr_tmp, off);
993 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
994 addr_tmp = gimple_assign_lhs (stmt);
995 }
996 }
997
998 /* Perform the load. */
999 aligned_load_type = load_type;
1000 if (align < TYPE_ALIGN (load_type))
1001 aligned_load_type = build_aligned_type (load_type, align);
1002 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1003 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1004 load_offset_ptr);
1005
1006 if (!bswap)
1007 {
1008 if (n->range == 16)
1009 nop_stats.found_16bit++;
1010 else if (n->range == 32)
1011 nop_stats.found_32bit++;
1012 else
1013 {
1014 gcc_assert (n->range == 64);
1015 nop_stats.found_64bit++;
1016 }
1017
1018 /* Convert the result of load if necessary. */
1019 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1020 {
1021 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1022 "load_dst");
1023 load_stmt = gimple_build_assign (val_tmp, val_expr);
1024 gimple_set_vuse (load_stmt, n->vuse);
1025 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1026 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
1027 update_stmt (cur_stmt);
1028 }
1029 else if (cur_stmt)
1030 {
1031 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1032 gimple_set_vuse (cur_stmt, n->vuse);
1033 update_stmt (cur_stmt);
1034 }
1035 else
1036 {
1037 tgt = make_ssa_name (load_type);
1038 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1039 gimple_set_vuse (cur_stmt, n->vuse);
1040 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1041 }
1042
1043 if (dump_file)
1044 {
1045 fprintf (dump_file,
1046 "%d bit load in target endianness found at: ",
1047 (int) n->range);
1048 print_gimple_stmt (dump_file, cur_stmt, 0);
1049 }
1050 return tgt;
1051 }
1052 else
1053 {
1054 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1055 load_stmt = gimple_build_assign (val_tmp, val_expr);
1056 gimple_set_vuse (load_stmt, n->vuse);
1057 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1058 }
1059 src = val_tmp;
1060 }
1061 else if (!bswap)
1062 {
1063 gimple *g = NULL;
1064 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1065 {
1066 if (!is_gimple_val (src))
1067 return NULL_TREE;
1068 g = gimple_build_assign (tgt, NOP_EXPR, src);
1069 }
1070 else if (cur_stmt)
1071 g = gimple_build_assign (tgt, src);
1072 else
1073 tgt = src;
1074 if (n->range == 16)
1075 nop_stats.found_16bit++;
1076 else if (n->range == 32)
1077 nop_stats.found_32bit++;
1078 else
1079 {
1080 gcc_assert (n->range == 64);
1081 nop_stats.found_64bit++;
1082 }
1083 if (dump_file)
1084 {
1085 fprintf (dump_file,
1086 "%d bit reshuffle in target endianness found at: ",
1087 (int) n->range);
1088 if (cur_stmt)
1089 print_gimple_stmt (dump_file, cur_stmt, 0);
1090 else
1091 {
1092 print_generic_expr (dump_file, tgt, TDF_NONE);
1093 fprintf (dump_file, "\n");
1094 }
1095 }
1096 if (cur_stmt)
1097 gsi_replace (&gsi, g, true);
1098 return tgt;
1099 }
1100 else if (TREE_CODE (src) == BIT_FIELD_REF)
1101 src = TREE_OPERAND (src, 0);
1102
1103 if (n->range == 16)
1104 bswap_stats.found_16bit++;
1105 else if (n->range == 32)
1106 bswap_stats.found_32bit++;
1107 else
1108 {
1109 gcc_assert (n->range == 64);
1110 bswap_stats.found_64bit++;
1111 }
1112
1113 tmp = src;
1114
1115 /* Convert the src expression if necessary. */
1116 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1117 {
1118 gimple *convert_stmt;
1119
1120 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1121 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1122 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1123 }
1124
1125 /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
1126 values are handled this way, as rotation of a 2N-bit value by N bits is
1127 generally not equivalent to a bswap. Consider for instance 0x01020304 r>> 16,
1128 which gives 0x03040102 while a bswap of that value is 0x04030201. */
1129 if (bswap && n->range == 16)
1130 {
1131 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1132 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1133 bswap_stmt = gimple_build_assign (NULL, src);
1134 }
1135 else
1136 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1137
1138 if (tgt == NULL_TREE)
1139 tgt = make_ssa_name (bswap_type);
1140 tmp = tgt;
1141
1142 /* Convert the result if necessary. */
1143 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1144 {
1145 gimple *convert_stmt;
1146
1147 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1148 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1149 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1150 }
1151
1152 gimple_set_lhs (bswap_stmt, tmp);
1153
1154 if (dump_file)
1155 {
1156 fprintf (dump_file, "%d bit bswap implementation found at: ",
1157 (int) n->range);
1158 if (cur_stmt)
1159 print_gimple_stmt (dump_file, cur_stmt, 0);
1160 else
1161 {
1162 print_generic_expr (dump_file, tgt, TDF_NONE);
1163 fprintf (dump_file, "\n");
1164 }
1165 }
1166
1167 if (cur_stmt)
1168 {
1169 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1170 gsi_remove (&gsi, true);
1171 }
1172 else
1173 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1174 return tgt;
1175 }
1176
1177 /* Find manual byte swap implementations as well as load in a given
1178 endianness. Byte swaps are turned into a bswap builtin invocation,
1179 while endian loads are converted to a bswap builtin invocation or a
1180 simple load according to the target endianness. */
1181
1182 unsigned int
1183 pass_optimize_bswap::execute (function *fun)
1184 {
1185 basic_block bb;
1186 bool bswap32_p, bswap64_p;
1187 bool changed = false;
1188 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1189
1190 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1191 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1192 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1193 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1194 || (bswap32_p && word_mode == SImode)));
1195
1196 /* Determine the argument type of the builtins. The code later on
1197 assumes that the return and argument type are the same. */
1198 if (bswap32_p)
1199 {
1200 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1201 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1202 }
1203
1204 if (bswap64_p)
1205 {
1206 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1207 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1208 }
1209
1210 memset (&nop_stats, 0, sizeof (nop_stats));
1211 memset (&bswap_stats, 0, sizeof (bswap_stats));
1212 calculate_dominance_info (CDI_DOMINATORS);
1213
1214 FOR_EACH_BB_FN (bb, fun)
1215 {
1216 gimple_stmt_iterator gsi;
1217
1218 /* We do a reverse scan for bswap patterns to make sure we get the
1219 widest match. As bswap pattern matching doesn't handle previously
1220 inserted smaller bswap replacements as sub-patterns, the wider
1221 variant wouldn't be detected. */
1222 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1223 {
1224 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1225 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1226 enum tree_code code;
1227 struct symbolic_number n;
1228 bool bswap;
1229
1230 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1231 might be moved to a different basic block by bswap_replace and gsi
1232 must not point to it in that case. Doing the gsi_prev here makes
1233 sure that gsi points to the statement previous to cur_stmt while
1234 still making sure that all statements in this basic block are
1235 considered. */
1236 gsi_prev (&gsi);
1237
1238 if (!is_gimple_assign (cur_stmt))
1239 continue;
1240
1241 code = gimple_assign_rhs_code (cur_stmt);
1242 switch (code)
1243 {
1244 case LROTATE_EXPR:
1245 case RROTATE_EXPR:
1246 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1247 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1248 % BITS_PER_UNIT)
1249 continue;
1250 /* Fall through. */
1251 case BIT_IOR_EXPR:
1252 break;
1253 default:
1254 continue;
1255 }
1256
1257 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1258
1259 if (!ins_stmt)
1260 continue;
1261
1262 switch (n.range)
1263 {
1264 case 16:
1265 /* Already in canonical form, nothing to do. */
1266 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1267 continue;
1268 load_type = bswap_type = uint16_type_node;
1269 break;
1270 case 32:
1271 load_type = uint32_type_node;
1272 if (bswap32_p)
1273 {
1274 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1275 bswap_type = bswap32_type;
1276 }
1277 break;
1278 case 64:
1279 load_type = uint64_type_node;
1280 if (bswap64_p)
1281 {
1282 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1283 bswap_type = bswap64_type;
1284 }
1285 break;
1286 default:
1287 continue;
1288 }
1289
1290 if (bswap && !fndecl && n.range != 16)
1291 continue;
1292
1293 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1294 bswap_type, load_type, &n, bswap))
1295 changed = true;
1296 }
1297 }
1298
1299 statistics_counter_event (fun, "16-bit nop implementations found",
1300 nop_stats.found_16bit);
1301 statistics_counter_event (fun, "32-bit nop implementations found",
1302 nop_stats.found_32bit);
1303 statistics_counter_event (fun, "64-bit nop implementations found",
1304 nop_stats.found_64bit);
1305 statistics_counter_event (fun, "16-bit bswap implementations found",
1306 bswap_stats.found_16bit);
1307 statistics_counter_event (fun, "32-bit bswap implementations found",
1308 bswap_stats.found_32bit);
1309 statistics_counter_event (fun, "64-bit bswap implementations found",
1310 bswap_stats.found_64bit);
1311
1312 return (changed ? TODO_update_ssa : 0);
1313 }
1314
1315 } // anon namespace
1316
1317 gimple_opt_pass *
1318 make_pass_optimize_bswap (gcc::context *ctxt)
1319 {
1320 return new pass_optimize_bswap (ctxt);
1321 }
1322
1323 namespace {
1324
1325 /* Struct recording one operand for the store, which is either a constant,
1326 then VAL represents the constant and all the other fields are zero, or
1327 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1328 and the other fields also reflect the memory load, or an SSA name, then
1329 VAL represents the SSA name and all the other fields are zero. */
1330
1331 class store_operand_info
1332 {
1333 public:
1334 tree val;
1335 tree base_addr;
1336 poly_uint64 bitsize;
1337 poly_uint64 bitpos;
1338 poly_uint64 bitregion_start;
1339 poly_uint64 bitregion_end;
1340 gimple *stmt;
1341 bool bit_not_p;
1342 store_operand_info ();
1343 };
1344
1345 store_operand_info::store_operand_info ()
1346 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1347 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1348 {
1349 }
1350
1351 /* Struct recording the information about a single store of an immediate
1352 to memory. These are created in the first phase and coalesced into
1353 merged_store_group objects in the second phase. */
1354
1355 class store_immediate_info
1356 {
1357 public:
1358 unsigned HOST_WIDE_INT bitsize;
1359 unsigned HOST_WIDE_INT bitpos;
1360 unsigned HOST_WIDE_INT bitregion_start;
1361 /* This is one past the last bit of the bit region. */
1362 unsigned HOST_WIDE_INT bitregion_end;
1363 gimple *stmt;
1364 unsigned int order;
1365 /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1366 BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1367 for bit insertion.
1368 LROTATE_EXPR if it can be only bswap optimized and
1369 ops are not really meaningful.
1370 NOP_EXPR if bswap optimization detected identity, ops
1371 are not meaningful. */
1372 enum tree_code rhs_code;
1373 /* Two fields for bswap optimization purposes. */
1374 struct symbolic_number n;
1375 gimple *ins_stmt;
1376 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1377 bool bit_not_p;
1378 /* True if ops have been swapped and thus ops[1] represents
1379 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1380 bool ops_swapped_p;
1381 /* The index number of the landing pad, or 0 if there is none. */
1382 int lp_nr;
1383 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1384 just the first one. */
1385 store_operand_info ops[2];
1386 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1387 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1388 gimple *, unsigned int, enum tree_code,
1389 struct symbolic_number &, gimple *, bool, int,
1390 const store_operand_info &,
1391 const store_operand_info &);
1392 };
1393
1394 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1395 unsigned HOST_WIDE_INT bp,
1396 unsigned HOST_WIDE_INT brs,
1397 unsigned HOST_WIDE_INT bre,
1398 gimple *st,
1399 unsigned int ord,
1400 enum tree_code rhscode,
1401 struct symbolic_number &nr,
1402 gimple *ins_stmtp,
1403 bool bitnotp,
1404 int nr2,
1405 const store_operand_info &op0r,
1406 const store_operand_info &op1r)
1407 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1408 stmt (st), order (ord), rhs_code (rhscode), n (nr),
1409 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1410 lp_nr (nr2)
1411 #if __cplusplus >= 201103L
1412 , ops { op0r, op1r }
1413 {
1414 }
1415 #else
1416 {
1417 ops[0] = op0r;
1418 ops[1] = op1r;
1419 }
1420 #endif
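
/* As an informal illustration of how the classes above are meant to be used
   (not additional functionality): a statement like [p] := [q] ^ imm from the
   examples at the top of the file is recorded as a store_immediate_info with
   rhs_code BIT_XOR_EXPR, where one of its ops[] describes the load of [q]
   (val is the memory reference, base_addr and bitpos describe its location)
   and the other describes the constant imm (val is the INTEGER_CST, the
   remaining fields are zero).  */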
1421
1422 /* Struct representing a group of stores to contiguous memory locations.
1423 These are produced by the second phase (coalescing) and consumed in the
1424 third phase that outputs the widened stores. */
1425
1426 class merged_store_group
1427 {
1428 public:
1429 unsigned HOST_WIDE_INT start;
1430 unsigned HOST_WIDE_INT width;
1431 unsigned HOST_WIDE_INT bitregion_start;
1432 unsigned HOST_WIDE_INT bitregion_end;
1433 /* The size of the allocated memory for val and mask. */
1434 unsigned HOST_WIDE_INT buf_size;
1435 unsigned HOST_WIDE_INT align_base;
1436 poly_uint64 load_align_base[2];
1437
1438 unsigned int align;
1439 unsigned int load_align[2];
1440 unsigned int first_order;
1441 unsigned int last_order;
1442 bool bit_insertion;
1443 bool only_constants;
1444 unsigned int first_nonmergeable_order;
1445 int lp_nr;
1446
1447 auto_vec<store_immediate_info *> stores;
1448 /* We record the first and last original statements in the sequence because
1449 we'll need their vuse/vdef and replacement position. It's easier to keep
1450 track of them separately as 'stores' is reordered by apply_stores. */
1451 gimple *last_stmt;
1452 gimple *first_stmt;
1453 unsigned char *val;
1454 unsigned char *mask;
1455
1456 merged_store_group (store_immediate_info *);
1457 ~merged_store_group ();
1458 bool can_be_merged_into (store_immediate_info *);
1459 void merge_into (store_immediate_info *);
1460 void merge_overlapping (store_immediate_info *);
1461 bool apply_stores ();
1462 private:
1463 void do_merge (store_immediate_info *);
1464 };
1465
1466 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1467
1468 static void
1469 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1470 {
1471 if (!fd)
1472 return;
1473
1474 for (unsigned int i = 0; i < len; i++)
1475 fprintf (fd, "%02x ", ptr[i]);
1476 fprintf (fd, "\n");
1477 }
1478
1479 /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
1480 bits between adjacent elements. AMNT should be within
1481 [0, BITS_PER_UNIT).
1482 Example, AMNT = 2:
1483 00011111|11100000 << 2 = 01111111|10000000
1484 PTR[1] | PTR[0] PTR[1] | PTR[0]. */
1485
1486 static void
1487 shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
1488 {
1489 if (amnt == 0)
1490 return;
1491
1492 unsigned char carry_over = 0U;
1493 unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
1494 unsigned char clear_mask = (~0U) << amnt;
1495
1496 for (unsigned int i = 0; i < sz; i++)
1497 {
1498 unsigned prev_carry_over = carry_over;
1499 carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
1500
1501 ptr[i] <<= amnt;
1502 if (i != 0)
1503 {
1504 ptr[i] &= clear_mask;
1505 ptr[i] |= prev_carry_over;
1506 }
1507 }
1508 }
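
/* A concrete instance of the example in the comment above (illustrative
   only): with ptr[0] == 0xe0, ptr[1] == 0x1f and AMNT == 2,
   shift_bytes_in_array leaves ptr[0] == 0x80 and ptr[1] == 0x7f, the two
   carried-over bits of ptr[0] having moved into the low bits of ptr[1].  */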
1509
1510 /* Like shift_bytes_in_array but for big-endian.
1511 Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
1512 bits between adjacent elements. AMNT should be within
1513 [0, BITS_PER_UNIT).
1514 Example, AMNT = 2:
1515 00011111|11100000 >> 2 = 00000111|11111000
1516 PTR[0] | PTR[1] PTR[0] | PTR[1]. */
1517
1518 static void
1519 shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
1520 unsigned int amnt)
1521 {
1522 if (amnt == 0)
1523 return;
1524
1525 unsigned char carry_over = 0U;
1526 unsigned char carry_mask = ~(~0U << amnt);
1527
1528 for (unsigned int i = 0; i < sz; i++)
1529 {
1530 unsigned prev_carry_over = carry_over;
1531 carry_over = ptr[i] & carry_mask;
1532
1533 carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
1534 ptr[i] >>= amnt;
1535 ptr[i] |= prev_carry_over;
1536 }
1537 }
1538
1539 /* Clear out LEN bits starting from bit START in the byte array
1540 PTR. This clears the bits to the *right* from START.
1541 START must be within [0, BITS_PER_UNIT) and counts starting from
1542 the least significant bit. */
1543
1544 static void
1545 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1546 unsigned int len)
1547 {
1548 if (len == 0)
1549 return;
1550 /* Clear len bits to the right of start. */
1551 else if (len <= start + 1)
1552 {
1553 unsigned char mask = (~(~0U << len));
1554 mask = mask << (start + 1U - len);
1555 ptr[0] &= ~mask;
1556 }
1557 else if (start != BITS_PER_UNIT - 1)
1558 {
1559 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1560 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1561 len - (start % BITS_PER_UNIT) - 1);
1562 }
1563 else if (start == BITS_PER_UNIT - 1
1564 && len > BITS_PER_UNIT)
1565 {
1566 unsigned int nbytes = len / BITS_PER_UNIT;
1567 memset (ptr, 0, nbytes);
1568 if (len % BITS_PER_UNIT != 0)
1569 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1570 len % BITS_PER_UNIT);
1571 }
1572 else
1573 gcc_unreachable ();
1574 }
1575
1576 /* In the byte array PTR clear the bit region that starts at bit
1577 START and is LEN bits wide.
1578 For regions spanning multiple bytes do this recursively until we reach
1579 zero LEN or a region contained within a single byte. */
1580
1581 static void
1582 clear_bit_region (unsigned char *ptr, unsigned int start,
1583 unsigned int len)
1584 {
1585 /* Degenerate base case. */
1586 if (len == 0)
1587 return;
1588 else if (start >= BITS_PER_UNIT)
1589 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1590 /* Second base case. */
1591 else if ((start + len) <= BITS_PER_UNIT)
1592 {
1593 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1594 mask >>= BITS_PER_UNIT - (start + len);
1595
1596 ptr[0] &= ~mask;
1597
1598 return;
1599 }
1600 /* Clear most significant bits in a byte and proceed with the next byte. */
1601 else if (start != 0)
1602 {
1603 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1604 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1605 }
1606 /* Whole bytes need to be cleared. */
1607 else if (start == 0 && len > BITS_PER_UNIT)
1608 {
1609 unsigned int nbytes = len / BITS_PER_UNIT;
1610 /* We could recurse on each byte but we clear whole bytes, so a simple
1611 memset will do. */
1612 memset (ptr, '\0', nbytes);
1613 /* Clear the remaining sub-byte region if there is one. */
1614 if (len % BITS_PER_UNIT != 0)
1615 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1616 }
1617 else
1618 gcc_unreachable ();
1619 }
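
/* For instance (illustrative only): clear_bit_region (ptr, 3, 7) on a buffer
   initially holding { 0xff, 0xff } clears bits 3..7 of ptr[0] and bits 0..1
   of ptr[1], leaving { 0x07, 0xfc }.  */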
1620
1621 /* Write BITLEN bits of EXPR to the byte array PTR at
1622 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1623 Return true if the operation succeeded. */
1624
1625 static bool
1626 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1627 unsigned int total_bytes)
1628 {
1629 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1630 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1631 || (bitpos % BITS_PER_UNIT)
1632 || !int_mode_for_size (bitlen, 0).exists ());
1633 bool empty_ctor_p
1634 = (TREE_CODE (expr) == CONSTRUCTOR
1635 && CONSTRUCTOR_NELTS (expr) == 0
1636 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1637 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1638
1639 if (!sub_byte_op_p)
1640 {
1641 if (first_byte >= total_bytes)
1642 return false;
1643 total_bytes -= first_byte;
1644 if (empty_ctor_p)
1645 {
1646 unsigned HOST_WIDE_INT rhs_bytes
1647 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1648 if (rhs_bytes > total_bytes)
1649 return false;
1650 memset (ptr + first_byte, '\0', rhs_bytes);
1651 return true;
1652 }
1653 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1654 }
1655
1656 /* LITTLE-ENDIAN
1657 We are writing a non byte-sized quantity or at a position that is not
1658 at a byte boundary.
1659 |--------|--------|--------| ptr + first_byte
1660 ^ ^
1661 xxx xxxxxxxx xxx< bp>
1662 |______EXPR____|
1663
1664 First native_encode_expr EXPR into a temporary buffer and shift each
1665 byte in the buffer by 'bp' (carrying the bits over as necessary).
1666 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1667 <------bitlen---->< bp>
1668 Then we clear the destination bits:
1669 |---00000|00000000|000-----| ptr + first_byte
1670 <-------bitlen--->< bp>
1671
1672 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1673 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1674
1675 BIG-ENDIAN
1676 We are writing a non byte-sized quantity or at a position that is not
1677 at a byte boundary.
1678 ptr + first_byte |--------|--------|--------|
1679 ^ ^
1680 <bp >xxx xxxxxxxx xxx
1681 |_____EXPR_____|
1682
1683 First native_encode_expr EXPR into a temporary buffer and shift each
1684 byte in the buffer to the right (carrying the bits over as necessary).
1685 We shift by as much as needed to align the most significant bit of EXPR
1686 with bitpos:
1687 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1688 <---bitlen----> <bp ><-----bitlen----->
1689 Then we clear the destination bits:
1690 ptr + first_byte |-----000||00000000||00000---|
1691 <bp ><-------bitlen----->
1692
1693 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1694 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1695 The awkwardness comes from the fact that bitpos is counted from the
1696 most significant bit of a byte. */
1697
1698 /* We must be dealing with fixed-size data at this point, since the
1699 total size is also fixed. */
1700 unsigned int byte_size;
1701 if (empty_ctor_p)
1702 {
1703 unsigned HOST_WIDE_INT rhs_bytes
1704 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1705 if (rhs_bytes > total_bytes)
1706 return false;
1707 byte_size = rhs_bytes;
1708 }
1709 else
1710 {
1711 fixed_size_mode mode
1712 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1713 byte_size = GET_MODE_SIZE (mode);
1714 }
1715 /* Allocate an extra byte so that we have space to shift into. */
1716 byte_size++;
1717 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1718 memset (tmpbuf, '\0', byte_size);
1719 /* The store detection code should only have allowed constants that are
1720 accepted by native_encode_expr or empty ctors. */
1721 if (!empty_ctor_p
1722 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1723 gcc_unreachable ();
1724
1725 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1726 bytes to write. This means it can write more than
1727 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1728 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1729 bitlen and zero out the bits that are not relevant as well (that may
1730 contain a sign bit due to sign-extension). */
1731 unsigned int padding
1732 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1733 /* On big-endian the padding is at the 'front' so just skip the initial
1734 bytes. */
1735 if (BYTES_BIG_ENDIAN)
1736 tmpbuf += padding;
1737
1738 byte_size -= padding;
1739
1740 if (bitlen % BITS_PER_UNIT != 0)
1741 {
1742 if (BYTES_BIG_ENDIAN)
1743 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1744 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1745 else
1746 clear_bit_region (tmpbuf, bitlen,
1747 byte_size * BITS_PER_UNIT - bitlen);
1748 }
1749 /* Left shifting relies on the last byte being clear if bitlen is
1750 a multiple of BITS_PER_UNIT, which might not be clear if
1751 there are padding bytes. */
1752 else if (!BYTES_BIG_ENDIAN)
1753 tmpbuf[byte_size - 1] = '\0';
1754
1755 /* Clear the bit region in PTR where the bits from TMPBUF will be
1756 inserted. */
1757 if (BYTES_BIG_ENDIAN)
1758 clear_bit_region_be (ptr + first_byte,
1759 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1760 else
1761 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1762
1763 int shift_amnt;
1764 int bitlen_mod = bitlen % BITS_PER_UNIT;
1765 int bitpos_mod = bitpos % BITS_PER_UNIT;
1766
1767 bool skip_byte = false;
1768 if (BYTES_BIG_ENDIAN)
1769 {
1770 /* BITPOS and BITLEN are exactly aligned and no shifting
1771 is necessary. */
1772 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1773 || (bitpos_mod == 0 && bitlen_mod == 0))
1774 shift_amnt = 0;
1775 /* |. . . . . . . .|
1776 <bp > <blen >.
1777 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1778 of the value until it aligns with 'bp' in the next byte over. */
1779 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1780 {
1781 shift_amnt = bitlen_mod + bitpos_mod;
1782 skip_byte = bitlen_mod != 0;
1783 }
1784 /* |. . . . . . . .|
1785 <----bp--->
1786 <---blen---->.
1787 Shift the value right within the same byte so it aligns with 'bp'. */
1788 else
1789 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1790 }
1791 else
1792 shift_amnt = bitpos % BITS_PER_UNIT;
1793
1794 /* Create the shifted version of EXPR. */
1795 if (!BYTES_BIG_ENDIAN)
1796 {
1797 shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
1798 if (shift_amnt == 0)
1799 byte_size--;
1800 }
1801 else
1802 {
1803 gcc_assert (BYTES_BIG_ENDIAN);
1804 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1805 /* If shifting right forced us to move into the next byte, skip the
1806 now-empty byte. */
1807 if (skip_byte)
1808 {
1809 tmpbuf++;
1810 byte_size--;
1811 }
1812 }
1813
1814 /* Insert the bits from TMPBUF. */
1815 for (unsigned int i = 0; i < byte_size; i++)
1816 ptr[first_byte + i] |= tmpbuf[i];
1817
1818 return true;
1819 }
1820
1821 /* Sorting function for store_immediate_info objects.
1822 Sorts them by bitposition. */
1823
1824 static int
1825 sort_by_bitpos (const void *x, const void *y)
1826 {
1827 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1828 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1829
1830 if ((*tmp)->bitpos < (*tmp2)->bitpos)
1831 return -1;
1832 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1833 return 1;
1834 else
1835 /* If they are the same, use the order, which is guaranteed to
1836 be different. */
1837 return (*tmp)->order - (*tmp2)->order;
1838 }
1839
1840 /* Sorting function for store_immediate_info objects.
1841 Sorts them by the order field. */
1842
1843 static int
1844 sort_by_order (const void *x, const void *y)
1845 {
1846 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1847 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1848
1849 if ((*tmp)->order < (*tmp2)->order)
1850 return -1;
1851 else if ((*tmp)->order > (*tmp2)->order)
1852 return 1;
1853
1854 gcc_unreachable ();
1855 }
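/* As a small illustration (hypothetical values): for stores A, B, C with
(bitpos, order) of (0, 3), (0, 1) and (32, 2), sort_by_bitpos yields
B, A, C (ties on bitpos broken by order) while sort_by_order yields
B, C, A. */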
1856
1857 /* Initialize a merged_store_group object from a store_immediate_info
1858 object. */
1859
1860 merged_store_group::merged_store_group (store_immediate_info *info)
1861 {
1862 start = info->bitpos;
1863 width = info->bitsize;
1864 bitregion_start = info->bitregion_start;
1865 bitregion_end = info->bitregion_end;
1866 /* VAL has memory allocated for it in apply_stores once the group
1867 width has been finalized. */
1868 val = NULL;
1869 mask = NULL;
1870 bit_insertion = false;
1871 only_constants = info->rhs_code == INTEGER_CST;
1872 first_nonmergeable_order = ~0U;
1873 lp_nr = info->lp_nr;
1874 unsigned HOST_WIDE_INT align_bitpos = 0;
1875 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1876 &align, &align_bitpos);
1877 align_base = start - align_bitpos;
1878 for (int i = 0; i < 2; ++i)
1879 {
1880 store_operand_info &op = info->ops[i];
1881 if (op.base_addr == NULL_TREE)
1882 {
1883 load_align[i] = 0;
1884 load_align_base[i] = 0;
1885 }
1886 else
1887 {
1888 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1889 load_align_base[i] = op.bitpos - align_bitpos;
1890 }
1891 }
1892 stores.create (1);
1893 stores.safe_push (info);
1894 last_stmt = info->stmt;
1895 last_order = info->order;
1896 first_stmt = last_stmt;
1897 first_order = last_order;
1898 buf_size = 0;
1899 }
1900
1901 merged_store_group::~merged_store_group ()
1902 {
1903 if (val)
1904 XDELETEVEC (val);
1905 }
1906
1907 /* Return true if the store described by INFO can be merged into the group. */
1908
1909 bool
1910 merged_store_group::can_be_merged_into (store_immediate_info *info)
1911 {
1912 /* Do not merge bswap patterns. */
1913 if (info->rhs_code == LROTATE_EXPR)
1914 return false;
1915
1916 if (info->lp_nr != lp_nr)
1917 return false;
1918
1919 /* The canonical case. */
1920 if (info->rhs_code == stores[0]->rhs_code)
1921 return true;
1922
1923 /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */
1924 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1925 return true;
1926
1927 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1928 return true;
1929
1930 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
1931 if (info->rhs_code == MEM_REF
1932 && (stores[0]->rhs_code == INTEGER_CST
1933 || stores[0]->rhs_code == BIT_INSERT_EXPR)
1934 && info->bitregion_start == stores[0]->bitregion_start
1935 && info->bitregion_end == stores[0]->bitregion_end)
1936 return true;
1937
1938 if (stores[0]->rhs_code == MEM_REF
1939 && (info->rhs_code == INTEGER_CST
1940 || info->rhs_code == BIT_INSERT_EXPR)
1941 && info->bitregion_start == stores[0]->bitregion_start
1942 && info->bitregion_end == stores[0]->bitregion_end)
1943 return true;
1944
1945 return false;
1946 }
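/* Illustration of the INTEGER_CST/BIT_INSERT_EXPR case above, using a
hypothetical bit-field layout:
struct S { int a : 7; int b : 9; } s;
s.a = 1;    <-- rhs_code INTEGER_CST
s.b = _2;   <-- bit-field store of an SSA name, typically recorded
with rhs_code BIT_INSERT_EXPR
Both stores target the same underlying word, so the second one is
accepted into a group started by the first. */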
1947
1948 /* Helper method for merge_into and merge_overlapping to do
1949 the common part. */
1950
1951 void
1952 merged_store_group::do_merge (store_immediate_info *info)
1953 {
1954 bitregion_start = MIN (bitregion_start, info->bitregion_start);
1955 bitregion_end = MAX (bitregion_end, info->bitregion_end);
1956
1957 unsigned int this_align;
1958 unsigned HOST_WIDE_INT align_bitpos = 0;
1959 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1960 &this_align, &align_bitpos);
1961 if (this_align > align)
1962 {
1963 align = this_align;
1964 align_base = info->bitpos - align_bitpos;
1965 }
1966 for (int i = 0; i < 2; ++i)
1967 {
1968 store_operand_info &op = info->ops[i];
1969 if (!op.base_addr)
1970 continue;
1971
1972 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1973 if (this_align > load_align[i])
1974 {
1975 load_align[i] = this_align;
1976 load_align_base[i] = op.bitpos - align_bitpos;
1977 }
1978 }
1979
1980 gimple *stmt = info->stmt;
1981 stores.safe_push (info);
1982 if (info->order > last_order)
1983 {
1984 last_order = info->order;
1985 last_stmt = stmt;
1986 }
1987 else if (info->order < first_order)
1988 {
1989 first_order = info->order;
1990 first_stmt = stmt;
1991 }
1992 if (info->rhs_code != INTEGER_CST)
1993 only_constants = false;
1994 }
1995
1996 /* Merge a store recorded by INFO into this merged store.
1997 The store is not overlapping with the existing recorded
1998 stores. */
1999
2000 void
2001 merged_store_group::merge_into (store_immediate_info *info)
2002 {
2003 /* Make sure we're inserting at the position we think we are. */
2004 gcc_assert (info->bitpos >= start + width
2005 && info->bitregion_start <= bitregion_end);
2006
2007 width = info->bitpos + info->bitsize - start;
2008 do_merge (info);
2009 }
2010
2011 /* Merge a store described by INFO into this merged store.
2012 INFO overlaps in some way with the current store (i.e. it's not contiguous,
2013 which is handled by merged_store_group::merge_into). */
2014
2015 void
2016 merged_store_group::merge_overlapping (store_immediate_info *info)
2017 {
2018 /* If the store extends the size of the group, extend the width. */
2019 if (info->bitpos + info->bitsize > start + width)
2020 width = info->bitpos + info->bitsize - start;
2021
2022 do_merge (info);
2023 }
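/* For instance (hypothetical bit positions): if the group currently covers
bits [0, 32), merging an overlapping constant store of bits [8, 16)
leaves the width unchanged, whereas merging one covering [24, 40)
extends the group width to 40 bits. */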
2024
2025 /* Go through all the recorded stores in this group in program order and
2026 apply their values to the VAL byte array to create the final merged
2027 value. Return true if the operation succeeded. */
2028
2029 bool
2030 merged_store_group::apply_stores ()
2031 {
2032 /* Make sure the bit regions are byte-aligned and that we have more than
2033 one store in the group, otherwise we cannot merge anything. */
2034 if (bitregion_start % BITS_PER_UNIT != 0
2035 || bitregion_end % BITS_PER_UNIT != 0
2036 || stores.length () == 1)
2037 return false;
2038
2039 stores.qsort (sort_by_order);
2040 store_immediate_info *info;
2041 unsigned int i;
2042 /* Create a power-of-2-sized buffer for native_encode_expr. */
2043 buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
2044 val = XNEWVEC (unsigned char, 2 * buf_size);
2045 mask = val + buf_size;
2046 memset (val, 0, buf_size);
2047 memset (mask, ~0U, buf_size);
2048
2049 FOR_EACH_VEC_ELT (stores, i, info)
2050 {
2051 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2052 tree cst;
2053 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2054 cst = info->ops[0].val;
2055 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2056 cst = info->ops[1].val;
2057 else
2058 cst = NULL_TREE;
2059 bool ret = true;
2060 if (cst)
2061 {
2062 if (info->rhs_code == BIT_INSERT_EXPR)
2063 bit_insertion = true;
2064 else
2065 ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2066 pos_in_buffer, buf_size);
2067 }
2068 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2069 if (BYTES_BIG_ENDIAN)
2070 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2071 - (pos_in_buffer % BITS_PER_UNIT)),
2072 info->bitsize);
2073 else
2074 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2075 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2076 {
2077 if (ret)
2078 {
2079 fputs ("After writing ", dump_file);
2080 print_generic_expr (dump_file, cst, TDF_NONE);
2081 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2082 " at position %d\n", info->bitsize, pos_in_buffer);
2083 fputs (" the merged value contains ", dump_file);
2084 dump_char_array (dump_file, val, buf_size);
2085 fputs (" the merged mask contains ", dump_file);
2086 dump_char_array (dump_file, mask, buf_size);
2087 if (bit_insertion)
2088 fputs (" bit insertion is required\n", dump_file);
2089 }
2090 else
2091 fprintf (dump_file, "Failed to merge stores\n");
2092 }
2093 if (!ret)
2094 return false;
2095 }
2096 stores.qsort (sort_by_bitpos);
2097 return true;
2098 }
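/* Small illustration with hypothetical values: for a 4-byte bit region
containing the byte stores
MEM[p] = 0x12;  MEM[p + 2B] = 0x34;
apply_stores produces (one byte per cell)
val:  12 00 34 00
mask: 00 ff 00 ff
i.e. VAL holds the bytes that were written and MASK keeps all-ones
bytes exactly where no recorded store has written anything. */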
2099
2100 /* Structure describing the store chain. */
2101
2102 class imm_store_chain_info
2103 {
2104 public:
2105 /* Doubly-linked list that imposes an order on chain processing.
2106 PNXP (prev's next pointer) points to the head of a list, or to
2107 the next field in the previous chain in the list.
2108 See pass_store_merging::m_stores_head for more rationale. */
2109 imm_store_chain_info *next, **pnxp;
2110 tree base_addr;
2111 auto_vec<store_immediate_info *> m_store_info;
2112 auto_vec<merged_store_group *> m_merged_store_groups;
2113
2114 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2115 : next (inspt), pnxp (&inspt), base_addr (b_a)
2116 {
2117 inspt = this;
2118 if (next)
2119 {
2120 gcc_checking_assert (pnxp == next->pnxp);
2121 next->pnxp = &next;
2122 }
2123 }
2124 ~imm_store_chain_info ()
2125 {
2126 *pnxp = next;
2127 if (next)
2128 {
2129 gcc_checking_assert (&next == next->pnxp);
2130 next->pnxp = pnxp;
2131 }
2132 }
2133 bool terminate_and_process_chain ();
2134 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
2135 bool coalesce_immediate_stores ();
2136 bool output_merged_store (merged_store_group *);
2137 bool output_merged_stores ();
2138 };
2139
2140 const pass_data pass_data_tree_store_merging = {
2141 GIMPLE_PASS, /* type */
2142 "store-merging", /* name */
2143 OPTGROUP_NONE, /* optinfo_flags */
2144 TV_GIMPLE_STORE_MERGING, /* tv_id */
2145 PROP_ssa, /* properties_required */
2146 0, /* properties_provided */
2147 0, /* properties_destroyed */
2148 0, /* todo_flags_start */
2149 TODO_update_ssa, /* todo_flags_finish */
2150 };
2151
2152 class pass_store_merging : public gimple_opt_pass
2153 {
2154 public:
2155 pass_store_merging (gcc::context *ctxt)
2156 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2157 {
2158 }
2159
2160 /* Pass not supported for PDP-endian, nor for insane hosts or
2161 target character sizes where native_{encode,interpret}_expr
2162 doesn't work properly. */
2163 virtual bool
2164 gate (function *)
2165 {
2166 return flag_store_merging
2167 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2168 && CHAR_BIT == 8
2169 && BITS_PER_UNIT == 8;
2170 }
2171
2172 virtual unsigned int execute (function *);
2173
2174 private:
2175 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2176
2177 /* Form a doubly-linked stack of the elements of m_stores, so that
2178 we can iterate over them in a predictable way. Using this order
2179 avoids extraneous differences in the compiler output just because
2180 of tree pointer variations (e.g. different chains end up in
2181 different positions of m_stores, so they are handled in different
2182 orders, so they allocate or release SSA names in different
2183 orders, and when they get reused, subsequent passes end up
2184 getting different SSA names, which may ultimately change
2185 decisions when going out of SSA). */
2186 imm_store_chain_info *m_stores_head;
2187
2188 bool process_store (gimple *);
2189 bool terminate_and_process_chain (imm_store_chain_info *);
2190 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2191 bool terminate_and_process_all_chains ();
2192 }; // class pass_store_merging
2193
2194 /* Terminate and process all recorded chains. Return true if any changes
2195 were made. */
2196
2197 bool
2198 pass_store_merging::terminate_and_process_all_chains ()
2199 {
2200 bool ret = false;
2201 while (m_stores_head)
2202 ret |= terminate_and_process_chain (m_stores_head);
2203 gcc_assert (m_stores.is_empty ());
2204 return ret;
2205 }
2206
2207 /* Terminate all chains that are affected by the statement STMT.
2208 If non-NULL, CHAIN_INFO is a chain that should be skipped by the
2209 checks. Return true if any changes were made. */
2210
2211 bool
2212 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2213 **chain_info,
2214 gimple *stmt)
2215 {
2216 bool ret = false;
2217
2218 /* If the statement doesn't touch memory it can't alias. */
2219 if (!gimple_vuse (stmt))
2220 return false;
2221
2222 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2223 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2224 {
2225 next = cur->next;
2226
2227 /* We already checked all the stores in chain_info and terminated the
2228 chain if necessary. Skip it here. */
2229 if (chain_info && *chain_info == cur)
2230 continue;
2231
2232 store_immediate_info *info;
2233 unsigned int i;
2234 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2235 {
2236 tree lhs = gimple_assign_lhs (info->stmt);
2237 if (ref_maybe_used_by_stmt_p (stmt, lhs)
2238 || stmt_may_clobber_ref_p (stmt, lhs)
2239 || (store_lhs && refs_output_dependent_p (store_lhs, lhs)))
2240 {
2241 if (dump_file && (dump_flags & TDF_DETAILS))
2242 {
2243 fprintf (dump_file, "stmt causes chain termination:\n");
2244 print_gimple_stmt (dump_file, stmt, 0);
2245 }
2246 ret |= terminate_and_process_chain (cur);
2247 break;
2248 }
2249 }
2250 }
2251
2252 return ret;
2253 }
2254
2255 /* Helper function. Terminate the chain recorded in CHAIN_INFO.
2256 Return true if the merging and output was successful. The m_stores
2257 entry is removed after the processing in any case. */
2258
2259 bool
2260 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2261 {
2262 bool ret = chain_info->terminate_and_process_chain ();
2263 m_stores.remove (chain_info->base_addr);
2264 delete chain_info;
2265 return ret;
2266 }
2267
2268 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2269 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2270 be able to sink a load of REF across stores between FIRST and LAST, up
2271 to right before LAST. */
2272
2273 bool
2274 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2275 {
2276 ao_ref r;
2277 ao_ref_init (&r, ref);
2278 unsigned int count = 0;
2279 tree vop = gimple_vdef (last);
2280 gimple *stmt;
2281
2282 /* Return true conservatively if the basic blocks are different. */
2283 if (gimple_bb (first) != gimple_bb (last))
2284 return true;
2285
2286 do
2287 {
2288 stmt = SSA_NAME_DEF_STMT (vop);
2289 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2290 return true;
2291 if (gimple_store_p (stmt)
2292 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2293 return true;
2294 /* Avoid quadratic compile time by bounding the number of checks
2295 we perform. */
2296 if (++count > MAX_STORE_ALIAS_CHECKS)
2297 return true;
2298 vop = gimple_vuse (stmt);
2299 }
2300 while (stmt != first);
2301
2302 return false;
2303 }
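/* Illustrative sketch of the walk above (hypothetical GIMPLE, virtual
operands shown after #):
s.a = 1;    # .MEM_2 = VDEF <.MEM_1>     <-- FIRST
t.b = _5;   # .MEM_3 = VDEF <.MEM_2>
s.c = 2;    # .MEM_4 = VDEF <.MEM_3>     <-- LAST
Starting from LAST's virtual definition the loop visits s.c, t.b and
finally s.a by following the VUSE links, checking each statement
against REF and answering true conservatively once more than
MAX_STORE_ALIAS_CHECKS statements have been inspected. */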
2304
2305 /* Return true if INFO->ops[IDX] is mergeable with the
2306 corresponding loads already in MERGED_STORE group.
2307 BASE_ADDR is the base address of the whole store group. */
2308
2309 bool
2310 compatible_load_p (merged_store_group *merged_store,
2311 store_immediate_info *info,
2312 tree base_addr, int idx)
2313 {
2314 store_immediate_info *infof = merged_store->stores[0];
2315 if (!info->ops[idx].base_addr
2316 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2317 info->bitpos - infof->bitpos)
2318 || !operand_equal_p (info->ops[idx].base_addr,
2319 infof->ops[idx].base_addr, 0))
2320 return false;
2321
2322 store_immediate_info *infol = merged_store->stores.last ();
2323 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2324 /* In this case all vuses should be the same, e.g.
2325 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2326 or
2327 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2328 and we can emit the coalesced load next to any of those loads. */
2329 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2330 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2331 return true;
2332
2333 /* Otherwise, at least for now require that the load has the same
2334 vuse as the store. See following examples. */
2335 if (gimple_vuse (info->stmt) != load_vuse)
2336 return false;
2337
2338 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2339 || (infof != infol
2340 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2341 return false;
2342
2343 /* If the load is from the same location as the store, already
2344 the construction of the immediate chain info guarantees no intervening
2345 stores, so no further checks are needed. Example:
2346 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2347 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2348 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2349 return true;
2350
2351 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2352 of the stores in the group, or any other stores in between those.
2353 Previous calls to compatible_load_p ensured that for all the
2354 merged_store->stores IDX loads, no stmts starting with
2355 merged_store->first_stmt and ending right before merged_store->last_stmt
2356 clobbers those loads. */
2357 gimple *first = merged_store->first_stmt;
2358 gimple *last = merged_store->last_stmt;
2359 unsigned int i;
2360 store_immediate_info *infoc;
2361 /* The stores are sorted by increasing store bitpos, so if the info->stmt
2362 store comes before the so far first store, we'll be changing
2363 merged_store->first_stmt. In that case we need to give up if
2364 any of the earlier processed loads are clobbered by the stmts in the
2365 new range. */
2366 if (info->order < merged_store->first_order)
2367 {
2368 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2369 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2370 return false;
2371 first = info->stmt;
2372 }
2373 /* Similarly, we could change merged_store->last_stmt, so ensure
2374 in that case no stmts in the new range clobber any of the earlier
2375 processed loads. */
2376 else if (info->order > merged_store->last_order)
2377 {
2378 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2379 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2380 return false;
2381 last = info->stmt;
2382 }
2383 /* And finally, we'd be adding a new load to the set, ensure it isn't
2384 clobbered in the new range. */
2385 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2386 return false;
2387
2388 /* Otherwise, we are looking for:
2389 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2390 or
2391 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2392 return true;
2393 }
2394
2395 /* Add all refs loaded to compute VAL to REFS vector. */
2396
2397 void
2398 gather_bswap_load_refs (vec<tree> *refs, tree val)
2399 {
2400 if (TREE_CODE (val) != SSA_NAME)
2401 return;
2402
2403 gimple *stmt = SSA_NAME_DEF_STMT (val);
2404 if (!is_gimple_assign (stmt))
2405 return;
2406
2407 if (gimple_assign_load_p (stmt))
2408 {
2409 refs->safe_push (gimple_assign_rhs1 (stmt));
2410 return;
2411 }
2412
2413 switch (gimple_assign_rhs_class (stmt))
2414 {
2415 case GIMPLE_BINARY_RHS:
2416 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2417 /* FALLTHRU */
2418 case GIMPLE_UNARY_RHS:
2419 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2420 break;
2421 default:
2422 gcc_unreachable ();
2423 }
2424 }
2425
2426 /* Check if there are any stores in M_STORE_INFO after index I
2427 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2428 a potential group ending at END and have their order
2429 smaller than LAST_ORDER. RHS_CODE is the kind of store in the
2430 group. Return true if there are no such stores.
2431 Consider:
2432 MEM[(long long int *)p_28] = 0;
2433 MEM[(long long int *)p_28 + 8B] = 0;
2434 MEM[(long long int *)p_28 + 16B] = 0;
2435 MEM[(long long int *)p_28 + 24B] = 0;
2436 _129 = (int) _130;
2437 MEM[(int *)p_28 + 8B] = _129;
2438 MEM[(int *)p_28].a = -1;
2439 We already have
2440 MEM[(long long int *)p_28] = 0;
2441 MEM[(int *)p_28].a = -1;
2442 stmts in the current group and need to consider if it is safe to
2443 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2444 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2445 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2446 into the group and merging of those 3 stores is successful, merged
2447 stmts will be emitted at the latest store from that group, i.e.
2448 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2449 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2450 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2451 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2452 into the group. That way it will be its own store group and will
2453 not be touched. If RHS_CODE is INTEGER_CST and there are overlapping
2454 INTEGER_CST stores, those are mergeable using merge_overlapping,
2455 so don't return false for those. */
2456
2457 static bool
2458 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2459 enum tree_code rhs_code, unsigned int last_order,
2460 unsigned HOST_WIDE_INT end)
2461 {
2462 unsigned int len = m_store_info.length ();
2463 for (++i; i < len; ++i)
2464 {
2465 store_immediate_info *info = m_store_info[i];
2466 if (info->bitpos >= end)
2467 break;
2468 if (info->order < last_order
2469 && (rhs_code != INTEGER_CST || info->rhs_code != INTEGER_CST))
2470 return false;
2471 }
2472 return true;
2473 }
2474
2475 /* Return true if m_store_info[FIRST] and at least one following store
2476 form a group which stores a TRY_SIZE-bit value that is byte swapped
2477 from a memory load or from some value, or is the identity of some value.
2478 This uses the bswap pass APIs. */
2479
2480 bool
2481 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2482 unsigned int first,
2483 unsigned int try_size)
2484 {
2485 unsigned int len = m_store_info.length (), last = first;
2486 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2487 if (width >= try_size)
2488 return false;
2489 for (unsigned int i = first + 1; i < len; ++i)
2490 {
2491 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2492 || m_store_info[i]->ins_stmt == NULL)
2493 return false;
2494 width += m_store_info[i]->bitsize;
2495 if (width >= try_size)
2496 {
2497 last = i;
2498 break;
2499 }
2500 }
2501 if (width != try_size)
2502 return false;
2503
2504 bool allow_unaligned
2505 = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
2506 /* Punt if the combined store would not be aligned and we need alignment. */
2507 if (!allow_unaligned)
2508 {
2509 unsigned int align = merged_store->align;
2510 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2511 for (unsigned int i = first + 1; i <= last; ++i)
2512 {
2513 unsigned int this_align;
2514 unsigned HOST_WIDE_INT align_bitpos = 0;
2515 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2516 &this_align, &align_bitpos);
2517 if (this_align > align)
2518 {
2519 align = this_align;
2520 align_base = m_store_info[i]->bitpos - align_bitpos;
2521 }
2522 }
2523 unsigned HOST_WIDE_INT align_bitpos
2524 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2525 if (align_bitpos)
2526 align = least_bit_hwi (align_bitpos);
2527 if (align < try_size)
2528 return false;
2529 }
2530
2531 tree type;
2532 switch (try_size)
2533 {
2534 case 16: type = uint16_type_node; break;
2535 case 32: type = uint32_type_node; break;
2536 case 64: type = uint64_type_node; break;
2537 default: gcc_unreachable ();
2538 }
2539 struct symbolic_number n;
2540 gimple *ins_stmt = NULL;
2541 int vuse_store = -1;
2542 unsigned int first_order = merged_store->first_order;
2543 unsigned int last_order = merged_store->last_order;
2544 gimple *first_stmt = merged_store->first_stmt;
2545 gimple *last_stmt = merged_store->last_stmt;
2546 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2547 store_immediate_info *infof = m_store_info[first];
2548
2549 for (unsigned int i = first; i <= last; ++i)
2550 {
2551 store_immediate_info *info = m_store_info[i];
2552 struct symbolic_number this_n = info->n;
2553 this_n.type = type;
2554 if (!this_n.base_addr)
2555 this_n.range = try_size / BITS_PER_UNIT;
2556 else
2557 /* Update vuse in case it has been changed by output_merged_stores. */
2558 this_n.vuse = gimple_vuse (info->ins_stmt);
2559 unsigned int bitpos = info->bitpos - infof->bitpos;
2560 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2561 BYTES_BIG_ENDIAN
2562 ? try_size - info->bitsize - bitpos
2563 : bitpos))
2564 return false;
2565 if (this_n.base_addr && vuse_store)
2566 {
2567 unsigned int j;
2568 for (j = first; j <= last; ++j)
2569 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2570 break;
2571 if (j > last)
2572 {
2573 if (vuse_store == 1)
2574 return false;
2575 vuse_store = 0;
2576 }
2577 }
2578 if (i == first)
2579 {
2580 n = this_n;
2581 ins_stmt = info->ins_stmt;
2582 }
2583 else
2584 {
2585 if (n.base_addr && n.vuse != this_n.vuse)
2586 {
2587 if (vuse_store == 0)
2588 return false;
2589 vuse_store = 1;
2590 }
2591 if (info->order > last_order)
2592 {
2593 last_order = info->order;
2594 last_stmt = info->stmt;
2595 }
2596 else if (info->order < first_order)
2597 {
2598 first_order = info->order;
2599 first_stmt = info->stmt;
2600 }
2601 end = MAX (end, info->bitpos + info->bitsize);
2602
2603 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2604 &this_n, &n);
2605 if (ins_stmt == NULL)
2606 return false;
2607 }
2608 }
2609
2610 uint64_t cmpxchg, cmpnop;
2611 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2612
2613 /* A complete byte swap should make the symbolic number start with
2614 the largest digit in the highest order byte. An unchanged symbolic
2615 number indicates a read with the same endianness as the target architecture. */
2616 if (n.n != cmpnop && n.n != cmpxchg)
2617 return false;
2618
2619 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2620 return false;
2621
2622 if (!check_no_overlap (m_store_info, last, LROTATE_EXPR, last_order, end))
2623 return false;
2624
2625 /* Don't handle memory copy this way if normal non-bswap processing
2626 would handle it too. */
2627 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2628 {
2629 unsigned int i;
2630 for (i = first; i <= last; ++i)
2631 if (m_store_info[i]->rhs_code != MEM_REF)
2632 break;
2633 if (i == last + 1)
2634 return false;
2635 }
2636
2637 if (n.n == cmpxchg)
2638 switch (try_size)
2639 {
2640 case 16:
2641 /* Will emit LROTATE_EXPR. */
2642 break;
2643 case 32:
2644 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2645 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2646 break;
2647 return false;
2648 case 64:
2649 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2650 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2651 break;
2652 return false;
2653 default:
2654 gcc_unreachable ();
2655 }
2656
2657 if (!allow_unaligned && n.base_addr)
2658 {
2659 unsigned int align = get_object_alignment (n.src);
2660 if (align < try_size)
2661 return false;
2662 }
2663
2664 /* If each load has the vuse of the corresponding store, we need to verify
2665 that the loads can be sunk right before the last store. */
2666 if (vuse_store == 1)
2667 {
2668 auto_vec<tree, 64> refs;
2669 for (unsigned int i = first; i <= last; ++i)
2670 gather_bswap_load_refs (&refs,
2671 gimple_assign_rhs1 (m_store_info[i]->stmt));
2672
2673 unsigned int i;
2674 tree ref;
2675 FOR_EACH_VEC_ELT (refs, i, ref)
2676 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2677 return false;
2678 n.vuse = NULL_TREE;
2679 }
2680
2681 infof->n = n;
2682 infof->ins_stmt = ins_stmt;
2683 for (unsigned int i = first; i <= last; ++i)
2684 {
2685 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2686 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2687 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2688 if (i != first)
2689 merged_store->merge_into (m_store_info[i]);
2690 }
2691
2692 return true;
2693 }
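/* For illustration (assuming a little-endian target with a usable
bswap32 optab): the byte stores
p[0] = (char) (x >> 24);
p[1] = (char) (x >> 16);
p[2] = (char) (x >> 8);
p[3] = (char) x;
are recognized here as a single byte-swapped 32-bit store, while
p[0] = (char) x;
p[1] = (char) (x >> 8);
p[2] = (char) (x >> 16);
p[3] = (char) (x >> 24);
is the identity case (n.n == cmpnop) and becomes a plain 32-bit store
of x. */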
2694
2695 /* Go through the candidate stores recorded in m_store_info and merge them
2696 into merged_store_group objects recorded in m_merged_store_groups,
2697 representing the widened stores. Return true if coalescing was successful
2698 and the number of widened stores is smaller than the original number
2699 of stores. */
2700
2701 bool
2702 imm_store_chain_info::coalesce_immediate_stores ()
2703 {
2704 /* Anything less can't be processed. */
2705 if (m_store_info.length () < 2)
2706 return false;
2707
2708 if (dump_file && (dump_flags & TDF_DETAILS))
2709 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2710 m_store_info.length ());
2711
2712 store_immediate_info *info;
2713 unsigned int i, ignore = 0;
2714
2715 /* Order the stores by the bitposition they write to. */
2716 m_store_info.qsort (sort_by_bitpos);
2717
2718 info = m_store_info[0];
2719 merged_store_group *merged_store = new merged_store_group (info);
2720 if (dump_file && (dump_flags & TDF_DETAILS))
2721 fputs ("New store group\n", dump_file);
2722
2723 FOR_EACH_VEC_ELT (m_store_info, i, info)
2724 {
2725 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2726
2727 if (i <= ignore)
2728 goto done;
2729
2730 /* First try to handle group of stores like:
2731 p[0] = data >> 24;
2732 p[1] = data >> 16;
2733 p[2] = data >> 8;
2734 p[3] = data;
2735 using the bswap framework. */
2736 if (info->bitpos == merged_store->start + merged_store->width
2737 && merged_store->stores.length () == 1
2738 && merged_store->stores[0]->ins_stmt != NULL
2739 && info->ins_stmt != NULL)
2740 {
2741 unsigned int try_size;
2742 for (try_size = 64; try_size >= 16; try_size >>= 1)
2743 if (try_coalesce_bswap (merged_store, i - 1, try_size))
2744 break;
2745
2746 if (try_size >= 16)
2747 {
2748 ignore = i + merged_store->stores.length () - 1;
2749 m_merged_store_groups.safe_push (merged_store);
2750 if (ignore < m_store_info.length ())
2751 merged_store = new merged_store_group (m_store_info[ignore]);
2752 else
2753 merged_store = NULL;
2754 goto done;
2755 }
2756 }
2757
2758 new_bitregion_start
2759 = MIN (merged_store->bitregion_start, info->bitregion_start);
2760 new_bitregion_end
2761 = MAX (merged_store->bitregion_end, info->bitregion_end);
2762
2763 if (info->order >= merged_store->first_nonmergeable_order
2764 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
2765 > (unsigned) PARAM_VALUE (PARAM_STORE_MERGING_MAX_SIZE)))
2766 ;
2767
2768 /* |---store 1---|
2769 |---store 2---|
2770 Overlapping stores. */
2771 else if (IN_RANGE (info->bitpos, merged_store->start,
2772 merged_store->start + merged_store->width - 1))
2773 {
2774 /* Only allow overlapping stores of constants. */
2775 if (info->rhs_code == INTEGER_CST
2776 && merged_store->only_constants
2777 && info->lp_nr == merged_store->lp_nr)
2778 {
2779 unsigned int last_order
2780 = MAX (merged_store->last_order, info->order);
2781 unsigned HOST_WIDE_INT end
2782 = MAX (merged_store->start + merged_store->width,
2783 info->bitpos + info->bitsize);
2784 if (check_no_overlap (m_store_info, i, INTEGER_CST,
2785 last_order, end))
2786 {
2787 /* check_no_overlap call above made sure there are no
2788 overlapping stores with non-INTEGER_CST rhs_code
2789 in between the first and last of the stores we've
2790 just merged. If there are any INTEGER_CST rhs_code
2791 stores in between, we need to merge_overlapping them
2792 even if in the sort_by_bitpos order there are other
2793 overlapping stores in between. Keep those stores as is.
2794 Example:
2795 MEM[(int *)p_28] = 0;
2796 MEM[(char *)p_28 + 3B] = 1;
2797 MEM[(char *)p_28 + 1B] = 2;
2798 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2799 We can't merge the zero store with the store of two and
2800 not merge anything else, because the store of one is
2801 in the original order in between those two, but in
2802 sort_by_bitpos order it comes after the last store that
2803 we can't merge with them. We can merge the first 3 stores
2804 and keep the last store as is though. */
2805 unsigned int len = m_store_info.length ();
2806 unsigned int try_order = last_order;
2807 unsigned int first_nonmergeable_order;
2808 unsigned int k;
2809 bool last_iter = false;
2810 int attempts = 0;
2811 do
2812 {
2813 unsigned int max_order = 0;
2814 unsigned first_nonmergeable_int_order = ~0U;
2815 unsigned HOST_WIDE_INT this_end = end;
2816 k = i;
2817 first_nonmergeable_order = ~0U;
2818 for (unsigned int j = i + 1; j < len; ++j)
2819 {
2820 store_immediate_info *info2 = m_store_info[j];
2821 if (info2->bitpos >= this_end)
2822 break;
2823 if (info2->order < try_order)
2824 {
2825 if (info2->rhs_code != INTEGER_CST)
2826 {
2827 /* Normally check_no_overlap makes sure this
2828 doesn't happen, but if end grows below,
2829 then we need to process more stores than
2830 check_no_overlap verified. Example:
2831 MEM[(int *)p_5] = 0;
2832 MEM[(short *)p_5 + 3B] = 1;
2833 MEM[(char *)p_5 + 4B] = _9;
2834 MEM[(char *)p_5 + 2B] = 2; */
2835 k = 0;
2836 break;
2837 }
2838 k = j;
2839 this_end = MAX (this_end,
2840 info2->bitpos + info2->bitsize);
2841 }
2842 else if (info2->rhs_code == INTEGER_CST
2843 && !last_iter)
2844 {
2845 max_order = MAX (max_order, info2->order + 1);
2846 first_nonmergeable_int_order
2847 = MIN (first_nonmergeable_int_order,
2848 info2->order);
2849 }
2850 else
2851 first_nonmergeable_order
2852 = MIN (first_nonmergeable_order, info2->order);
2853 }
2854 if (k == 0)
2855 {
2856 if (last_order == try_order)
2857 break;
2858 /* If this failed, but only because we grew
2859 try_order, retry with the last working one,
2860 so that we merge at least something. */
2861 try_order = last_order;
2862 last_iter = true;
2863 continue;
2864 }
2865 last_order = try_order;
2866 /* Retry with a larger try_order to see if we could
2867 merge some further INTEGER_CST stores. */
2868 if (max_order
2869 && (first_nonmergeable_int_order
2870 < first_nonmergeable_order))
2871 {
2872 try_order = MIN (max_order,
2873 first_nonmergeable_order);
2874 try_order
2875 = MIN (try_order,
2876 merged_store->first_nonmergeable_order);
2877 if (try_order > last_order && ++attempts < 16)
2878 continue;
2879 }
2880 first_nonmergeable_order
2881 = MIN (first_nonmergeable_order,
2882 first_nonmergeable_int_order);
2883 end = this_end;
2884 break;
2885 }
2886 while (1);
2887
2888 if (k != 0)
2889 {
2890 merged_store->merge_overlapping (info);
2891
2892 merged_store->first_nonmergeable_order
2893 = MIN (merged_store->first_nonmergeable_order,
2894 first_nonmergeable_order);
2895
2896 for (unsigned int j = i + 1; j <= k; j++)
2897 {
2898 store_immediate_info *info2 = m_store_info[j];
2899 gcc_assert (info2->bitpos < end);
2900 if (info2->order < last_order)
2901 {
2902 gcc_assert (info2->rhs_code == INTEGER_CST);
2903 if (info != info2)
2904 merged_store->merge_overlapping (info2);
2905 }
2906 /* Other stores are kept and not merged in any
2907 way. */
2908 }
2909 ignore = k;
2910 goto done;
2911 }
2912 }
2913 }
2914 }
2915 /* |---store 1---||---store 2---|
2916 This store is consecutive to the previous one.
2917 Merge it into the current store group. There can be gaps in between
2918 the stores, but there can't be gaps in between bitregions. */
2919 else if (info->bitregion_start <= merged_store->bitregion_end
2920 && merged_store->can_be_merged_into (info))
2921 {
2922 store_immediate_info *infof = merged_store->stores[0];
2923
2924 /* All the rhs_code ops that take 2 operands are commutative;
2925 swap the operands if that could make them compatible. */
2926 if (infof->ops[0].base_addr
2927 && infof->ops[1].base_addr
2928 && info->ops[0].base_addr
2929 && info->ops[1].base_addr
2930 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2931 info->bitpos - infof->bitpos)
2932 && operand_equal_p (info->ops[1].base_addr,
2933 infof->ops[0].base_addr, 0))
2934 {
2935 std::swap (info->ops[0], info->ops[1]);
2936 info->ops_swapped_p = true;
2937 }
2938 if (check_no_overlap (m_store_info, i, info->rhs_code,
2939 MAX (merged_store->last_order, info->order),
2940 MAX (merged_store->start + merged_store->width,
2941 info->bitpos + info->bitsize)))
2942 {
2943 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
2944 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2945 {
2946 info->rhs_code = BIT_INSERT_EXPR;
2947 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2948 info->ops[0].base_addr = NULL_TREE;
2949 }
2950 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2951 {
2952 store_immediate_info *infoj;
2953 unsigned int j;
2954 FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2955 {
2956 infoj->rhs_code = BIT_INSERT_EXPR;
2957 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2958 infoj->ops[0].base_addr = NULL_TREE;
2959 }
2960 }
2961 if ((infof->ops[0].base_addr
2962 ? compatible_load_p (merged_store, info, base_addr, 0)
2963 : !info->ops[0].base_addr)
2964 && (infof->ops[1].base_addr
2965 ? compatible_load_p (merged_store, info, base_addr, 1)
2966 : !info->ops[1].base_addr))
2967 {
2968 merged_store->merge_into (info);
2969 goto done;
2970 }
2971 }
2972 }
2973
2974 /* |---store 1---| <gap> |---store 2---|.
2975 Gap between stores or the rhs not compatible. Start a new group. */
2976
2977 /* Try to apply all the stores recorded for the group to determine
2978 the bitpattern they write and discard it if that fails.
2979 This will also reject single-store groups. */
2980 if (merged_store->apply_stores ())
2981 m_merged_store_groups.safe_push (merged_store);
2982 else
2983 delete merged_store;
2984
2985 merged_store = new merged_store_group (info);
2986 if (dump_file && (dump_flags & TDF_DETAILS))
2987 fputs ("New store group\n", dump_file);
2988
2989 done:
2990 if (dump_file && (dump_flags & TDF_DETAILS))
2991 {
2992 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2993 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
2994 i, info->bitsize, info->bitpos);
2995 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
2996 fputc ('\n', dump_file);
2997 }
2998 }
2999
3000 /* Record or discard the last store group. */
3001 if (merged_store)
3002 {
3003 if (merged_store->apply_stores ())
3004 m_merged_store_groups.safe_push (merged_store);
3005 else
3006 delete merged_store;
3007 }
3008
3009 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3010
3011 bool success
3012 = !m_merged_store_groups.is_empty ()
3013 && m_merged_store_groups.length () < m_store_info.length ();
3014
3015 if (success && dump_file)
3016 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3017 m_merged_store_groups.length ());
3018
3019 return success;
3020 }
3021
3022 /* Return the type to use for the merged stores or loads described by STMTS.
3023 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3024 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3025 of the MEM_REFs if any. */
3026
3027 static tree
3028 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3029 unsigned short *cliquep, unsigned short *basep)
3030 {
3031 gimple *stmt;
3032 unsigned int i;
3033 tree type = NULL_TREE;
3034 tree ret = NULL_TREE;
3035 *cliquep = 0;
3036 *basep = 0;
3037
3038 FOR_EACH_VEC_ELT (stmts, i, stmt)
3039 {
3040 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3041 : gimple_assign_lhs (stmt);
3042 tree type1 = reference_alias_ptr_type (ref);
3043 tree base = get_base_address (ref);
3044
3045 if (i == 0)
3046 {
3047 if (TREE_CODE (base) == MEM_REF)
3048 {
3049 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3050 *basep = MR_DEPENDENCE_BASE (base);
3051 }
3052 ret = type = type1;
3053 continue;
3054 }
3055 if (!alias_ptr_types_compatible_p (type, type1))
3056 ret = ptr_type_node;
3057 if (TREE_CODE (base) != MEM_REF
3058 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3059 || *basep != MR_DEPENDENCE_BASE (base))
3060 {
3061 *cliquep = 0;
3062 *basep = 0;
3063 }
3064 }
3065 return ret;
3066 }
3067
3068 /* Return the location_t information we can find among the statements
3069 in STMTS. */
3070
3071 static location_t
3072 get_location_for_stmts (vec<gimple *> &stmts)
3073 {
3074 gimple *stmt;
3075 unsigned int i;
3076
3077 FOR_EACH_VEC_ELT (stmts, i, stmt)
3078 if (gimple_has_location (stmt))
3079 return gimple_location (stmt);
3080
3081 return UNKNOWN_LOCATION;
3082 }
3083
3084 /* Used to describe a store resulting from splitting a wide store into
3085 smaller regularly-sized stores in split_group. */
3086
3087 class split_store
3088 {
3089 public:
3090 unsigned HOST_WIDE_INT bytepos;
3091 unsigned HOST_WIDE_INT size;
3092 unsigned HOST_WIDE_INT align;
3093 auto_vec<store_immediate_info *> orig_stores;
3094 /* True if there is a single orig stmt covering the whole split store. */
3095 bool orig;
3096 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3097 unsigned HOST_WIDE_INT);
3098 };
3099
3100 /* Simple constructor. */
3101
3102 split_store::split_store (unsigned HOST_WIDE_INT bp,
3103 unsigned HOST_WIDE_INT sz,
3104 unsigned HOST_WIDE_INT al)
3105 : bytepos (bp), size (sz), align (al), orig (false)
3106 {
3107 orig_stores.create (0);
3108 }
3109
3110 /* Record all stores in GROUP that write to the region starting at BITPOS
3111 and of size BITSIZE. Record infos for such statements in STORES if
3112 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3113 if there is exactly one original store in the range. */
3114
3115 static store_immediate_info *
3116 find_constituent_stores (class merged_store_group *group,
3117 vec<store_immediate_info *> *stores,
3118 unsigned int *first,
3119 unsigned HOST_WIDE_INT bitpos,
3120 unsigned HOST_WIDE_INT bitsize)
3121 {
3122 store_immediate_info *info, *ret = NULL;
3123 unsigned int i;
3124 bool second = false;
3125 bool update_first = true;
3126 unsigned HOST_WIDE_INT end = bitpos + bitsize;
3127 for (i = *first; group->stores.iterate (i, &info); ++i)
3128 {
3129 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3130 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3131 if (stmt_end <= bitpos)
3132 {
3133 /* BITPOS passed to this function never decreases from within the
3134 same split_group call, so optimize and don't scan info records
3135 which are known to end before or at BITPOS next time.
3136 Only do it if all stores before this one also pass this. */
3137 if (update_first)
3138 *first = i + 1;
3139 continue;
3140 }
3141 else
3142 update_first = false;
3143
3144 /* The stores in GROUP are ordered by bitposition so if we're past
3145 the region for this group return early. */
3146 if (stmt_start >= end)
3147 return ret;
3148
3149 if (stores)
3150 {
3151 stores->safe_push (info);
3152 if (ret)
3153 {
3154 ret = NULL;
3155 second = true;
3156 }
3157 }
3158 else if (ret)
3159 return NULL;
3160 if (!second)
3161 ret = info;
3162 }
3163 return ret;
3164 }
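/* Illustration with hypothetical bit ranges: for a GROUP containing
stores covering bits [0, 8), [8, 16) and [16, 48), a query for BITPOS 0
and BITSIZE 16 sees two constituent stores and thus returns NULL,
whereas a query for BITPOS 16 and BITSIZE 32 returns the single store
covering [16, 48). */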
3165
3166 /* Return how many SSA_NAMEs used to compute the value to store in the INFO
3167 store have multiple uses. If any SSA_NAME has multiple uses, also
3168 count statements needed to compute it. */
3169
3170 static unsigned
3171 count_multiple_uses (store_immediate_info *info)
3172 {
3173 gimple *stmt = info->stmt;
3174 unsigned ret = 0;
3175 switch (info->rhs_code)
3176 {
3177 case INTEGER_CST:
3178 return 0;
3179 case BIT_AND_EXPR:
3180 case BIT_IOR_EXPR:
3181 case BIT_XOR_EXPR:
3182 if (info->bit_not_p)
3183 {
3184 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3185 ret = 1; /* Fall through below to return
3186 the BIT_NOT_EXPR stmt and then
3187 BIT_{AND,IOR,XOR}_EXPR and anything it
3188 uses. */
3189 else
3190 /* stmt is after this the BIT_NOT_EXPR. */
3191 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3192 }
3193 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3194 {
3195 ret += 1 + info->ops[0].bit_not_p;
3196 if (info->ops[1].base_addr)
3197 ret += 1 + info->ops[1].bit_not_p;
3198 return ret + 1;
3199 }
3200 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3201 /* stmt is now the BIT_*_EXPR. */
3202 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3203 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3204 else if (info->ops[info->ops_swapped_p].bit_not_p)
3205 {
3206 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3207 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3208 ++ret;
3209 }
3210 if (info->ops[1].base_addr == NULL_TREE)
3211 {
3212 gcc_checking_assert (!info->ops_swapped_p);
3213 return ret;
3214 }
3215 if (!has_single_use (gimple_assign_rhs2 (stmt)))
3216 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3217 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3218 {
3219 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3220 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3221 ++ret;
3222 }
3223 return ret;
3224 case MEM_REF:
3225 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3226 return 1 + info->ops[0].bit_not_p;
3227 else if (info->ops[0].bit_not_p)
3228 {
3229 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3230 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3231 return 1;
3232 }
3233 return 0;
3234 case BIT_INSERT_EXPR:
3235 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3236 default:
3237 gcc_unreachable ();
3238 }
3239 }
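/* For example (hypothetical GIMPLE), in the MEM_REF case
_1 = s.a;
t.a = _1;
the function returns 1 if _1 has another use elsewhere (the load would
have to be kept) and 0 if this store is its only use. */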
3240
3241 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3242 vector (if non-NULL) with split_store structs describing the byte offset
3243 (from the base), the bit size and alignment of each store as well as the
3244 original statements involved in each such split group.
3245 This is to separate the splitting strategy from the statement
3246 building/emission/linking done in output_merged_store.
3247 Return number of new stores.
3248 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3249 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3250 BZERO_FIRST may be true only when the first store covers the whole group
3251 and clears it; if BZERO_FIRST is true, keep that first store in the set
3252 unmodified and emit further stores for the overrides only.
3253 If SPLIT_STORES is NULL, it is just a dry run to count the number of
3254 new stores. */
3255
3256 static unsigned int
3257 split_group (merged_store_group *group, bool allow_unaligned_store,
3258 bool allow_unaligned_load, bool bzero_first,
3259 vec<split_store *> *split_stores,
3260 unsigned *total_orig,
3261 unsigned *total_new)
3262 {
3263 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3264 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3265 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3266 unsigned HOST_WIDE_INT group_align = group->align;
3267 unsigned HOST_WIDE_INT align_base = group->align_base;
3268 unsigned HOST_WIDE_INT group_load_align = group_align;
3269 bool any_orig = false;
3270
3271 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3272
3273 if (group->stores[0]->rhs_code == LROTATE_EXPR
3274 || group->stores[0]->rhs_code == NOP_EXPR)
3275 {
3276 gcc_assert (!bzero_first);
3277 /* For bswap framework using sets of stores, all the checking
3278 has been done earlier in try_coalesce_bswap and needs to be
3279 emitted as a single store. */
3280 if (total_orig)
3281 {
3282 /* Avoid the old/new stmt count heuristics. It should
3283 always be beneficial. */
3284 total_new[0] = 1;
3285 total_orig[0] = 2;
3286 }
3287
3288 if (split_stores)
3289 {
3290 unsigned HOST_WIDE_INT align_bitpos
3291 = (group->start - align_base) & (group_align - 1);
3292 unsigned HOST_WIDE_INT align = group_align;
3293 if (align_bitpos)
3294 align = least_bit_hwi (align_bitpos);
3295 bytepos = group->start / BITS_PER_UNIT;
3296 split_store *store
3297 = new split_store (bytepos, group->width, align);
3298 unsigned int first = 0;
3299 find_constituent_stores (group, &store->orig_stores,
3300 &first, group->start, group->width);
3301 split_stores->safe_push (store);
3302 }
3303
3304 return 1;
3305 }
3306
3307 unsigned int ret = 0, first = 0;
3308 unsigned HOST_WIDE_INT try_pos = bytepos;
3309
3310 if (total_orig)
3311 {
3312 unsigned int i;
3313 store_immediate_info *info = group->stores[0];
3314
3315 total_new[0] = 0;
3316 total_orig[0] = 1; /* The orig store. */
3317 info = group->stores[0];
3318 if (info->ops[0].base_addr)
3319 total_orig[0]++;
3320 if (info->ops[1].base_addr)
3321 total_orig[0]++;
3322 switch (info->rhs_code)
3323 {
3324 case BIT_AND_EXPR:
3325 case BIT_IOR_EXPR:
3326 case BIT_XOR_EXPR:
3327 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3328 break;
3329 default:
3330 break;
3331 }
3332 total_orig[0] *= group->stores.length ();
3333
3334 FOR_EACH_VEC_ELT (group->stores, i, info)
3335 {
3336 total_new[0] += count_multiple_uses (info);
3337 total_orig[0] += (info->bit_not_p
3338 + info->ops[0].bit_not_p
3339 + info->ops[1].bit_not_p);
3340 }
3341 }
3342
3343 if (!allow_unaligned_load)
3344 for (int i = 0; i < 2; ++i)
3345 if (group->load_align[i])
3346 group_load_align = MIN (group_load_align, group->load_align[i]);
3347
3348 if (bzero_first)
3349 {
3350 first = 1;
3351 ret = 1;
3352 if (split_stores)
3353 {
3354 split_store *store
3355 = new split_store (bytepos, group->stores[0]->bitsize, align_base);
3356 store->orig_stores.safe_push (group->stores[0]);
3357 store->orig = true;
3358 any_orig = true;
3359 split_stores->safe_push (store);
3360 }
3361 }
3362
3363 while (size > 0)
3364 {
3365 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3366 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3367 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3368 {
3369 /* Skip padding bytes. */
3370 ++try_pos;
3371 size -= BITS_PER_UNIT;
3372 continue;
3373 }
3374
3375 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3376 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3377 unsigned HOST_WIDE_INT align_bitpos
3378 = (try_bitpos - align_base) & (group_align - 1);
3379 unsigned HOST_WIDE_INT align = group_align;
3380 if (align_bitpos)
3381 align = least_bit_hwi (align_bitpos);
3382 if (!allow_unaligned_store)
3383 try_size = MIN (try_size, align);
3384 if (!allow_unaligned_load)
3385 {
3386 /* If we can't do or don't want to do unaligned stores
3387 as well as loads, we need to take the loads into account
3388 as well. */
3389 unsigned HOST_WIDE_INT load_align = group_load_align;
3390 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3391 if (align_bitpos)
3392 load_align = least_bit_hwi (align_bitpos);
3393 for (int i = 0; i < 2; ++i)
3394 if (group->load_align[i])
3395 {
3396 align_bitpos
3397 = known_alignment (try_bitpos
3398 - group->stores[0]->bitpos
3399 + group->stores[0]->ops[i].bitpos
3400 - group->load_align_base[i]);
3401 if (align_bitpos & (group_load_align - 1))
3402 {
3403 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3404 load_align = MIN (load_align, a);
3405 }
3406 }
3407 try_size = MIN (try_size, load_align);
3408 }
3409 store_immediate_info *info
3410 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3411 if (info)
3412 {
3413 /* If there is just one original statement for the range, see if
3414 we can just reuse the original store which could be even larger
3415 than try_size. */
3416 unsigned HOST_WIDE_INT stmt_end
3417 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3418 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3419 stmt_end - try_bitpos);
3420 if (info && info->bitpos >= try_bitpos)
3421 {
3422 try_size = stmt_end - try_bitpos;
3423 goto found;
3424 }
3425 }
3426
3427 /* Approximate store bitsize for the case when there are no padding
3428 bits. */
3429 while (try_size > size)
3430 try_size /= 2;
3431 /* Now look for whole padding bytes at the end of that bitsize. */
3432 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3433 if (group->mask[try_pos - bytepos + nonmasked - 1]
3434 != (unsigned char) ~0U
3435 && (!bzero_first
3436 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3437 break;
3438 if (nonmasked == 0)
3439 {
3440 /* If entire try_size range is padding, skip it. */
3441 try_pos += try_size / BITS_PER_UNIT;
3442 size -= try_size;
3443 continue;
3444 }
3445 /* Otherwise try to decrease try_size if the second half, the last 3
3446 quarters, etc. are padding. */
3447 nonmasked *= BITS_PER_UNIT;
3448 while (nonmasked <= try_size / 2)
3449 try_size /= 2;
3450 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3451 {
3452 /* Now look for whole padding bytes at the start of that bitsize. */
3453 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3454 for (masked = 0; masked < try_bytesize; ++masked)
3455 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3456 && (!bzero_first
3457 || group->val[try_pos - bytepos + masked] != 0))
3458 break;
3459 masked *= BITS_PER_UNIT;
3460 gcc_assert (masked < try_size);
3461 if (masked >= try_size / 2)
3462 {
3463 while (masked >= try_size / 2)
3464 {
3465 try_size /= 2;
3466 try_pos += try_size / BITS_PER_UNIT;
3467 size -= try_size;
3468 masked -= try_size;
3469 }
3470 /* Need to recompute the alignment, so just retry at the new
3471 position. */
3472 continue;
3473 }
3474 }
3475
3476 found:
3477 ++ret;
3478
3479 if (split_stores)
3480 {
3481 split_store *store
3482 = new split_store (try_pos, try_size, align);
3483 info = find_constituent_stores (group, &store->orig_stores,
3484 &first, try_bitpos, try_size);
3485 if (info
3486 && info->bitpos >= try_bitpos
3487 && info->bitpos + info->bitsize <= try_bitpos + try_size)
3488 {
3489 store->orig = true;
3490 any_orig = true;
3491 }
3492 split_stores->safe_push (store);
3493 }
3494
3495 try_pos += try_size / BITS_PER_UNIT;
3496 size -= try_size;
3497 }
3498
3499 if (total_orig)
3500 {
3501 unsigned int i;
3502 split_store *store;
3503 /* If we are reusing some original stores and any of the
3504 original SSA_NAMEs had multiple uses, we need to subtract
3505 those now before we add the new ones. */
3506 if (total_new[0] && any_orig)
3507 {
3508 FOR_EACH_VEC_ELT (*split_stores, i, store)
3509 if (store->orig)
3510 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3511 }
3512 total_new[0] += ret; /* The new store. */
3513 store_immediate_info *info = group->stores[0];
3514 if (info->ops[0].base_addr)
3515 total_new[0] += ret;
3516 if (info->ops[1].base_addr)
3517 total_new[0] += ret;
3518 switch (info->rhs_code)
3519 {
3520 case BIT_AND_EXPR:
3521 case BIT_IOR_EXPR:
3522 case BIT_XOR_EXPR:
3523 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3524 break;
3525 default:
3526 break;
3527 }
3528 FOR_EACH_VEC_ELT (*split_stores, i, store)
3529 {
3530 unsigned int j;
3531 bool bit_not_p[3] = { false, false, false };
3532 	  /* If all orig_stores have a certain bit_not_p flag set, then
3533 	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
3534 	     If only some orig_stores have it set, then we'd use
3535 	     a BIT_XOR_EXPR with a mask and need to account for
3536 	     it.  */
3537 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3538 {
3539 if (info->ops[0].bit_not_p)
3540 bit_not_p[0] = true;
3541 if (info->ops[1].bit_not_p)
3542 bit_not_p[1] = true;
3543 if (info->bit_not_p)
3544 bit_not_p[2] = true;
3545 }
3546 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3547 }
3548
3549 }
3550
3551 return ret;
3552 }
3553
3554 /* Return the operation through which the operand IDX (if < 2) or
3555    the result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
3556    is done; if BIT_NOT_EXPR, all bits are inverted; if BIT_XOR_EXPR,
3557    the bits should be XORed with MASK.  */
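/* For instance, assuming full 8-bit char stores with no padding bits: if a
   16-bit split store combines one original store whose bit_not_p flag is set
   and which covers the low byte with another store covering the high byte
   that does not have it set, BIT_XOR_EXPR is returned and, on a
   little-endian target, MASK is set to 0xff so that only the low byte gets
   inverted.  */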
3558
3559 static enum tree_code
3560 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3561 {
3562 unsigned int i;
3563 store_immediate_info *info;
3564 unsigned int cnt = 0;
3565 bool any_paddings = false;
3566 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3567 {
3568 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3569 if (bit_not_p)
3570 {
3571 ++cnt;
3572 tree lhs = gimple_assign_lhs (info->stmt);
3573 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3574 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3575 any_paddings = true;
3576 }
3577 }
3578 mask = NULL_TREE;
3579 if (cnt == 0)
3580 return NOP_EXPR;
3581 if (cnt == split_store->orig_stores.length () && !any_paddings)
3582 return BIT_NOT_EXPR;
3583
3584 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3585 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3586 unsigned char *buf
3587 = XALLOCAVEC (unsigned char, buf_size);
3588 memset (buf, ~0U, buf_size);
3589 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3590 {
3591 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3592 if (!bit_not_p)
3593 continue;
3594 /* Clear regions with bit_not_p and invert afterwards, rather than
3595 clear regions with !bit_not_p, so that gaps in between stores aren't
3596 set in the mask. */
3597 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3598 unsigned HOST_WIDE_INT prec = bitsize;
3599 unsigned int pos_in_buffer = 0;
3600 if (any_paddings)
3601 {
3602 tree lhs = gimple_assign_lhs (info->stmt);
3603 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3604 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3605 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3606 }
3607 if (info->bitpos < try_bitpos)
3608 {
3609 gcc_assert (info->bitpos + bitsize > try_bitpos);
3610 if (!BYTES_BIG_ENDIAN)
3611 {
3612 if (prec <= try_bitpos - info->bitpos)
3613 continue;
3614 prec -= try_bitpos - info->bitpos;
3615 }
3616 bitsize -= try_bitpos - info->bitpos;
3617 if (BYTES_BIG_ENDIAN && prec > bitsize)
3618 prec = bitsize;
3619 }
3620 else
3621 pos_in_buffer = info->bitpos - try_bitpos;
3622 if (prec < bitsize)
3623 {
3624 /* If this is a bool inversion, invert just the least significant
3625 prec bits rather than all bits of it. */
3626 if (BYTES_BIG_ENDIAN)
3627 {
3628 pos_in_buffer += bitsize - prec;
3629 if (pos_in_buffer >= split_store->size)
3630 continue;
3631 }
3632 bitsize = prec;
3633 }
3634 if (pos_in_buffer + bitsize > split_store->size)
3635 bitsize = split_store->size - pos_in_buffer;
3636 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3637 if (BYTES_BIG_ENDIAN)
3638 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3639 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3640 else
3641 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3642 }
3643 for (unsigned int i = 0; i < buf_size; ++i)
3644 buf[i] = ~buf[i];
3645 mask = native_interpret_expr (int_type, buf, buf_size);
3646 return BIT_XOR_EXPR;
3647 }
3648
3649 /* Given a merged store group GROUP output the widened version of it.
3650 The store chain is against the base object BASE.
3651 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3652 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3653 Make sure that the number of statements output is less than the number of
3654 original statements. If a better sequence is possible emit it and
3655 return true. */
3656
3657 bool
3658 imm_store_chain_info::output_merged_store (merged_store_group *group)
3659 {
3660 split_store *split_store;
3661 unsigned int i;
3662 unsigned HOST_WIDE_INT start_byte_pos
3663 = group->bitregion_start / BITS_PER_UNIT;
3664
3665 unsigned int orig_num_stmts = group->stores.length ();
3666 if (orig_num_stmts < 2)
3667 return false;
3668
3669 auto_vec<class split_store *, 32> split_stores;
3670 bool allow_unaligned_store
3671 = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
3672 bool allow_unaligned_load = allow_unaligned_store;
3673 bool bzero_first = false;
3674 if (group->stores[0]->rhs_code == INTEGER_CST
3675 && TREE_CODE (gimple_assign_rhs1 (group->stores[0]->stmt)) == CONSTRUCTOR
3676 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (group->stores[0]->stmt)) == 0
3677 && group->start == group->stores[0]->bitpos
3678 && group->width == group->stores[0]->bitsize
3679 && (group->start % BITS_PER_UNIT) == 0
3680 && (group->width % BITS_PER_UNIT) == 0)
3681 bzero_first = true;
3682 if (allow_unaligned_store || bzero_first)
3683 {
3684 	      /* If unaligned stores are allowed, see how many stores we'd emit
3685 	         for unaligned and how many we'd emit for aligned stores.
3686 	         Only use unaligned stores if they allow fewer stores than aligned.
3687 	         Similarly, if the whole region is cleared first, compare expanding
3688 	         it together with the rest against expanding the clear first
3689 	         followed by the merged remaining stores.  */
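	      /* Bit 0 of PASS selects unaligned stores and bit 1 selects
	         clearing the whole region first; the variant needing the
	         fewest split stores wins below.  */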
3690 unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3691 int pass_min = 0;
3692 for (int pass = 0; pass < 4; ++pass)
3693 {
3694 if (!allow_unaligned_store && (pass & 1) != 0)
3695 continue;
3696 if (!bzero_first && (pass & 2) != 0)
3697 continue;
3698 cnt[pass] = split_group (group, (pass & 1) != 0,
3699 allow_unaligned_load, (pass & 2) != 0,
3700 NULL, NULL, NULL);
3701 if (cnt[pass] < cnt[pass_min])
3702 pass_min = pass;
3703 }
3704 if ((pass_min & 1) == 0)
3705 allow_unaligned_store = false;
3706 if ((pass_min & 2) == 0)
3707 bzero_first = false;
3708 }
3709 unsigned total_orig, total_new;
3710 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
3711 &split_stores, &total_orig, &total_new);
3712
3713 if (split_stores.length () >= orig_num_stmts)
3714 {
3715 /* We didn't manage to reduce the number of statements. Bail out. */
3716 if (dump_file && (dump_flags & TDF_DETAILS))
3717 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3718 " Not profitable to emit new sequence.\n",
3719 orig_num_stmts);
3720 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3721 delete split_store;
3722 return false;
3723 }
3724 if (total_orig <= total_new)
3725 {
3726 /* If number of estimated new statements is above estimated original
3727 statements, bail out too. */
3728 if (dump_file && (dump_flags & TDF_DETAILS))
3729 fprintf (dump_file, "Estimated number of original stmts (%u)"
3730 " not larger than estimated number of new"
3731 " stmts (%u).\n",
3732 total_orig, total_new);
3733 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3734 delete split_store;
3735 return false;
3736 }
3737
3738 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3739 gimple_seq seq = NULL;
3740 tree last_vdef, new_vuse;
3741 last_vdef = gimple_vdef (group->last_stmt);
3742 new_vuse = gimple_vuse (group->last_stmt);
3743 tree bswap_res = NULL_TREE;
3744
3745 if (group->stores[0]->rhs_code == LROTATE_EXPR
3746 || group->stores[0]->rhs_code == NOP_EXPR)
3747 {
3748 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3749 gimple *ins_stmt = group->stores[0]->ins_stmt;
3750 struct symbolic_number *n = &group->stores[0]->n;
3751 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3752
3753 switch (n->range)
3754 {
3755 case 16:
3756 load_type = bswap_type = uint16_type_node;
3757 break;
3758 case 32:
3759 load_type = uint32_type_node;
3760 if (bswap)
3761 {
3762 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3763 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3764 }
3765 break;
3766 case 64:
3767 load_type = uint64_type_node;
3768 if (bswap)
3769 {
3770 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3771 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3772 }
3773 break;
3774 default:
3775 gcc_unreachable ();
3776 }
3777
3778 	  /* If the loads each have the vuse of the corresponding store,
3779 	     we've checked the aliasing already in try_coalesce_bswap and
3780 	     we want to sink the needed load into SEQ.  So we need to use
3781 	     new_vuse on the load.  */
3782 if (n->base_addr)
3783 {
3784 if (n->vuse == NULL)
3785 {
3786 n->vuse = new_vuse;
3787 ins_stmt = NULL;
3788 }
3789 else
3790 	    /* Update the vuse in case it has been changed by output_merged_stores.  */
3791 n->vuse = gimple_vuse (ins_stmt);
3792 }
3793 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3794 bswap_type, load_type, n, bswap);
3795 gcc_assert (bswap_res);
3796 }
3797
3798 gimple *stmt = NULL;
3799 auto_vec<gimple *, 32> orig_stmts;
3800 gimple_seq this_seq;
3801 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3802 is_gimple_mem_ref_addr, NULL_TREE);
3803 gimple_seq_add_seq_without_update (&seq, this_seq);
3804
3805 tree load_addr[2] = { NULL_TREE, NULL_TREE };
3806 gimple_seq load_seq[2] = { NULL, NULL };
3807 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3808 for (int j = 0; j < 2; ++j)
3809 {
3810 store_operand_info &op = group->stores[0]->ops[j];
3811 if (op.base_addr == NULL_TREE)
3812 continue;
3813
3814 store_immediate_info *infol = group->stores.last ();
3815 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3816 {
3817 /* We can't pick the location randomly; while we've verified
3818 	     all the loads have the same vuse, they can still be in different
3819 basic blocks and we need to pick the one from the last bb:
3820 int x = q[0];
3821 if (x == N) return;
3822 int y = q[1];
3823 p[0] = x;
3824 p[1] = y;
3825 otherwise if we put the wider load at the q[0] load, we might
3826 segfault if q[1] is not mapped. */
3827 basic_block bb = gimple_bb (op.stmt);
3828 gimple *ostmt = op.stmt;
3829 store_immediate_info *info;
3830 FOR_EACH_VEC_ELT (group->stores, i, info)
3831 {
3832 gimple *tstmt = info->ops[j].stmt;
3833 basic_block tbb = gimple_bb (tstmt);
3834 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3835 {
3836 ostmt = tstmt;
3837 bb = tbb;
3838 }
3839 }
3840 load_gsi[j] = gsi_for_stmt (ostmt);
3841 load_addr[j]
3842 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3843 &load_seq[j], is_gimple_mem_ref_addr,
3844 NULL_TREE);
3845 }
3846 else if (operand_equal_p (base_addr, op.base_addr, 0))
3847 load_addr[j] = addr;
3848 else
3849 {
3850 load_addr[j]
3851 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3852 &this_seq, is_gimple_mem_ref_addr,
3853 NULL_TREE);
3854 gimple_seq_add_seq_without_update (&seq, this_seq);
3855 }
3856 }
3857
3858 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3859 {
3860 unsigned HOST_WIDE_INT try_size = split_store->size;
3861 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
3862 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3863 unsigned HOST_WIDE_INT align = split_store->align;
3864 tree dest, src;
3865 location_t loc;
3866 if (split_store->orig)
3867 {
3868 /* If there is just a single constituent store which covers
3869 the whole area, just reuse the lhs and rhs. */
3870 gimple *orig_stmt = split_store->orig_stores[0]->stmt;
3871 dest = gimple_assign_lhs (orig_stmt);
3872 src = gimple_assign_rhs1 (orig_stmt);
3873 loc = gimple_location (orig_stmt);
3874 }
3875 else
3876 {
3877 store_immediate_info *info;
3878 unsigned short clique, base;
3879 unsigned int k;
3880 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3881 orig_stmts.safe_push (info->stmt);
3882 tree offset_type
3883 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
3884 loc = get_location_for_stmts (orig_stmts);
3885 orig_stmts.truncate (0);
3886
3887 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
3888 int_type = build_aligned_type (int_type, align);
3889 dest = fold_build2 (MEM_REF, int_type, addr,
3890 build_int_cst (offset_type, try_pos));
3891 if (TREE_CODE (dest) == MEM_REF)
3892 {
3893 MR_DEPENDENCE_CLIQUE (dest) = clique;
3894 MR_DEPENDENCE_BASE (dest) = base;
3895 }
3896
3897 tree mask;
3898 if (bswap_res)
3899 mask = integer_zero_node;
3900 else
3901 mask = native_interpret_expr (int_type,
3902 group->mask + try_pos
3903 - start_byte_pos,
3904 group->buf_size);
3905
3906 tree ops[2];
3907 for (int j = 0;
3908 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
3909 ++j)
3910 {
3911 store_operand_info &op = split_store->orig_stores[0]->ops[j];
3912 if (bswap_res)
3913 ops[j] = bswap_res;
3914 else if (op.base_addr)
3915 {
3916 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3917 orig_stmts.safe_push (info->ops[j].stmt);
3918
3919 offset_type = get_alias_type_for_stmts (orig_stmts, true,
3920 &clique, &base);
3921 location_t load_loc = get_location_for_stmts (orig_stmts);
3922 orig_stmts.truncate (0);
3923
3924 unsigned HOST_WIDE_INT load_align = group->load_align[j];
3925 unsigned HOST_WIDE_INT align_bitpos
3926 = known_alignment (try_bitpos
3927 - split_store->orig_stores[0]->bitpos
3928 + op.bitpos);
3929 if (align_bitpos & (load_align - 1))
3930 load_align = least_bit_hwi (align_bitpos);
3931
3932 tree load_int_type
3933 = build_nonstandard_integer_type (try_size, UNSIGNED);
3934 load_int_type
3935 = build_aligned_type (load_int_type, load_align);
3936
3937 poly_uint64 load_pos
3938 = exact_div (try_bitpos
3939 - split_store->orig_stores[0]->bitpos
3940 + op.bitpos,
3941 BITS_PER_UNIT);
3942 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
3943 build_int_cst (offset_type, load_pos));
3944 if (TREE_CODE (ops[j]) == MEM_REF)
3945 {
3946 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
3947 MR_DEPENDENCE_BASE (ops[j]) = base;
3948 }
3949 if (!integer_zerop (mask))
3950 /* The load might load some bits (that will be masked off
3951 later on) uninitialized, avoid -W*uninitialized
3952 warnings in that case. */
3953 TREE_NO_WARNING (ops[j]) = 1;
3954
3955 stmt = gimple_build_assign (make_ssa_name (int_type),
3956 ops[j]);
3957 gimple_set_location (stmt, load_loc);
3958 if (gsi_bb (load_gsi[j]))
3959 {
3960 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
3961 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
3962 }
3963 else
3964 {
3965 gimple_set_vuse (stmt, new_vuse);
3966 gimple_seq_add_stmt_without_update (&seq, stmt);
3967 }
3968 ops[j] = gimple_assign_lhs (stmt);
3969 tree xor_mask;
3970 enum tree_code inv_op
3971 = invert_op (split_store, j, int_type, xor_mask);
3972 if (inv_op != NOP_EXPR)
3973 {
3974 stmt = gimple_build_assign (make_ssa_name (int_type),
3975 inv_op, ops[j], xor_mask);
3976 gimple_set_location (stmt, load_loc);
3977 ops[j] = gimple_assign_lhs (stmt);
3978
3979 if (gsi_bb (load_gsi[j]))
3980 gimple_seq_add_stmt_without_update (&load_seq[j],
3981 stmt);
3982 else
3983 gimple_seq_add_stmt_without_update (&seq, stmt);
3984 }
3985 }
3986 else
3987 ops[j] = native_interpret_expr (int_type,
3988 group->val + try_pos
3989 - start_byte_pos,
3990 group->buf_size);
3991 }
3992
3993 switch (split_store->orig_stores[0]->rhs_code)
3994 {
3995 case BIT_AND_EXPR:
3996 case BIT_IOR_EXPR:
3997 case BIT_XOR_EXPR:
3998 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3999 {
4000 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4001 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4002 }
4003 location_t bit_loc;
4004 bit_loc = get_location_for_stmts (orig_stmts);
4005 orig_stmts.truncate (0);
4006
4007 stmt
4008 = gimple_build_assign (make_ssa_name (int_type),
4009 split_store->orig_stores[0]->rhs_code,
4010 ops[0], ops[1]);
4011 gimple_set_location (stmt, bit_loc);
4012 /* If there is just one load and there is a separate
4013 load_seq[0], emit the bitwise op right after it. */
4014 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4015 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4016 /* Otherwise, if at least one load is in seq, we need to
4017 emit the bitwise op right before the store. If there
4018 		 are two loads and they are emitted somewhere else, it would
4019 be better to emit the bitwise op as early as possible;
4020 we don't track where that would be possible right now
4021 though. */
4022 else
4023 gimple_seq_add_stmt_without_update (&seq, stmt);
4024 src = gimple_assign_lhs (stmt);
4025 tree xor_mask;
4026 enum tree_code inv_op;
4027 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4028 if (inv_op != NOP_EXPR)
4029 {
4030 stmt = gimple_build_assign (make_ssa_name (int_type),
4031 inv_op, src, xor_mask);
4032 gimple_set_location (stmt, bit_loc);
4033 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4034 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4035 else
4036 gimple_seq_add_stmt_without_update (&seq, stmt);
4037 src = gimple_assign_lhs (stmt);
4038 }
4039 break;
4040 case LROTATE_EXPR:
4041 case NOP_EXPR:
4042 src = ops[0];
4043 if (!is_gimple_val (src))
4044 {
4045 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4046 src);
4047 gimple_seq_add_stmt_without_update (&seq, stmt);
4048 src = gimple_assign_lhs (stmt);
4049 }
4050 if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4051 {
4052 stmt = gimple_build_assign (make_ssa_name (int_type),
4053 NOP_EXPR, src);
4054 gimple_seq_add_stmt_without_update (&seq, stmt);
4055 src = gimple_assign_lhs (stmt);
4056 }
4057 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4058 if (inv_op != NOP_EXPR)
4059 {
4060 stmt = gimple_build_assign (make_ssa_name (int_type),
4061 inv_op, src, xor_mask);
4062 gimple_set_location (stmt, loc);
4063 gimple_seq_add_stmt_without_update (&seq, stmt);
4064 src = gimple_assign_lhs (stmt);
4065 }
4066 break;
4067 default:
4068 src = ops[0];
4069 break;
4070 }
4071
4072 /* If bit insertion is required, we use the source as an accumulator
4073 into which the successive bit-field values are manually inserted.
4074 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
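	  /* As an illustration, on a little-endian target a 3-bit field stored
	     at bit offset 5 within a 16-bit split store (with a source value of
	     wider precision) gives start_gap == 5 and end_gap == 8: the value
	     is masked with 0x7 in its original type, converted to the 16-bit
	     type, shifted left by 5 and IORed into the accumulator.  */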
4075 if (group->bit_insertion)
4076 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4077 if (info->rhs_code == BIT_INSERT_EXPR
4078 && info->bitpos < try_bitpos + try_size
4079 && info->bitpos + info->bitsize > try_bitpos)
4080 {
4081 /* Mask, truncate, convert to final type, shift and ior into
4082 the accumulator. Note that every step can be a no-op. */
4083 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4084 const HOST_WIDE_INT end_gap
4085 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4086 tree tem = info->ops[0].val;
4087 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4088 {
4089 tree bitfield_type
4090 = build_nonstandard_integer_type (info->bitsize,
4091 UNSIGNED);
4092 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4093 }
4094 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4095 {
4096 const unsigned HOST_WIDE_INT imask
4097 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4098 tem = gimple_build (&seq, loc,
4099 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4100 build_int_cst (TREE_TYPE (tem),
4101 imask));
4102 }
4103 const HOST_WIDE_INT shift
4104 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4105 if (shift < 0)
4106 tem = gimple_build (&seq, loc,
4107 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4108 build_int_cst (NULL_TREE, -shift));
4109 tem = gimple_convert (&seq, loc, int_type, tem);
4110 if (shift > 0)
4111 tem = gimple_build (&seq, loc,
4112 LSHIFT_EXPR, int_type, tem,
4113 build_int_cst (NULL_TREE, shift));
4114 src = gimple_build (&seq, loc,
4115 BIT_IOR_EXPR, int_type, tem, src);
4116 }
4117
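	  /* If MASK is non-zero, some bits of this split store are not covered
	     by any original store, so emit a read-modify-write: load the
	     destination, keep the bits selected by MASK, clear those bits in
	     SRC and IOR the two values together.  */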
4118 if (!integer_zerop (mask))
4119 {
4120 tree tem = make_ssa_name (int_type);
4121 tree load_src = unshare_expr (dest);
4122 	      /* The load might load some or all bits uninitialized,
4123 	         avoid -W*uninitialized warnings in that case.
4124 	         As an optimization, if all the bits were provably
4125 	         uninitialized (no stores at all yet, or the previous
4126 	         store a CLOBBER), we could optimize away the load and
4127 	         replace it e.g. with 0.  */
4128 TREE_NO_WARNING (load_src) = 1;
4129 stmt = gimple_build_assign (tem, load_src);
4130 gimple_set_location (stmt, loc);
4131 gimple_set_vuse (stmt, new_vuse);
4132 gimple_seq_add_stmt_without_update (&seq, stmt);
4133
4134 /* FIXME: If there is a single chunk of zero bits in mask,
4135 perhaps use BIT_INSERT_EXPR instead? */
4136 stmt = gimple_build_assign (make_ssa_name (int_type),
4137 BIT_AND_EXPR, tem, mask);
4138 gimple_set_location (stmt, loc);
4139 gimple_seq_add_stmt_without_update (&seq, stmt);
4140 tem = gimple_assign_lhs (stmt);
4141
4142 if (TREE_CODE (src) == INTEGER_CST)
4143 src = wide_int_to_tree (int_type,
4144 wi::bit_and_not (wi::to_wide (src),
4145 wi::to_wide (mask)));
4146 else
4147 {
4148 tree nmask
4149 = wide_int_to_tree (int_type,
4150 wi::bit_not (wi::to_wide (mask)));
4151 stmt = gimple_build_assign (make_ssa_name (int_type),
4152 BIT_AND_EXPR, src, nmask);
4153 gimple_set_location (stmt, loc);
4154 gimple_seq_add_stmt_without_update (&seq, stmt);
4155 src = gimple_assign_lhs (stmt);
4156 }
4157 stmt = gimple_build_assign (make_ssa_name (int_type),
4158 BIT_IOR_EXPR, tem, src);
4159 gimple_set_location (stmt, loc);
4160 gimple_seq_add_stmt_without_update (&seq, stmt);
4161 src = gimple_assign_lhs (stmt);
4162 }
4163 }
4164
4165 stmt = gimple_build_assign (dest, src);
4166 gimple_set_location (stmt, loc);
4167 gimple_set_vuse (stmt, new_vuse);
4168 gimple_seq_add_stmt_without_update (&seq, stmt);
4169
4170 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4171 add_stmt_to_eh_lp (stmt, group->lp_nr);
4172
4173 tree new_vdef;
4174 if (i < split_stores.length () - 1)
4175 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4176 else
4177 new_vdef = last_vdef;
4178
4179 gimple_set_vdef (stmt, new_vdef);
4180 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4181 new_vuse = new_vdef;
4182 }
4183
4184 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4185 delete split_store;
4186
4187 gcc_assert (seq);
4188 if (dump_file)
4189 {
4190 fprintf (dump_file,
4191 "New sequence of %u stores to replace old one of %u stores\n",
4192 split_stores.length (), orig_num_stmts);
4193 if (dump_flags & TDF_DETAILS)
4194 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4195 }
4196
4197 if (group->lp_nr > 0)
4198 {
4199 	      /* We're going to insert a sequence of (potentially) throwing stores
4200 	         into an active EH region.  This means that we're going to create
4201 	         new basic blocks with EH edges pointing to the post landing pad
4202 	         and will therefore have to update its PHI nodes, if any.  For the
4203 	         virtual PHI node, we're going to use the VDEFs created above, but
4204 	         for the other nodes, we need to record the original reaching defs.  */
4205 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4206 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4207 basic_block last_bb = gimple_bb (group->last_stmt);
4208 edge last_edge = find_edge (last_bb, lp_bb);
4209 auto_vec<tree, 16> last_defs;
4210 gphi_iterator gpi;
4211 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4212 {
4213 gphi *phi = gpi.phi ();
4214 tree last_def;
4215 if (virtual_operand_p (gimple_phi_result (phi)))
4216 last_def = NULL_TREE;
4217 else
4218 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4219 last_defs.safe_push (last_def);
4220 }
4221
4222 	      /* Do the insertion.  Then, if new basic blocks have been created in the
4223 	         process, rewind the chain of VDEFs created above to walk the new basic
4224 	         blocks and update the corresponding arguments of the PHI nodes.  */
4225 update_modified_stmts (seq);
4226 if (gimple_find_sub_bbs (seq, &last_gsi))
4227 while (last_vdef != gimple_vuse (group->last_stmt))
4228 {
4229 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4230 if (stmt_could_throw_p (cfun, stmt))
4231 {
4232 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4233 unsigned int i;
4234 for (gpi = gsi_start_phis (lp_bb), i = 0;
4235 !gsi_end_p (gpi);
4236 gsi_next (&gpi), i++)
4237 {
4238 gphi *phi = gpi.phi ();
4239 tree new_def;
4240 if (virtual_operand_p (gimple_phi_result (phi)))
4241 new_def = last_vdef;
4242 else
4243 new_def = last_defs[i];
4244 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4245 }
4246 }
4247 last_vdef = gimple_vuse (stmt);
4248 }
4249 }
4250 else
4251 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4252
4253 for (int j = 0; j < 2; ++j)
4254 if (load_seq[j])
4255 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4256
4257 return true;
4258 }
4259
4260 /* Process the merged_store_group objects created in the coalescing phase.
4261 The stores are all against the base object BASE.
4262 Try to output the widened stores and delete the original statements if
4263 successful. Return true iff any changes were made. */
4264
4265 bool
4266 imm_store_chain_info::output_merged_stores ()
4267 {
4268 unsigned int i;
4269 merged_store_group *merged_store;
4270 bool ret = false;
4271 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4272 {
4273 if (dbg_cnt (store_merging)
4274 && output_merged_store (merged_store))
4275 {
4276 unsigned int j;
4277 store_immediate_info *store;
4278 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4279 {
4280 gimple *stmt = store->stmt;
4281 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4282 gsi_remove (&gsi, true);
4283 if (store->lp_nr)
4284 remove_stmt_from_eh_lp (stmt);
4285 if (stmt != merged_store->last_stmt)
4286 {
4287 unlink_stmt_vdef (stmt);
4288 release_defs (stmt);
4289 }
4290 }
4291 ret = true;
4292 }
4293 }
4294 if (ret && dump_file)
4295 fprintf (dump_file, "Merging successful!\n");
4296
4297 return ret;
4298 }
4299
4300 /* Coalesce the store_immediate_info objects recorded against the base object
4301 BASE in the first phase and output them.
4302 Delete the allocated structures.
4303 Return true if any changes were made. */
4304
4305 bool
4306 imm_store_chain_info::terminate_and_process_chain ()
4307 {
4308 /* Process store chain. */
4309 bool ret = false;
4310 if (m_store_info.length () > 1)
4311 {
4312 ret = coalesce_immediate_stores ();
4313 if (ret)
4314 ret = output_merged_stores ();
4315 }
4316
4317 /* Delete all the entries we allocated ourselves. */
4318 store_immediate_info *info;
4319 unsigned int i;
4320 FOR_EACH_VEC_ELT (m_store_info, i, info)
4321 delete info;
4322
4323 merged_store_group *merged_info;
4324 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4325 delete merged_info;
4326
4327 return ret;
4328 }
4329
4330 /* Return true iff LHS is a destination potentially interesting for
4331 store merging. In practice these are the codes that get_inner_reference
4332 can process. */
4333
4334 static bool
4335 lhs_valid_for_store_merging_p (tree lhs)
4336 {
4337 if (DECL_P (lhs))
4338 return true;
4339
4340 switch (TREE_CODE (lhs))
4341 {
4342 case ARRAY_REF:
4343 case ARRAY_RANGE_REF:
4344 case BIT_FIELD_REF:
4345 case COMPONENT_REF:
4346 case MEM_REF:
4347 return true;
4348 default:
4349 return false;
4350 }
4351
4352 gcc_unreachable ();
4353 }
4354
4355 /* Return true if the tree RHS is a constant we want to consider
4356 during store merging. In practice accept all codes that
4357 native_encode_expr accepts. */
4358
4359 static bool
4360 rhs_valid_for_store_merging_p (tree rhs)
4361 {
4362 unsigned HOST_WIDE_INT size;
4363 if (TREE_CODE (rhs) == CONSTRUCTOR
4364 && !TREE_CLOBBER_P (rhs)
4365 && CONSTRUCTOR_NELTS (rhs) == 0
4366 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4367 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4368 return true;
4369 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4370 && native_encode_expr (rhs, NULL, size) != 0);
4371 }
4372
4373 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes and
4374    return true, or false if the adjusted bit position is negative or too large.  */
4375
4376 static bool
4377 adjust_bit_pos (poly_offset_int byte_off,
4378 poly_int64 *pbitpos,
4379 poly_uint64 *pbitregion_start,
4380 poly_uint64 *pbitregion_end)
4381 {
4382 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4383 bit_off += *pbitpos;
4384
4385 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4386 {
4387 if (maybe_ne (*pbitregion_end, 0U))
4388 {
4389 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4390 bit_off += *pbitregion_start;
4391 if (bit_off.to_uhwi (pbitregion_start))
4392 {
4393 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4394 bit_off += *pbitregion_end;
4395 if (!bit_off.to_uhwi (pbitregion_end))
4396 *pbitregion_end = 0;
4397 }
4398 else
4399 *pbitregion_end = 0;
4400 }
4401 return true;
4402 }
4403 else
4404 return false;
4405 }
4406
4407 /* If MEM is a memory reference usable for store merging (either as
4408 store destination or for loads), return the non-NULL base_addr
4409 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4410    Otherwise return NULL; *PBITSIZE is still valid even in that
4411    case.  */
4412
4413 static tree
4414 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4415 poly_uint64 *pbitpos,
4416 poly_uint64 *pbitregion_start,
4417 poly_uint64 *pbitregion_end)
4418 {
4419 poly_int64 bitsize, bitpos;
4420 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4421 machine_mode mode;
4422 int unsignedp = 0, reversep = 0, volatilep = 0;
4423 tree offset;
4424 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4425 &unsignedp, &reversep, &volatilep);
4426 *pbitsize = bitsize;
4427 if (known_eq (bitsize, 0))
4428 return NULL_TREE;
4429
4430 if (TREE_CODE (mem) == COMPONENT_REF
4431 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4432 {
4433 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4434 if (maybe_ne (bitregion_end, 0U))
4435 bitregion_end += 1;
4436 }
4437
4438 if (reversep)
4439 return NULL_TREE;
4440
4441 /* We do not want to rewrite TARGET_MEM_REFs. */
4442 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4443 return NULL_TREE;
4444 /* In some cases get_inner_reference may return a
4445 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4446 canonicalize the base_addr to MEM_REF [ptr] and take
4447 byteoffset into account in the bitpos. This occurs in
4448 PR 23684 and this way we can catch more chains. */
4449 else if (TREE_CODE (base_addr) == MEM_REF)
4450 {
4451 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4452 &bitregion_start, &bitregion_end))
4453 return NULL_TREE;
4454 base_addr = TREE_OPERAND (base_addr, 0);
4455 }
4456 /* get_inner_reference returns the base object, get at its
4457 address now. */
4458 else
4459 {
4460 if (maybe_lt (bitpos, 0))
4461 return NULL_TREE;
4462 base_addr = build_fold_addr_expr (base_addr);
4463 }
4464
4465 if (offset)
4466 {
4467 /* If the access is variable offset then a base decl has to be
4468 address-taken to be able to emit pointer-based stores to it.
4469 ??? We might be able to get away with re-using the original
4470 base up to the first variable part and then wrapping that inside
4471 a BIT_FIELD_REF. */
4472 tree base = get_base_address (base_addr);
4473 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4474 return NULL_TREE;
4475
4476 /* Similarly to above for the base, remove constant from the offset. */
4477 if (TREE_CODE (offset) == PLUS_EXPR
4478 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4479 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4480 &bitpos, &bitregion_start, &bitregion_end))
4481 offset = TREE_OPERAND (offset, 0);
4482
4483 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4484 base_addr, offset);
4485 }
4486
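  /* If no bit region was recorded (or it was dropped while adjusting above),
     fall back to the smallest byte-aligned region containing the access.  */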
4487 if (known_eq (bitregion_end, 0U))
4488 {
4489 bitregion_start = round_down_to_byte_boundary (bitpos);
4490 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4491 }
4492
4493 *pbitsize = bitsize;
4494 *pbitpos = bitpos;
4495 *pbitregion_start = bitregion_start;
4496 *pbitregion_end = bitregion_end;
4497 return base_addr;
4498 }
4499
4500 /* Return true if STMT is a load that can be used for store merging.
4501 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4502 BITREGION_END are properties of the corresponding store. */
4503
4504 static bool
4505 handled_load (gimple *stmt, store_operand_info *op,
4506 poly_uint64 bitsize, poly_uint64 bitpos,
4507 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4508 {
4509 if (!is_gimple_assign (stmt))
4510 return false;
4511 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4512 {
4513 tree rhs1 = gimple_assign_rhs1 (stmt);
4514 if (TREE_CODE (rhs1) == SSA_NAME
4515 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4516 bitregion_start, bitregion_end))
4517 {
4518 	    /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4519 	       been optimized earlier, but if allowed here, would confuse the
4520 	       multiple uses counting.  */
4521 if (op->bit_not_p)
4522 return false;
4523 op->bit_not_p = !op->bit_not_p;
4524 return true;
4525 }
4526 return false;
4527 }
4528 if (gimple_vuse (stmt)
4529 && gimple_assign_load_p (stmt)
4530 && !stmt_can_throw_internal (cfun, stmt)
4531 && !gimple_has_volatile_ops (stmt))
4532 {
4533 tree mem = gimple_assign_rhs1 (stmt);
4534 op->base_addr
4535 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4536 &op->bitregion_start,
4537 &op->bitregion_end);
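      /* Accept the load only if it has the same bitsize as the store, is
	 byte-aligned relative to it, and its bit region leaves at least as
	 much room on each side as the store's bit region does.  */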
4538 if (op->base_addr != NULL_TREE
4539 && known_eq (op->bitsize, bitsize)
4540 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4541 && known_ge (op->bitpos - op->bitregion_start,
4542 bitpos - bitregion_start)
4543 && known_ge (op->bitregion_end - op->bitpos,
4544 bitregion_end - bitpos))
4545 {
4546 op->stmt = stmt;
4547 op->val = mem;
4548 op->bit_not_p = false;
4549 return true;
4550 }
4551 }
4552 return false;
4553 }
4554
4555 /* Return the index number of the landing pad for STMT, if any. */
4556
4557 static int
4558 lp_nr_for_store (gimple *stmt)
4559 {
4560 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4561 return 0;
4562
4563 if (!stmt_could_throw_p (cfun, stmt))
4564 return 0;
4565
4566 return lookup_stmt_eh_lp (stmt);
4567 }
4568
4569 /* Record the store STMT for store merging optimization if it can be
4570 optimized. Return true if any changes were made. */
4571
4572 bool
4573 pass_store_merging::process_store (gimple *stmt)
4574 {
4575 tree lhs = gimple_assign_lhs (stmt);
4576 tree rhs = gimple_assign_rhs1 (stmt);
4577 poly_uint64 bitsize, bitpos;
4578 poly_uint64 bitregion_start, bitregion_end;
4579 tree base_addr
4580 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4581 &bitregion_start, &bitregion_end);
4582 if (known_eq (bitsize, 0U))
4583 return false;
4584
4585 bool invalid = (base_addr == NULL_TREE
4586 || (maybe_gt (bitsize,
4587 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4588 && TREE_CODE (rhs) != INTEGER_CST
4589 && (TREE_CODE (rhs) != CONSTRUCTOR
4590 || CONSTRUCTOR_NELTS (rhs) != 0)));
4591 enum tree_code rhs_code = ERROR_MARK;
4592 bool bit_not_p = false;
4593 struct symbolic_number n;
4594 gimple *ins_stmt = NULL;
4595 store_operand_info ops[2];
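  /* Classify the right-hand side: a constant that native_encode_expr can
     handle, a plain load, a bitwise AND/IOR/XOR whose first operand is a
     load, a value recognized by the bswap machinery, or a narrow value
     handled as a bit-field insertion.  Anything else makes the store
     ineligible.  */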
4596 if (invalid)
4597 ;
4598 else if (rhs_valid_for_store_merging_p (rhs))
4599 {
4600 rhs_code = INTEGER_CST;
4601 ops[0].val = rhs;
4602 }
4603 else if (TREE_CODE (rhs) != SSA_NAME)
4604 invalid = true;
4605 else
4606 {
4607 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4608 if (!is_gimple_assign (def_stmt))
4609 invalid = true;
4610 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4611 bitregion_start, bitregion_end))
4612 rhs_code = MEM_REF;
4613 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4614 {
4615 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4616 if (TREE_CODE (rhs1) == SSA_NAME
4617 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4618 {
4619 bit_not_p = true;
4620 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4621 }
4622 }
4623
4624 if (rhs_code == ERROR_MARK && !invalid)
4625 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4626 {
4627 case BIT_AND_EXPR:
4628 case BIT_IOR_EXPR:
4629 case BIT_XOR_EXPR:
4630 tree rhs1, rhs2;
4631 rhs1 = gimple_assign_rhs1 (def_stmt);
4632 rhs2 = gimple_assign_rhs2 (def_stmt);
4633 invalid = true;
4634 if (TREE_CODE (rhs1) != SSA_NAME)
4635 break;
4636 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4637 if (!is_gimple_assign (def_stmt1)
4638 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4639 bitregion_start, bitregion_end))
4640 break;
4641 if (rhs_valid_for_store_merging_p (rhs2))
4642 ops[1].val = rhs2;
4643 else if (TREE_CODE (rhs2) != SSA_NAME)
4644 break;
4645 else
4646 {
4647 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4648 if (!is_gimple_assign (def_stmt2))
4649 break;
4650 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4651 bitregion_start, bitregion_end))
4652 break;
4653 }
4654 invalid = false;
4655 break;
4656 default:
4657 invalid = true;
4658 break;
4659 }
4660
4661 unsigned HOST_WIDE_INT const_bitsize;
4662 if (bitsize.is_constant (&const_bitsize)
4663 && (const_bitsize % BITS_PER_UNIT) == 0
4664 && const_bitsize <= 64
4665 && multiple_p (bitpos, BITS_PER_UNIT))
4666 {
4667 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4668 if (ins_stmt)
4669 {
4670 uint64_t nn = n.n;
4671 for (unsigned HOST_WIDE_INT i = 0;
4672 i < const_bitsize;
4673 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4674 if ((nn & MARKER_MASK) == 0
4675 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4676 {
4677 ins_stmt = NULL;
4678 break;
4679 }
4680 if (ins_stmt)
4681 {
4682 if (invalid)
4683 {
4684 rhs_code = LROTATE_EXPR;
4685 ops[0].base_addr = NULL_TREE;
4686 ops[1].base_addr = NULL_TREE;
4687 }
4688 invalid = false;
4689 }
4690 }
4691 }
4692
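      /* A store that is not a whole number of bytes or is not byte-aligned
	 can still be merged as a bit-field insertion into a wider store.  */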
4693 if (invalid
4694 && bitsize.is_constant (&const_bitsize)
4695 && ((const_bitsize % BITS_PER_UNIT) != 0
4696 || !multiple_p (bitpos, BITS_PER_UNIT))
4697 && const_bitsize <= 64)
4698 {
4699 /* Bypass a conversion to the bit-field type. */
4700 if (!bit_not_p
4701 && is_gimple_assign (def_stmt)
4702 && CONVERT_EXPR_CODE_P (rhs_code))
4703 {
4704 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4705 if (TREE_CODE (rhs1) == SSA_NAME
4706 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
4707 rhs = rhs1;
4708 }
4709 rhs_code = BIT_INSERT_EXPR;
4710 bit_not_p = false;
4711 ops[0].val = rhs;
4712 ops[0].base_addr = NULL_TREE;
4713 ops[1].base_addr = NULL_TREE;
4714 invalid = false;
4715 }
4716 }
4717
4718 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4719 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4720 if (invalid
4721 || !bitsize.is_constant (&const_bitsize)
4722 || !bitpos.is_constant (&const_bitpos)
4723 || !bitregion_start.is_constant (&const_bitregion_start)
4724 || !bitregion_end.is_constant (&const_bitregion_end))
4725 return terminate_all_aliasing_chains (NULL, stmt);
4726
4727 if (!ins_stmt)
4728 memset (&n, 0, sizeof (n));
4729
4730 class imm_store_chain_info **chain_info = NULL;
4731 bool ret = false;
4732 if (base_addr)
4733 chain_info = m_stores.get (base_addr);
4734
4735 store_immediate_info *info;
4736 if (chain_info)
4737 {
4738 unsigned int ord = (*chain_info)->m_store_info.length ();
4739 info = new store_immediate_info (const_bitsize, const_bitpos,
4740 const_bitregion_start,
4741 const_bitregion_end,
4742 stmt, ord, rhs_code, n, ins_stmt,
4743 bit_not_p, lp_nr_for_store (stmt),
4744 ops[0], ops[1]);
4745 if (dump_file && (dump_flags & TDF_DETAILS))
4746 {
4747 fprintf (dump_file, "Recording immediate store from stmt:\n");
4748 print_gimple_stmt (dump_file, stmt, 0);
4749 }
4750 (*chain_info)->m_store_info.safe_push (info);
4751 ret |= terminate_all_aliasing_chains (chain_info, stmt);
4752 	      /* If we reach the limit of stores to merge in a chain, terminate and
4753 	         process the chain now.  */
4754 if ((*chain_info)->m_store_info.length ()
4755 == (unsigned int) PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
4756 {
4757 if (dump_file && (dump_flags & TDF_DETAILS))
4758 fprintf (dump_file,
4759 "Reached maximum number of statements to merge:\n");
4760 ret |= terminate_and_process_chain (*chain_info);
4761 }
4762 return ret;
4763 }
4764
4765 /* Store aliases any existing chain? */
4766 ret |= terminate_all_aliasing_chains (NULL, stmt);
4767 /* Start a new chain. */
4768 class imm_store_chain_info *new_chain
4769 = new imm_store_chain_info (m_stores_head, base_addr);
4770 info = new store_immediate_info (const_bitsize, const_bitpos,
4771 const_bitregion_start,
4772 const_bitregion_end,
4773 stmt, 0, rhs_code, n, ins_stmt,
4774 bit_not_p, lp_nr_for_store (stmt),
4775 ops[0], ops[1]);
4776 new_chain->m_store_info.safe_push (info);
4777 m_stores.put (base_addr, new_chain);
4778 if (dump_file && (dump_flags & TDF_DETAILS))
4779 {
4780 fprintf (dump_file, "Starting new chain with statement:\n");
4781 print_gimple_stmt (dump_file, stmt, 0);
4782 fprintf (dump_file, "The base object is:\n");
4783 print_generic_expr (dump_file, base_addr);
4784 fprintf (dump_file, "\n");
4785 }
4786 return ret;
4787 }
4788
4789 /* Return true if STMT is a store valid for store merging. */
4790
4791 static bool
4792 store_valid_for_store_merging_p (gimple *stmt)
4793 {
4794 return gimple_assign_single_p (stmt)
4795 && gimple_vdef (stmt)
4796 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
4797 && !gimple_has_volatile_ops (stmt);
4798 }
4799
4800 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4801
4802 /* Return the status of basic block BB wrt store merging. */
4803
4804 static enum basic_block_status
4805 get_status_for_store_merging (basic_block bb)
4806 {
4807 unsigned int num_statements = 0;
4808 gimple_stmt_iterator gsi;
4809 edge e;
4810
4811 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4812 {
4813 gimple *stmt = gsi_stmt (gsi);
4814
4815 if (is_gimple_debug (stmt))
4816 continue;
4817
4818 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4819 break;
4820 }
4821
4822 if (num_statements == 0)
4823 return BB_INVALID;
4824
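  /* With -fnon-call-exceptions, a block ending in a store we could merge and
     falling through to the next block may allow chains to be extended into
     that successor, so report it as such.  */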
4825 if (cfun->can_throw_non_call_exceptions && cfun->eh
4826 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
4827 && (e = find_fallthru_edge (bb->succs))
4828 && e->dest == bb->next_bb)
4829 return BB_EXTENDED_VALID;
4830
4831 return num_statements >= 2 ? BB_VALID : BB_INVALID;
4832 }
4833
4834 /* Entry point for the pass. Go over each basic block recording chains of
4835 immediate stores. Upon encountering a terminating statement (as defined
4836 by stmt_terminates_chain_p) process the recorded stores and emit the widened
4837 variants. */
4838
4839 unsigned int
4840 pass_store_merging::execute (function *fun)
4841 {
4842 basic_block bb;
4843 hash_set<gimple *> orig_stmts;
4844 bool changed = false, open_chains = false;
4845
4846 /* If the function can throw and catch non-call exceptions, we'll be trying
4847 to merge stores across different basic blocks so we need to first unsplit
4848 the EH edges in order to streamline the CFG of the function. */
4849 if (cfun->can_throw_non_call_exceptions && cfun->eh)
4850 unsplit_eh_edges ();
4851
4852 calculate_dominance_info (CDI_DOMINATORS);
4853
4854 FOR_EACH_BB_FN (bb, fun)
4855 {
4856 const basic_block_status bb_status = get_status_for_store_merging (bb);
4857 gimple_stmt_iterator gsi;
4858
4859 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
4860 {
4861 changed |= terminate_and_process_all_chains ();
4862 open_chains = false;
4863 }
4864
4865 if (bb_status == BB_INVALID)
4866 continue;
4867
4868 if (dump_file && (dump_flags & TDF_DETAILS))
4869 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
4870
4871 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4872 {
4873 gimple *stmt = gsi_stmt (gsi);
4874
4875 if (is_gimple_debug (stmt))
4876 continue;
4877
4878 if (gimple_has_volatile_ops (stmt))
4879 {
4880 /* Terminate all chains. */
4881 if (dump_file && (dump_flags & TDF_DETAILS))
4882 fprintf (dump_file, "Volatile access terminates "
4883 "all chains\n");
4884 changed |= terminate_and_process_all_chains ();
4885 open_chains = false;
4886 continue;
4887 }
4888
4889 if (store_valid_for_store_merging_p (stmt))
4890 changed |= process_store (stmt);
4891 else
4892 changed |= terminate_all_aliasing_chains (NULL, stmt);
4893 }
4894
4895 if (bb_status == BB_EXTENDED_VALID)
4896 open_chains = true;
4897 else
4898 {
4899 changed |= terminate_and_process_all_chains ();
4900 open_chains = false;
4901 }
4902 }
4903
4904 if (open_chains)
4905 changed |= terminate_and_process_all_chains ();
4906
4907 /* If the function can throw and catch non-call exceptions and something
4908 changed during the pass, then the CFG has (very likely) changed too. */
4909 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
4910 {
4911 free_dominance_info (CDI_DOMINATORS);
4912 return TODO_cleanup_cfg;
4913 }
4914
4915 return 0;
4916 }
4917
4918 } // anon namespace
4919
4920 /* Construct and return a store merging pass object. */
4921
4922 gimple_opt_pass *
4923 make_pass_store_merging (gcc::context *ctxt)
4924 {
4925 return new pass_store_merging (ctxt);
4926 }
4927
4928 #if CHECKING_P
4929
4930 namespace selftest {
4931
4932 /* Selftests for store merging helpers. */
4933
4934 /* Assert that all elements of the byte arrays X and Y, both of length N,
4935    are equal.  */
4936
4937 static void
4938 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
4939 {
4940 for (unsigned int i = 0; i < n; i++)
4941 {
4942 if (x[i] != y[i])
4943 {
4944 fprintf (stderr, "Arrays do not match. X:\n");
4945 dump_char_array (stderr, x, n);
4946 fprintf (stderr, "Y:\n");
4947 dump_char_array (stderr, y, n);
4948 }
4949 ASSERT_EQ (x[i], y[i]);
4950 }
4951 }
4952
4953 /* Test that shift_bytes_in_array carries bits across bytes
4954    correctly.  */
4955
4956 static void
4957 verify_shift_bytes_in_array (void)
4958 {
4959 /* byte 1 | byte 0
4960 00011111 | 11100000. */
4961 unsigned char orig[2] = { 0xe0, 0x1f };
4962 unsigned char in[2];
4963 memcpy (in, orig, sizeof orig);
4964
4965 unsigned char expected[2] = { 0x80, 0x7f };
4966 shift_bytes_in_array (in, sizeof (in), 2);
4967 verify_array_eq (in, expected, sizeof (in));
4968
4969 memcpy (in, orig, sizeof orig);
4970 memcpy (expected, orig, sizeof orig);
4971 /* Check that shifting by zero doesn't change anything. */
4972 shift_bytes_in_array (in, sizeof (in), 0);
4973 verify_array_eq (in, expected, sizeof (in));
4974
4975 }
4976
4977 /* Test that shift_bytes_in_array_right carries bits across bytes
4978    correctly.  */
4979
4980 static void
4981 verify_shift_bytes_in_array_right (void)
4982 {
4983   /* byte 0   | byte 1
4984      00011111 | 11100000.  */
4985 unsigned char orig[2] = { 0x1f, 0xe0};
4986 unsigned char in[2];
4987 memcpy (in, orig, sizeof orig);
4988 unsigned char expected[2] = { 0x07, 0xf8};
4989 shift_bytes_in_array_right (in, sizeof (in), 2);
4990 verify_array_eq (in, expected, sizeof (in));
4991
4992 memcpy (in, orig, sizeof orig);
4993 memcpy (expected, orig, sizeof orig);
4994 /* Check that shifting by zero doesn't change anything. */
4995 shift_bytes_in_array_right (in, sizeof (in), 0);
4996 verify_array_eq (in, expected, sizeof (in));
4997 }
4998
4999 /* Test clear_bit_region that it clears exactly the bits asked and
5000 nothing more. */
5001
5002 static void
5003 verify_clear_bit_region (void)
5004 {
5005 /* Start with all bits set and test clearing various patterns in them. */
5006 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5007 unsigned char in[3];
5008 unsigned char expected[3];
5009 memcpy (in, orig, sizeof in);
5010
5011 /* Check zeroing out all the bits. */
5012 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5013 expected[0] = expected[1] = expected[2] = 0;
5014 verify_array_eq (in, expected, sizeof in);
5015
5016 memcpy (in, orig, sizeof in);
5017 /* Leave the first and last bits intact. */
5018 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5019 expected[0] = 0x1;
5020 expected[1] = 0;
5021 expected[2] = 0x80;
5022 verify_array_eq (in, expected, sizeof in);
5023 }
5024
5025 /* Test clear_bit_region_be that it clears exactly the bits asked and
5026    nothing more.  */
5027
5028 static void
5029 verify_clear_bit_region_be (void)
5030 {
5031 /* Start with all bits set and test clearing various patterns in them. */
5032 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5033 unsigned char in[3];
5034 unsigned char expected[3];
5035 memcpy (in, orig, sizeof in);
5036
5037 /* Check zeroing out all the bits. */
5038 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5039 expected[0] = expected[1] = expected[2] = 0;
5040 verify_array_eq (in, expected, sizeof in);
5041
5042 memcpy (in, orig, sizeof in);
5043 /* Leave the first and last bits intact. */
5044 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5045 expected[0] = 0x80;
5046 expected[1] = 0;
5047 expected[2] = 0x1;
5048 verify_array_eq (in, expected, sizeof in);
5049 }
5050
5051
5052 /* Run all of the selftests within this file. */
5053
5054 void
5055 store_merging_c_tests (void)
5056 {
5057 verify_shift_bytes_in_array ();
5058 verify_shift_bytes_in_array_right ();
5059 verify_clear_bit_region ();
5060 verify_clear_bit_region_be ();
5061 }
5062
5063 } // namespace selftest
5064 #endif /* CHECKING_P. */