1 /* GIMPLE store merging pass.
2 Copyright (C) 2016-2017 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* The purpose of this pass is to combine multiple memory stores of
22 constant values to consecutive memory locations into fewer wider stores.
23 For example, if we have a sequence performing four byte stores to
24 consecutive memory locations:
25 [p ] := imm1;
26 [p + 1B] := imm2;
27 [p + 2B] := imm3;
28 [p + 3B] := imm4;
29 we can transform this into a single 4-byte store if the target supports it:
30 [p] := imm1:imm2:imm3:imm4 // concatenated immediates according to endianness.
31
32 The algorithm is applied to each basic block in three phases:
33
34 1) Scan through the basic block recording constant assignments to
35 destinations that can be expressed as a store to memory of a certain size
36 at a certain bit offset. Record store chains to different bases in a
37 hash_map (m_stores) and make sure to terminate such chains when appropriate
38 (for example when the stored values get used subsequently).
39 These stores can be a result of structure element initializers, array stores
40 etc. A store_immediate_info object is recorded for every such store.
41 Record as many such assignments to a single base as possible until a
42 statement that interferes with the store sequence is encountered.
43
44 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
45 store_immediate_info objects) and coalesce contiguous stores into
46 merged_store_group objects.
47
48 For example, given the stores:
49 [p ] := 0;
50 [p + 1B] := 1;
51 [p + 3B] := 0;
52 [p + 4B] := 1;
53 [p + 5B] := 0;
54 [p + 6B] := 0;
55 This phase would produce two merged_store_group objects, one recording the
56 two bytes stored in the memory region [p : p + 1] and another
57 recording the four bytes stored in the memory region [p + 3 : p + 6].
58
59 3) The merged_store_group objects produced in phase 2) are processed
60 to generate the sequence of wider stores that set the contiguous memory
61 regions to the sequence of bytes that correspond to them. This may emit
62 multiple stores per store group to handle contiguous stores that are not
63 of a size that is a power of 2. For example it can try to emit a 40-bit
64 store as a 32-bit store followed by an 8-bit store.
65 We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT or
66 TARGET_SLOW_UNALIGNED_ACCESS rules.
67
68 Note on endianness and example:
69 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
70 [p ] := 0x1234;
71 [p + 2B] := 0x5678;
72 [p + 4B] := 0xab;
73 [p + 5B] := 0xcd;
74
75 The memory layout for little-endian (LE) and big-endian (BE) must be:
76 p |LE|BE|
77 ---------
78 0 |34|12|
79 1 |12|34|
80 2 |78|56|
81 3 |56|78|
82 4 |ab|ab|
83 5 |cd|cd|
84
85 To merge these into a single 48-bit merged value 'val' in phase 2)
86 on little-endian we insert stores to higher (consecutive) bitpositions
87 into the most significant bits of the merged value.
88 The final merged value would be: 0xcdab56781234
89
90 For big-endian we insert stores to higher bitpositions into the least
91 significant bits of the merged value.
92 The final merged value would be: 0x12345678abcd
93
94 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
95 followed by a 16-bit store. Again, we must consider endianness when
96 breaking down the 48-bit value 'val' computed above.
97 For little endian we emit:
98 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
99 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
100
101 Whereas for big-endian we emit:
102 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
103 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
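
/* As an illustrative sketch (not part of the implementation), the kind of
   source this pass targets is a run of constant stores such as:

     struct S { unsigned char a, b, c, d; };

     void
     init_s (struct S *p)
     {
       p->a = 1;
       p->b = 2;
       p->c = 3;
       p->d = 4;
     }

   which, on a little-endian target that permits a 4-byte store here, can be
   rewritten into a single store of the constant 0x04030201 to the four bytes
   at *p.  The struct and function names above are purely hypothetical.  */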
104
105 #include "config.h"
106 #include "system.h"
107 #include "coretypes.h"
108 #include "backend.h"
109 #include "tree.h"
110 #include "gimple.h"
111 #include "builtins.h"
112 #include "fold-const.h"
113 #include "tree-pass.h"
114 #include "ssa.h"
115 #include "gimple-pretty-print.h"
116 #include "alias.h"
117 #include "fold-const.h"
118 #include "params.h"
119 #include "print-tree.h"
120 #include "tree-hash-traits.h"
121 #include "gimple-iterator.h"
122 #include "gimplify.h"
123 #include "stor-layout.h"
124 #include "timevar.h"
125 #include "tree-cfg.h"
126 #include "tree-eh.h"
127 #include "target.h"
128 #include "gimplify-me.h"
129 #include "rtl.h"
130 #include "expr.h" /* For get_bit_range. */
131 #include "selftest.h"
132
133 /* The maximum size (in bits) of the stores this pass should generate. */
134 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
135 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
136
137 namespace {
138
139 /* Struct recording the information about a single store of an immediate
140 to memory. These are created in the first phase and coalesced into
141 merged_store_group objects in the second phase. */
142
143 struct store_immediate_info
144 {
145 unsigned HOST_WIDE_INT bitsize;
146 unsigned HOST_WIDE_INT bitpos;
147 unsigned HOST_WIDE_INT bitregion_start;
148 /* This is one past the last bit of the bit region. */
149 unsigned HOST_WIDE_INT bitregion_end;
150 gimple *stmt;
151 unsigned int order;
152 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
153 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
154 gimple *, unsigned int);
155 };
156
157 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
158 unsigned HOST_WIDE_INT bp,
159 unsigned HOST_WIDE_INT brs,
160 unsigned HOST_WIDE_INT bre,
161 gimple *st,
162 unsigned int ord)
163 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
164 stmt (st), order (ord)
165 {
166 }
167
168 /* Struct representing a group of stores to contiguous memory locations.
169 These are produced by the second phase (coalescing) and consumed in the
170 third phase that outputs the widened stores. */
171
172 struct merged_store_group
173 {
174 unsigned HOST_WIDE_INT start;
175 unsigned HOST_WIDE_INT width;
176 unsigned HOST_WIDE_INT bitregion_start;
177 unsigned HOST_WIDE_INT bitregion_end;
178 /* The size of the allocated memory for val and mask. */
179 unsigned HOST_WIDE_INT buf_size;
180 unsigned HOST_WIDE_INT align_base;
181
182 unsigned int align;
183 unsigned int first_order;
184 unsigned int last_order;
185
186 auto_vec<store_immediate_info *> stores;
187 /* We record the first and last original statements in the sequence because
188 we'll need their vuse/vdef and replacement position. It's easier to keep
189 track of them separately as 'stores' is reordered by apply_stores. */
190 gimple *last_stmt;
191 gimple *first_stmt;
192 unsigned char *val;
193 unsigned char *mask;
194
195 merged_store_group (store_immediate_info *);
196 ~merged_store_group ();
197 void merge_into (store_immediate_info *);
198 void merge_overlapping (store_immediate_info *);
199 bool apply_stores ();
200 private:
201 void do_merge (store_immediate_info *);
202 };
203
204 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
205
206 static void
207 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
208 {
209 if (!fd)
210 return;
211
212 for (unsigned int i = 0; i < len; i++)
213 fprintf (fd, "%x ", ptr[i]);
214 fprintf (fd, "\n");
215 }
216
217 /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
218 bits between adjacent elements. AMNT should be within
219 [0, BITS_PER_UNIT).
220 Example, AMNT = 2:
221 00011111|11100000 << 2 = 01111111|10000000
222 PTR[1] | PTR[0] PTR[1] | PTR[0]. */
223
224 static void
225 shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
226 {
227 if (amnt == 0)
228 return;
229
230 unsigned char carry_over = 0U;
231 unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
232 unsigned char clear_mask = (~0U) << amnt;
233
234 for (unsigned int i = 0; i < sz; i++)
235 {
236 unsigned prev_carry_over = carry_over;
237 carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
238
239 ptr[i] <<= amnt;
240 if (i != 0)
241 {
242 ptr[i] &= clear_mask;
243 ptr[i] |= prev_carry_over;
244 }
245 }
246 }
247
248 /* Like shift_bytes_in_array but for big-endian.
249 Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
250 bits between adjacent elements. AMNT should be within
251 [0, BITS_PER_UNIT).
252 Example, AMNT = 2:
253 00011111|11100000 >> 2 = 00000111|11111000
254 PTR[0] | PTR[1] PTR[0] | PTR[1]. */
255
256 static void
257 shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
258 unsigned int amnt)
259 {
260 if (amnt == 0)
261 return;
262
263 unsigned char carry_over = 0U;
264 unsigned char carry_mask = ~(~0U << amnt);
265
266 for (unsigned int i = 0; i < sz; i++)
267 {
268 unsigned prev_carry_over = carry_over;
269 carry_over = ptr[i] & carry_mask;
270
271 carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
272 ptr[i] >>= amnt;
273 ptr[i] |= prev_carry_over;
274 }
275 }
276
277 /* Clear out LEN bits starting from bit START in the byte array
278 PTR. This clears the bits to the *right* from START.
279 START must be within [0, BITS_PER_UNIT) and counts starting from
280 the least significant bit. */
281
282 static void
283 clear_bit_region_be (unsigned char *ptr, unsigned int start,
284 unsigned int len)
285 {
286 if (len == 0)
287 return;
288 /* Clear len bits to the right of start. */
289 else if (len <= start + 1)
290 {
291 unsigned char mask = (~(~0U << len));
292 mask = mask << (start + 1U - len);
293 ptr[0] &= ~mask;
294 }
295 else if (start != BITS_PER_UNIT - 1)
296 {
297 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
298 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
299 len - (start % BITS_PER_UNIT) - 1);
300 }
301 else if (start == BITS_PER_UNIT - 1
302 && len > BITS_PER_UNIT)
303 {
304 unsigned int nbytes = len / BITS_PER_UNIT;
305 memset (ptr, 0, nbytes);
306 if (len % BITS_PER_UNIT != 0)
307 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
308 len % BITS_PER_UNIT);
309 }
310 else
311 gcc_unreachable ();
312 }
313
314 /* In the byte array PTR clear the bit region starting at bit
315 START that is LEN bits wide.
316 For regions spanning multiple bytes do this recursively until we reach
317 zero LEN or a region contained within a single byte. */
318
319 static void
320 clear_bit_region (unsigned char *ptr, unsigned int start,
321 unsigned int len)
322 {
323 /* Degenerate base case. */
324 if (len == 0)
325 return;
326 else if (start >= BITS_PER_UNIT)
327 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
328 /* Second base case. */
329 else if ((start + len) <= BITS_PER_UNIT)
330 {
331 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
332 mask >>= BITS_PER_UNIT - (start + len);
333
334 ptr[0] &= ~mask;
335
336 return;
337 }
338 /* Clear most significant bits in a byte and proceed with the next byte. */
339 else if (start != 0)
340 {
341 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
342 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
343 }
344 /* Whole bytes need to be cleared. */
345 else if (start == 0 && len > BITS_PER_UNIT)
346 {
347 unsigned int nbytes = len / BITS_PER_UNIT;
348 /* We could recurse on each byte but we clear whole bytes, so a simple
349 memset will do. */
350 memset (ptr, '\0', nbytes);
351 /* Clear the remaining sub-byte region if there is one. */
352 if (len % BITS_PER_UNIT != 0)
353 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
354 }
355 else
356 gcc_unreachable ();
357 }
358
359 /* Write BITLEN bits of EXPR to the byte array PTR at
360 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
361 Return true if the operation succeeded. */
362
363 static bool
364 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
365 unsigned int total_bytes)
366 {
367 unsigned int first_byte = bitpos / BITS_PER_UNIT;
368 tree tmp_int = expr;
369 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
370 || (bitpos % BITS_PER_UNIT)
371 || !int_mode_for_size (bitlen, 0).exists ());
372
373 if (!sub_byte_op_p)
374 return native_encode_expr (tmp_int, ptr + first_byte, total_bytes) != 0;
375
376 /* LITTLE-ENDIAN
377 We are writing a non byte-sized quantity or at a position that is not
378 at a byte boundary.
379 |--------|--------|--------| ptr + first_byte
380 ^ ^
381 xxx xxxxxxxx xxx< bp>
382 |______EXPR____|
383
384 First native_encode_expr EXPR into a temporary buffer and shift each
385 byte in the buffer by 'bp' (carrying the bits over as necessary).
386 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
387 <------bitlen---->< bp>
388 Then we clear the destination bits:
389 |---00000|00000000|000-----| ptr + first_byte
390 <-------bitlen--->< bp>
391
392 Finally we ORR the bytes of the shifted EXPR into the cleared region:
393 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
394
395 BIG-ENDIAN
396 We are writing a non byte-sized quantity or at a position that is not
397 at a byte boundary.
398 ptr + first_byte |--------|--------|--------|
399 ^ ^
400 <bp >xxx xxxxxxxx xxx
401 |_____EXPR_____|
402
403 First native_encode_expr EXPR into a temporary buffer and shift each
404 byte in the buffer to the right (carrying the bits over as necessary).
405 We shift by as much as needed to align the most significant bit of EXPR
406 with bitpos:
407 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
408 <---bitlen----> <bp ><-----bitlen----->
409 Then we clear the destination bits:
410 ptr + first_byte |-----000||00000000||00000---|
411 <bp ><-------bitlen----->
412
413 Finally we ORR the bytes of the shifted EXPR into the cleared region:
414 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
415 The awkwardness comes from the fact that bitpos is counted from the
416 most significant bit of a byte. */
417
418 /* Allocate an extra byte so that we have space to shift into. */
419 unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
420 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
421 memset (tmpbuf, '\0', byte_size);
422 /* The store detection code should only have allowed constants that are
423 accepted by native_encode_expr. */
424 if (native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
425 gcc_unreachable ();
426
427 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
428 bytes to write. This means it can write more than
429 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
430 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
431 bitlen and zero out the bits that are not relevant as well (that may
432 contain a sign bit due to sign-extension). */
433 unsigned int padding
434 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
435 /* On big-endian the padding is at the 'front' so just skip the initial
436 bytes. */
437 if (BYTES_BIG_ENDIAN)
438 tmpbuf += padding;
439
440 byte_size -= padding;
441
442 if (bitlen % BITS_PER_UNIT != 0)
443 {
444 if (BYTES_BIG_ENDIAN)
445 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
446 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
447 else
448 clear_bit_region (tmpbuf, bitlen,
449 byte_size * BITS_PER_UNIT - bitlen);
450 }
451 /* Left shifting relies on the last byte being zero if bitlen is
452 a multiple of BITS_PER_UNIT, which might not be the case if
453 there are padding bytes. */
454 else if (!BYTES_BIG_ENDIAN)
455 tmpbuf[byte_size - 1] = '\0';
456
457 /* Clear the bit region in PTR where the bits from TMPBUF will be
458 inserted into. */
459 if (BYTES_BIG_ENDIAN)
460 clear_bit_region_be (ptr + first_byte,
461 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
462 else
463 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
464
465 int shift_amnt;
466 int bitlen_mod = bitlen % BITS_PER_UNIT;
467 int bitpos_mod = bitpos % BITS_PER_UNIT;
468
469 bool skip_byte = false;
470 if (BYTES_BIG_ENDIAN)
471 {
472 /* BITPOS and BITLEN are exactly aligned and no shifting
473 is necessary. */
474 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
475 || (bitpos_mod == 0 && bitlen_mod == 0))
476 shift_amnt = 0;
477 /* |. . . . . . . .|
478 <bp > <blen >.
479 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
480 of the value until it aligns with 'bp' in the next byte over. */
481 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
482 {
483 shift_amnt = bitlen_mod + bitpos_mod;
484 skip_byte = bitlen_mod != 0;
485 }
486 /* |. . . . . . . .|
487 <----bp--->
488 <---blen---->.
489 Shift the value right within the same byte so it aligns with 'bp'. */
490 else
491 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
492 }
493 else
494 shift_amnt = bitpos % BITS_PER_UNIT;
495
496 /* Create the shifted version of EXPR. */
497 if (!BYTES_BIG_ENDIAN)
498 {
499 shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
500 if (shift_amnt == 0)
501 byte_size--;
502 }
503 else
504 {
505 gcc_assert (BYTES_BIG_ENDIAN);
506 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
507 /* If shifting right forced us to move into the next byte skip the now
508 empty byte. */
509 if (skip_byte)
510 {
511 tmpbuf++;
512 byte_size--;
513 }
514 }
515
516 /* Insert the bits from TMPBUF. */
517 for (unsigned int i = 0; i < byte_size; i++)
518 ptr[first_byte + i] |= tmpbuf[i];
519
520 return true;
521 }
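
/* A concrete example of the little-endian path above (illustrative only):
   writing the 5-bit value 0b10111 (0x17) of a one-byte type at bit position 3
   first native-encodes it into TMPBUF as 0x17, clears the bits of TMPBUF above
   bit 4 (a no-op here since the value already fits in 5 bits), shifts the
   buffer left by bitpos % BITS_PER_UNIT = 3 giving 0xb8, clears bits [3, 8)
   of PTR[first_byte] and then ORs 0xb8 into it, leaving the low three bits of
   that byte untouched.  */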
522
523 /* Sorting function for store_immediate_info objects.
524 Sorts them by bitposition. */
525
526 static int
527 sort_by_bitpos (const void *x, const void *y)
528 {
529 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
530 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
531
532 if ((*tmp)->bitpos < (*tmp2)->bitpos)
533 return -1;
534 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
535 return 1;
536 else
537 /* If they are the same let's use the order which is guaranteed to
538 be different. */
539 return (*tmp)->order - (*tmp2)->order;
540 }
541
542 /* Sorting function for store_immediate_info objects.
543 Sorts them by the order field. */
544
545 static int
546 sort_by_order (const void *x, const void *y)
547 {
548 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
549 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
550
551 if ((*tmp)->order < (*tmp2)->order)
552 return -1;
553 else if ((*tmp)->order > (*tmp2)->order)
554 return 1;
555
556 gcc_unreachable ();
557 }
558
559 /* Initialize a merged_store_group object from a store_immediate_info
560 object. */
561
562 merged_store_group::merged_store_group (store_immediate_info *info)
563 {
564 start = info->bitpos;
565 width = info->bitsize;
566 bitregion_start = info->bitregion_start;
567 bitregion_end = info->bitregion_end;
568 /* VAL has memory allocated for it in apply_stores once the group
569 width has been finalized. */
570 val = NULL;
571 mask = NULL;
572 unsigned HOST_WIDE_INT align_bitpos = 0;
573 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
574 &align, &align_bitpos);
575 align_base = start - align_bitpos;
576 stores.create (1);
577 stores.safe_push (info);
578 last_stmt = info->stmt;
579 last_order = info->order;
580 first_stmt = last_stmt;
581 first_order = last_order;
582 buf_size = 0;
583 }
584
585 merged_store_group::~merged_store_group ()
586 {
587 if (val)
588 XDELETEVEC (val);
589 }
590
591 /* Helper method for merge_into and merge_overlapping to do
592 the common part. */
593 void
594 merged_store_group::do_merge (store_immediate_info *info)
595 {
596 bitregion_start = MIN (bitregion_start, info->bitregion_start);
597 bitregion_end = MAX (bitregion_end, info->bitregion_end);
598
599 unsigned int this_align;
600 unsigned HOST_WIDE_INT align_bitpos = 0;
601 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
602 &this_align, &align_bitpos);
603 if (this_align > align)
604 {
605 align = this_align;
606 align_base = info->bitpos - align_bitpos;
607 }
608
609 gimple *stmt = info->stmt;
610 stores.safe_push (info);
611 if (info->order > last_order)
612 {
613 last_order = info->order;
614 last_stmt = stmt;
615 }
616 else if (info->order < first_order)
617 {
618 first_order = info->order;
619 first_stmt = stmt;
620 }
621 }
622
623 /* Merge a store recorded by INFO into this merged store.
624 The store is not overlapping with the existing recorded
625 stores. */
626
627 void
628 merged_store_group::merge_into (store_immediate_info *info)
629 {
630 unsigned HOST_WIDE_INT wid = info->bitsize;
631 /* Make sure we're inserting in the position we think we're inserting. */
632 gcc_assert (info->bitpos >= start + width
633 && info->bitregion_start <= bitregion_end);
634
635 width += wid;
636 do_merge (info);
637 }
638
639 /* Merge a store described by INFO into this merged store.
640 INFO overlaps in some way with the current store (the purely contiguous,
641 non-overlapping case is handled by merged_store_group::merge_into).
642
643 void
644 merged_store_group::merge_overlapping (store_immediate_info *info)
645 {
646 /* If the store extends the size of the group, extend the width. */
647 if (info->bitpos + info->bitsize > start + width)
648 width += info->bitpos + info->bitsize - (start + width);
649
650 do_merge (info);
651 }
652
653 /* Go through all the recorded stores in this group in program order and
654 apply their values to the VAL byte array to create the final merged
655 value. Return true if the operation succeeded. */
656
657 bool
658 merged_store_group::apply_stores ()
659 {
660 /* Make sure we have more than one store in the group, otherwise we cannot
661 merge anything. */
662 if (bitregion_start % BITS_PER_UNIT != 0
663 || bitregion_end % BITS_PER_UNIT != 0
664 || stores.length () == 1)
665 return false;
666
667 stores.qsort (sort_by_order);
668 store_immediate_info *info;
669 unsigned int i;
670 /* Create a buffer of a size that is 2 times the number of bytes we're
671 storing. That way native_encode_expr can write power-of-2-sized
672 chunks without overrunning. */
673 buf_size = 2 * ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
674 val = XNEWVEC (unsigned char, 2 * buf_size);
675 mask = val + buf_size;
676 memset (val, 0, buf_size);
677 memset (mask, ~0U, buf_size);
678
679 FOR_EACH_VEC_ELT (stores, i, info)
680 {
681 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
682 bool ret = encode_tree_to_bitpos (gimple_assign_rhs1 (info->stmt),
683 val, info->bitsize,
684 pos_in_buffer, buf_size);
685 if (dump_file && (dump_flags & TDF_DETAILS))
686 {
687 if (ret)
688 {
689 fprintf (dump_file, "After writing ");
690 print_generic_expr (dump_file,
691 gimple_assign_rhs1 (info->stmt), 0);
692 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
693 " at position %d the merged region contains:\n",
694 info->bitsize, pos_in_buffer);
695 dump_char_array (dump_file, val, buf_size);
696 }
697 else
698 fprintf (dump_file, "Failed to merge stores\n");
699 }
700 if (!ret)
701 return false;
702 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
703 if (BYTES_BIG_ENDIAN)
704 clear_bit_region_be (m, (BITS_PER_UNIT - 1
705 - (pos_in_buffer % BITS_PER_UNIT)),
706 info->bitsize);
707 else
708 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
709 }
710 return true;
711 }
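
/* A small worked example of the VAL/MASK encoding built above (illustrative
   only): for a group whose bit region covers four bytes and whose recorded
   stores wrote 0xab to byte 0 and 0xcd to byte 2 (e.g. bit-field members
   sharing a single bit region), apply_stores produces

     val  = { 0xab, 0x00, 0xcd, 0x00, ... }
     mask = { 0x00, 0xff, 0x00, 0xff, ... }

   i.e. a mask bit is 1 exactly where no original store wrote anything, which
   is what split_group and output_merged_store later use to detect padding
   that must be preserved.  */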
712
713 /* Structure describing the store chain. */
714
715 struct imm_store_chain_info
716 {
717 /* Doubly-linked list that imposes an order on chain processing.
718 PNXP (prev's next pointer) points to the head of a list, or to
719 the next field in the previous chain in the list.
720 See pass_store_merging::m_stores_head for more rationale. */
721 imm_store_chain_info *next, **pnxp;
722 tree base_addr;
723 auto_vec<store_immediate_info *> m_store_info;
724 auto_vec<merged_store_group *> m_merged_store_groups;
725
726 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
727 : next (inspt), pnxp (&inspt), base_addr (b_a)
728 {
729 inspt = this;
730 if (next)
731 {
732 gcc_checking_assert (pnxp == next->pnxp);
733 next->pnxp = &next;
734 }
735 }
736 ~imm_store_chain_info ()
737 {
738 *pnxp = next;
739 if (next)
740 {
741 gcc_checking_assert (&next == next->pnxp);
742 next->pnxp = pnxp;
743 }
744 }
745 bool terminate_and_process_chain ();
746 bool coalesce_immediate_stores ();
747 bool output_merged_store (merged_store_group *);
748 bool output_merged_stores ();
749 };
750
751 const pass_data pass_data_tree_store_merging = {
752 GIMPLE_PASS, /* type */
753 "store-merging", /* name */
754 OPTGROUP_NONE, /* optinfo_flags */
755 TV_GIMPLE_STORE_MERGING, /* tv_id */
756 PROP_ssa, /* properties_required */
757 0, /* properties_provided */
758 0, /* properties_destroyed */
759 0, /* todo_flags_start */
760 TODO_update_ssa, /* todo_flags_finish */
761 };
762
763 class pass_store_merging : public gimple_opt_pass
764 {
765 public:
766 pass_store_merging (gcc::context *ctxt)
767 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
768 {
769 }
770
771 /* Pass not supported for PDP-endianness, nor for insane hosts
772 or target character sizes where native_{encode,interpret}_expr
773 doesn't work properly. */
774 virtual bool
775 gate (function *)
776 {
777 return flag_store_merging
778 && WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN
779 && CHAR_BIT == 8
780 && BITS_PER_UNIT == 8;
781 }
782
783 virtual unsigned int execute (function *);
784
785 private:
786 hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;
787
788 /* Form a doubly-linked stack of the elements of m_stores, so that
789 we can iterate over them in a predictable way. Using this order
790 avoids extraneous differences in the compiler output just because
791 of tree pointer variations (e.g. different chains end up in
792 different positions of m_stores, so they are handled in different
793 orders, so they allocate or release SSA names in different
794 orders, and when they get reused, subsequent passes end up
795 getting different SSA names, which may ultimately change
796 decisions when going out of SSA). */
797 imm_store_chain_info *m_stores_head;
798
799 bool terminate_and_process_all_chains ();
800 bool terminate_all_aliasing_chains (imm_store_chain_info **,
801 bool, gimple *);
802 bool terminate_and_release_chain (imm_store_chain_info *);
803 }; // class pass_store_merging
804
805 /* Terminate and process all recorded chains. Return true if any changes
806 were made. */
807
808 bool
809 pass_store_merging::terminate_and_process_all_chains ()
810 {
811 bool ret = false;
812 while (m_stores_head)
813 ret |= terminate_and_release_chain (m_stores_head);
814 gcc_assert (m_stores.elements () == 0);
815 gcc_assert (m_stores_head == NULL);
816
817 return ret;
818 }
819
820 /* Terminate all chains that are affected by the statement STMT.
821 CHAIN_INFO, if non-NULL, is the chain recorded for the base object that
822 STMT's destination writes to; it may be NULL, in which case the aliasing
823 checks are performed on the whole statement rather than a particular
824 operand of it. VAR_OFFSET_P signifies whether STMT stores to that base
825 at a variable offset; if so any chain anchored at that base is terminated.
826 Return true if at least one aliasing chain was terminated. */
827
828 bool
829 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
830 **chain_info,
831 bool var_offset_p,
832 gimple *stmt)
833 {
834 bool ret = false;
835
836 /* If the statement doesn't touch memory it can't alias. */
837 if (!gimple_vuse (stmt))
838 return false;
839
840 /* Check if the assignment destination (BASE) is part of a store chain.
841 This is to catch non-constant stores to destinations that may be part
842 of a chain. */
843 if (chain_info)
844 {
845 /* We have a chain at BASE and we're writing to [BASE + <variable>].
846 This can interfere with any of the stores so terminate
847 the chain. */
848 if (var_offset_p)
849 {
850 terminate_and_release_chain (*chain_info);
851 ret = true;
852 }
853 /* Otherwise go through every store in the chain to see if it
854 aliases with any of them. */
855 else
856 {
857 store_immediate_info *info;
858 unsigned int i;
859 FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
860 {
861 if (ref_maybe_used_by_stmt_p (stmt,
862 gimple_assign_lhs (info->stmt))
863 || stmt_may_clobber_ref_p (stmt,
864 gimple_assign_lhs (info->stmt)))
865 {
866 if (dump_file && (dump_flags & TDF_DETAILS))
867 {
868 fprintf (dump_file,
869 "stmt causes chain termination:\n");
870 print_gimple_stmt (dump_file, stmt, 0);
871 }
872 terminate_and_release_chain (*chain_info);
873 ret = true;
874 break;
875 }
876 }
877 }
878 }
879
880 /* Check for aliasing with all other store chains. */
881 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
882 {
883 next = cur->next;
884
885 /* We already checked all the stores in chain_info and terminated the
886 chain if necessary. Skip it here. */
887 if (chain_info && (*chain_info) == cur)
888 continue;
889
890 /* We can't use the base object here as that does not reliably exist.
891 Build a ao_ref from the base object address (if we know the
892 minimum and maximum offset and the maximum size we could improve
893 things here). */
894 ao_ref chain_ref;
895 ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
896 if (ref_maybe_used_by_stmt_p (stmt, &chain_ref)
897 || stmt_may_clobber_ref_p_1 (stmt, &chain_ref))
898 {
899 terminate_and_release_chain (cur);
900 ret = true;
901 }
902 }
903
904 return ret;
905 }
906
907 /* Helper function. Terminate the chain recorded in CHAIN_INFO.
908 Return true if the merging and output were successful. The m_stores
909 entry is removed after the processing in any case. */
910
911 bool
912 pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info)
913 {
914 bool ret = chain_info->terminate_and_process_chain ();
915 m_stores.remove (chain_info->base_addr);
916 delete chain_info;
917 return ret;
918 }
919
920 /* Go through the candidate stores recorded in m_store_info and merge them
921 into merged_store_group objects recorded into m_merged_store_groups
922 representing the widened stores. Return true if coalescing was successful
923 and the number of widened stores is fewer than the original number
924 of stores. */
925
926 bool
927 imm_store_chain_info::coalesce_immediate_stores ()
928 {
929 /* Anything less can't be processed. */
930 if (m_store_info.length () < 2)
931 return false;
932
933 if (dump_file && (dump_flags & TDF_DETAILS))
934 fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
935 m_store_info.length ());
936
937 store_immediate_info *info;
938 unsigned int i;
939
940 /* Order the stores by the bitposition they write to. */
941 m_store_info.qsort (sort_by_bitpos);
942
943 info = m_store_info[0];
944 merged_store_group *merged_store = new merged_store_group (info);
945
946 FOR_EACH_VEC_ELT (m_store_info, i, info)
947 {
948 if (dump_file && (dump_flags & TDF_DETAILS))
949 {
950 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
951 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
952 i, info->bitsize, info->bitpos);
953 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
954 fprintf (dump_file, "\n------------\n");
955 }
956
957 if (i == 0)
958 continue;
959
960 /* |---store 1---|
961 |---store 2---|
962 Overlapping stores. */
963 unsigned HOST_WIDE_INT start = info->bitpos;
964 if (IN_RANGE (start, merged_store->start,
965 merged_store->start + merged_store->width - 1))
966 {
967 merged_store->merge_overlapping (info);
968 continue;
969 }
970
971 /* |---store 1---| <gap> |---store 2---|.
972 Gap between stores. Start a new group if there are any gaps
973 between bitregions. */
974 if (info->bitregion_start > merged_store->bitregion_end)
975 {
976 /* Try to apply all the stores recorded for the group to determine
977 the bitpattern they write and discard it if that fails.
978 This will also reject single-store groups. */
979 if (!merged_store->apply_stores ())
980 delete merged_store;
981 else
982 m_merged_store_groups.safe_push (merged_store);
983
984 merged_store = new merged_store_group (info);
985
986 continue;
987 }
988
989 /* |---store 1---||---store 2---|
990 This store is consecutive to the previous one.
991 Merge it into the current store group. */
992 merged_store->merge_into (info);
993 }
994
995 /* Record or discard the last store group. */
996 if (!merged_store->apply_stores ())
997 delete merged_store;
998 else
999 m_merged_store_groups.safe_push (merged_store);
1000
1001 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
1002 bool success
1003 = !m_merged_store_groups.is_empty ()
1004 && m_merged_store_groups.length () < m_store_info.length ();
1005
1006 if (success && dump_file)
1007 fprintf (dump_file, "Coalescing successful!\n"
1008 "Merged into %u stores\n",
1009 m_merged_store_groups.length ());
1010
1011 return success;
1012 }
1013
1014 /* Return the type to use for the merged stores described by STMTS.
1015 This is needed to get the alias sets right. */
1016
1017 static tree
1018 get_alias_type_for_stmts (auto_vec<gimple *> &stmts)
1019 {
1020 gimple *stmt;
1021 unsigned int i;
1022 tree lhs = gimple_assign_lhs (stmts[0]);
1023 tree type = reference_alias_ptr_type (lhs);
1024
1025 FOR_EACH_VEC_ELT (stmts, i, stmt)
1026 {
1027 if (i == 0)
1028 continue;
1029
1030 lhs = gimple_assign_lhs (stmt);
1031 tree type1 = reference_alias_ptr_type (lhs);
1032 if (!alias_ptr_types_compatible_p (type, type1))
1033 return ptr_type_node;
1034 }
1035 return type;
1036 }
1037
1038 /* Return the location_t information we can find among the statements
1039 in STMTS. */
1040
1041 static location_t
1042 get_location_for_stmts (auto_vec<gimple *> &stmts)
1043 {
1044 gimple *stmt;
1045 unsigned int i;
1046
1047 FOR_EACH_VEC_ELT (stmts, i, stmt)
1048 if (gimple_has_location (stmt))
1049 return gimple_location (stmt);
1050
1051 return UNKNOWN_LOCATION;
1052 }
1053
1054 /* Used to describe a store resulting from splitting a wide store into
1055 smaller regularly-sized stores in split_group. */
1056
1057 struct split_store
1058 {
1059 unsigned HOST_WIDE_INT bytepos;
1060 unsigned HOST_WIDE_INT size;
1061 unsigned HOST_WIDE_INT align;
1062 auto_vec<gimple *> orig_stmts;
1063 /* True if there is a single orig stmt covering the whole split store. */
1064 bool orig;
1065 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1066 unsigned HOST_WIDE_INT);
1067 };
1068
1069 /* Simple constructor. */
1070
1071 split_store::split_store (unsigned HOST_WIDE_INT bp,
1072 unsigned HOST_WIDE_INT sz,
1073 unsigned HOST_WIDE_INT al)
1074 : bytepos (bp), size (sz), align (al), orig (false)
1075 {
1076 orig_stmts.create (0);
1077 }
1078
1079 /* Record all statements corresponding to stores in GROUP that write to
1080 the region starting at BITPOS and of size BITSIZE. Record such
1081 statements in STMTS if non-NULL. The stores in GROUP must be sorted by
1082 bitposition. Return the store_immediate_info object if there is exactly
1083 one original store in the range, otherwise return NULL. */
1084
1085 static store_immediate_info *
1086 find_constituent_stmts (struct merged_store_group *group,
1087 vec<gimple *> *stmts,
1088 unsigned int *first,
1089 unsigned HOST_WIDE_INT bitpos,
1090 unsigned HOST_WIDE_INT bitsize)
1091 {
1092 store_immediate_info *info, *ret = NULL;
1093 unsigned int i;
1094 bool second = false;
1095 bool update_first = true;
1096 unsigned HOST_WIDE_INT end = bitpos + bitsize;
1097 for (i = *first; group->stores.iterate (i, &info); ++i)
1098 {
1099 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
1100 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
1101 if (stmt_end <= bitpos)
1102 {
1103 /* BITPOS passed to this function never decreases from within the
1104 same split_group call, so optimize and don't scan info records
1105 which are known to end before or at BITPOS next time.
1106 Only do it if all stores before this one also pass this. */
1107 if (update_first)
1108 *first = i + 1;
1109 continue;
1110 }
1111 else
1112 update_first = false;
1113
1114 /* The stores in GROUP are ordered by bitposition so if we're past
1115 the region for this group return early. */
1116 if (stmt_start >= end)
1117 return ret;
1118
1119 if (stmts)
1120 {
1121 stmts->safe_push (info->stmt);
1122 if (ret)
1123 {
1124 ret = NULL;
1125 second = true;
1126 }
1127 }
1128 else if (ret)
1129 return NULL;
1130 if (!second)
1131 ret = info;
1132 }
1133 return ret;
1134 }
1135
1136 /* Split a merged store described by GROUP by populating the SPLIT_STORES
1137 vector (if non-NULL) with split_store structs describing the byte offset
1138 (from the base), the bit size and alignment of each store as well as the
1139 original statements involved in each such split group.
1140 This is to separate the splitting strategy from the statement
1141 building/emission/linking done in output_merged_store.
1142 Return number of new stores.
1143 If SPLIT_STORES is NULL, it is just a dry run to count number of
1144 new stores. */
1145
1146 static unsigned int
1147 split_group (merged_store_group *group, bool allow_unaligned,
1148 vec<struct split_store *> *split_stores)
1149 {
1150 unsigned HOST_WIDE_INT pos = group->bitregion_start;
1151 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
1152 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
1153 unsigned HOST_WIDE_INT group_align = group->align;
1154 unsigned HOST_WIDE_INT align_base = group->align_base;
1155
1156 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
1157
1158 unsigned int ret = 0, first = 0;
1159 unsigned HOST_WIDE_INT try_pos = bytepos;
1160 group->stores.qsort (sort_by_bitpos);
1161
1162 while (size > 0)
1163 {
1164 if ((allow_unaligned || group_align <= BITS_PER_UNIT)
1165 && group->mask[try_pos - bytepos] == (unsigned char) ~0U)
1166 {
1167 /* Skip padding bytes. */
1168 ++try_pos;
1169 size -= BITS_PER_UNIT;
1170 continue;
1171 }
1172
1173 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
1174 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
1175 unsigned HOST_WIDE_INT align_bitpos
1176 = (try_bitpos - align_base) & (group_align - 1);
1177 unsigned HOST_WIDE_INT align = group_align;
1178 if (align_bitpos)
1179 align = least_bit_hwi (align_bitpos);
1180 if (!allow_unaligned)
1181 try_size = MIN (try_size, align);
1182 store_immediate_info *info
1183 = find_constituent_stmts (group, NULL, &first, try_bitpos, try_size);
1184 if (info)
1185 {
1186 /* If there is just one original statement for the range, see if
1187 we can just reuse the original store which could be even larger
1188 than try_size. */
1189 unsigned HOST_WIDE_INT stmt_end
1190 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
1191 info = find_constituent_stmts (group, NULL, &first, try_bitpos,
1192 stmt_end - try_bitpos);
1193 if (info && info->bitpos >= try_bitpos)
1194 {
1195 try_size = stmt_end - try_bitpos;
1196 goto found;
1197 }
1198 }
1199
1200 /* Approximate store bitsize for the case when there are no padding
1201 bits. */
1202 while (try_size > size)
1203 try_size /= 2;
1204 /* Now look for whole padding bytes at the end of that bitsize. */
1205 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
1206 if (group->mask[try_pos - bytepos + nonmasked - 1]
1207 != (unsigned char) ~0U)
1208 break;
1209 if (nonmasked == 0)
1210 {
1211 /* If entire try_size range is padding, skip it. */
1212 try_pos += try_size / BITS_PER_UNIT;
1213 size -= try_size;
1214 continue;
1215 }
1216 /* Otherwise try to decrease try_size if the second half, the last three
1217 quarters etc. are padding. */
1218 nonmasked *= BITS_PER_UNIT;
1219 while (nonmasked <= try_size / 2)
1220 try_size /= 2;
1221 if (!allow_unaligned && group_align > BITS_PER_UNIT)
1222 {
1223 /* Now look for whole padding bytes at the start of that bitsize. */
1224 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
1225 for (masked = 0; masked < try_bytesize; ++masked)
1226 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U)
1227 break;
1228 masked *= BITS_PER_UNIT;
1229 gcc_assert (masked < try_size);
1230 if (masked >= try_size / 2)
1231 {
1232 while (masked >= try_size / 2)
1233 {
1234 try_size /= 2;
1235 try_pos += try_size / BITS_PER_UNIT;
1236 size -= try_size;
1237 masked -= try_size;
1238 }
1239 /* Need to recompute the alignment, so just retry at the new
1240 position. */
1241 continue;
1242 }
1243 }
1244
1245 found:
1246 ++ret;
1247
1248 if (split_stores)
1249 {
1250 struct split_store *store
1251 = new split_store (try_pos, try_size, align);
1252 info = find_constituent_stmts (group, &store->orig_stmts,
1253 &first, try_bitpos, try_size);
1254 if (info
1255 && info->bitpos >= try_bitpos
1256 && info->bitpos + info->bitsize <= try_bitpos + try_size)
1257 store->orig = true;
1258 split_stores->safe_push (store);
1259 }
1260
1261 try_pos += try_size / BITS_PER_UNIT;
1262 size -= try_size;
1263 }
1264
1265 return ret;
1266 }
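
/* Continuing the 48-bit example from the file's head comment (a sketch,
   assuming a 64-bit word size, 32-bit group alignment, unaligned stores
   disallowed and no padding bytes): split_group first caps try_size at the
   32-bit alignment and emits a 32-bit split_store, then the remaining 16 bits
   yield a second split_store, giving the 32-bit plus 16-bit sequence shown in
   the head comment.  If a single original store already covers a candidate
   range, its own size is kept instead via the `found' shortcut.  */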
1267
1268 /* Given a merged store group GROUP output the widened version of it.
1269 The store chain is against the base object BASE.
1270 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
1271 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
1272 Make sure that the number of statements output is less than the number of
1273 original statements. If a better sequence is possible emit it and
1274 return true. */
1275
1276 bool
1277 imm_store_chain_info::output_merged_store (merged_store_group *group)
1278 {
1279 unsigned HOST_WIDE_INT start_byte_pos
1280 = group->bitregion_start / BITS_PER_UNIT;
1281
1282 unsigned int orig_num_stmts = group->stores.length ();
1283 if (orig_num_stmts < 2)
1284 return false;
1285
1286 auto_vec<struct split_store *, 32> split_stores;
1287 split_stores.create (0);
1288 bool allow_unaligned
1289 = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
1290 if (allow_unaligned)
1291 {
1292 /* If unaligned stores are allowed, see how many stores we'd emit
1293 for unaligned and how many stores we'd emit for aligned stores.
1294 Only use unaligned stores if it allows fewer stores than aligned. */
1295 unsigned aligned_cnt = split_group (group, false, NULL);
1296 unsigned unaligned_cnt = split_group (group, true, NULL);
1297 if (aligned_cnt <= unaligned_cnt)
1298 allow_unaligned = false;
1299 }
1300 split_group (group, allow_unaligned, &split_stores);
1301
1302 if (split_stores.length () >= orig_num_stmts)
1303 {
1304 /* We didn't manage to reduce the number of statements. Bail out. */
1305 if (dump_file && (dump_flags & TDF_DETAILS))
1306 {
1307 fprintf (dump_file, "Exceeded original number of stmts (%u)."
1308 " Not profitable to emit new sequence.\n",
1309 orig_num_stmts);
1310 }
1311 return false;
1312 }
1313
1314 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
1315 gimple_seq seq = NULL;
1316 tree last_vdef, new_vuse;
1317 last_vdef = gimple_vdef (group->last_stmt);
1318 new_vuse = gimple_vuse (group->last_stmt);
1319
1320 gimple *stmt = NULL;
1321 split_store *split_store;
1322 unsigned int i;
1323
1324 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &seq,
1325 is_gimple_mem_ref_addr, NULL_TREE);
1326 FOR_EACH_VEC_ELT (split_stores, i, split_store)
1327 {
1328 unsigned HOST_WIDE_INT try_size = split_store->size;
1329 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
1330 unsigned HOST_WIDE_INT align = split_store->align;
1331 tree dest, src;
1332 location_t loc;
1333 if (split_store->orig)
1334 {
1335 /* If there is just a single constituent store which covers
1336 the whole area, just reuse the lhs and rhs. */
1337 dest = gimple_assign_lhs (split_store->orig_stmts[0]);
1338 src = gimple_assign_rhs1 (split_store->orig_stmts[0]);
1339 loc = gimple_location (split_store->orig_stmts[0]);
1340 }
1341 else
1342 {
1343 tree offset_type
1344 = get_alias_type_for_stmts (split_store->orig_stmts);
1345 loc = get_location_for_stmts (split_store->orig_stmts);
1346
1347 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
1348 int_type = build_aligned_type (int_type, align);
1349 dest = fold_build2 (MEM_REF, int_type, addr,
1350 build_int_cst (offset_type, try_pos));
1351 src = native_interpret_expr (int_type,
1352 group->val + try_pos - start_byte_pos,
1353 group->buf_size);
1354 tree mask
1355 = native_interpret_expr (int_type,
1356 group->mask + try_pos - start_byte_pos,
1357 group->buf_size);
1358 if (!integer_zerop (mask))
1359 {
1360 tree tem = make_ssa_name (int_type);
1361 tree load_src = unshare_expr (dest);
1362 /* The load might load some or all bits uninitialized,
1363 avoid -W*uninitialized warnings in that case.
1364 As an optimization, if all the bits were provably
1365 uninitialized (no stores at all yet or the previous
1366 store was a CLOBBER) it would be nice to optimize away
1367 the load and replace it e.g. with 0. */
1368 TREE_NO_WARNING (load_src) = 1;
1369 stmt = gimple_build_assign (tem, load_src);
1370 gimple_set_location (stmt, loc);
1371 gimple_set_vuse (stmt, new_vuse);
1372 gimple_seq_add_stmt_without_update (&seq, stmt);
1373
1374 /* FIXME: If there is a single chunk of zero bits in mask,
1375 perhaps use BIT_INSERT_EXPR instead? */
1376 stmt = gimple_build_assign (make_ssa_name (int_type),
1377 BIT_AND_EXPR, tem, mask);
1378 gimple_set_location (stmt, loc);
1379 gimple_seq_add_stmt_without_update (&seq, stmt);
1380 tem = gimple_assign_lhs (stmt);
1381
1382 src = wide_int_to_tree (int_type,
1383 wi::bit_and_not (wi::to_wide (src),
1384 wi::to_wide (mask)));
1385 stmt = gimple_build_assign (make_ssa_name (int_type),
1386 BIT_IOR_EXPR, tem, src);
1387 gimple_set_location (stmt, loc);
1388 gimple_seq_add_stmt_without_update (&seq, stmt);
1389 src = gimple_assign_lhs (stmt);
1390 }
1391 }
1392
1393 stmt = gimple_build_assign (dest, src);
1394 gimple_set_location (stmt, loc);
1395 gimple_set_vuse (stmt, new_vuse);
1396 gimple_seq_add_stmt_without_update (&seq, stmt);
1397
1398 tree new_vdef;
1399 if (i < split_stores.length () - 1)
1400 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
1401 else
1402 new_vdef = last_vdef;
1403
1404 gimple_set_vdef (stmt, new_vdef);
1405 SSA_NAME_DEF_STMT (new_vdef) = stmt;
1406 new_vuse = new_vdef;
1407 }
1408
1409 FOR_EACH_VEC_ELT (split_stores, i, split_store)
1410 delete split_store;
1411
1412 gcc_assert (seq);
1413 if (dump_file)
1414 {
1415 fprintf (dump_file,
1416 "New sequence of %u stmts to replace old one of %u stmts\n",
1417 split_stores.length (), orig_num_stmts);
1418 if (dump_flags & TDF_DETAILS)
1419 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
1420 }
1421 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
1422
1423 return true;
1424 }
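
/* For the masked (read-modify-write) case above, the emitted sequence has
   roughly the following shape (a sketch in GIMPLE-like pseudocode, not
   literal pass output):

     _1 = MEM <uintN> [(T *)&base + pos];    // load the existing bits
     _2 = _1 & MASK;                         // keep only the unwritten bits
     _3 = _2 | (VAL & ~MASK);                // merge in the constant bits
     MEM <uintN> [(T *)&base + pos] = _3;    // one wide store

   where VAL & ~MASK is folded to a constant at compile time.  */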
1425
1426 /* Process the merged_store_group objects created in the coalescing phase.
1427 The stores are all against the base object BASE.
1428 Try to output the widened stores and delete the original statements if
1429 successful. Return true iff any changes were made. */
1430
1431 bool
1432 imm_store_chain_info::output_merged_stores ()
1433 {
1434 unsigned int i;
1435 merged_store_group *merged_store;
1436 bool ret = false;
1437 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
1438 {
1439 if (output_merged_store (merged_store))
1440 {
1441 unsigned int j;
1442 store_immediate_info *store;
1443 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
1444 {
1445 gimple *stmt = store->stmt;
1446 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1447 gsi_remove (&gsi, true);
1448 if (stmt != merged_store->last_stmt)
1449 {
1450 unlink_stmt_vdef (stmt);
1451 release_defs (stmt);
1452 }
1453 }
1454 ret = true;
1455 }
1456 }
1457 if (ret && dump_file)
1458 fprintf (dump_file, "Merging successful!\n");
1459
1460 return ret;
1461 }
1462
1463 /* Coalesce the store_immediate_info objects recorded against the base object
1464 BASE in the first phase and output them.
1465 Delete the allocated structures.
1466 Return true if any changes were made. */
1467
1468 bool
1469 imm_store_chain_info::terminate_and_process_chain ()
1470 {
1471 /* Process store chain. */
1472 bool ret = false;
1473 if (m_store_info.length () > 1)
1474 {
1475 ret = coalesce_immediate_stores ();
1476 if (ret)
1477 ret = output_merged_stores ();
1478 }
1479
1480 /* Delete all the entries we allocated ourselves. */
1481 store_immediate_info *info;
1482 unsigned int i;
1483 FOR_EACH_VEC_ELT (m_store_info, i, info)
1484 delete info;
1485
1486 merged_store_group *merged_info;
1487 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
1488 delete merged_info;
1489
1490 return ret;
1491 }
1492
1493 /* Return true iff LHS is a destination potentially interesting for
1494 store merging. In practice these are the codes that get_inner_reference
1495 can process. */
1496
1497 static bool
1498 lhs_valid_for_store_merging_p (tree lhs)
1499 {
1500 tree_code code = TREE_CODE (lhs);
1501
1502 if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
1503 || code == COMPONENT_REF || code == BIT_FIELD_REF)
1504 return true;
1505
1506 return false;
1507 }
1508
1509 /* Return true if the tree RHS is a constant we want to consider
1510 during store merging. In practice accept all codes that
1511 native_encode_expr accepts. */
1512
1513 static bool
1514 rhs_valid_for_store_merging_p (tree rhs)
1515 {
1516 return native_encode_expr (rhs, NULL,
1517 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs)))) != 0;
1518 }
1519
1520 /* Entry point for the pass. Go over each basic block recording chains of
1521 immediate stores. Upon encountering a statement that terminates a chain
1522 (such as an aliasing or volatile access) process the recorded stores and
1523 emit the widened variants. */
1524
1525 unsigned int
1526 pass_store_merging::execute (function *fun)
1527 {
1528 basic_block bb;
1529 hash_set<gimple *> orig_stmts;
1530
1531 FOR_EACH_BB_FN (bb, fun)
1532 {
1533 gimple_stmt_iterator gsi;
1534 unsigned HOST_WIDE_INT num_statements = 0;
1535 /* Count the non-debug statements in the block; blocks with fewer
1536 than two such statements cannot contain a mergeable chain, so
1537 skip them. */
1538 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1539 {
1540 if (is_gimple_debug (gsi_stmt (gsi)))
1541 continue;
1542
1543 if (++num_statements >= 2)
1544 break;
1545 }
1546
1547 if (num_statements < 2)
1548 continue;
1549
1550 if (dump_file && (dump_flags & TDF_DETAILS))
1551 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
1552
1553 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1554 {
1555 gimple *stmt = gsi_stmt (gsi);
1556
1557 if (is_gimple_debug (stmt))
1558 continue;
1559
1560 if (gimple_has_volatile_ops (stmt))
1561 {
1562 /* Terminate all chains. */
1563 if (dump_file && (dump_flags & TDF_DETAILS))
1564 fprintf (dump_file, "Volatile access terminates "
1565 "all chains\n");
1566 terminate_and_process_all_chains ();
1567 continue;
1568 }
1569
1570 if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
1571 && !stmt_can_throw_internal (stmt)
1572 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
1573 {
1574 tree lhs = gimple_assign_lhs (stmt);
1575 tree rhs = gimple_assign_rhs1 (stmt);
1576
1577 HOST_WIDE_INT bitsize, bitpos;
1578 unsigned HOST_WIDE_INT bitregion_start = 0;
1579 unsigned HOST_WIDE_INT bitregion_end = 0;
1580 machine_mode mode;
1581 int unsignedp = 0, reversep = 0, volatilep = 0;
1582 tree offset, base_addr;
1583 base_addr
1584 = get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode,
1585 &unsignedp, &reversep, &volatilep);
1586 if (TREE_CODE (lhs) == COMPONENT_REF
1587 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (lhs, 1)))
1588 {
1589 get_bit_range (&bitregion_start, &bitregion_end, lhs,
1590 &bitpos, &offset);
1591 if (bitregion_end)
1592 ++bitregion_end;
1593 }
1594 if (bitsize == 0)
1595 continue;
1596
1597 /* As a future enhancement we could handle stores with the same
1598 base and offset. */
1599 bool invalid = reversep
1600 || ((bitsize > MAX_BITSIZE_MODE_ANY_INT)
1601 && (TREE_CODE (rhs) != INTEGER_CST))
1602 || !rhs_valid_for_store_merging_p (rhs);
1603
1604 /* We do not want to rewrite TARGET_MEM_REFs. */
1605 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
1606 invalid = true;
1607 /* In some cases get_inner_reference may return a
1608 MEM_REF [ptr + byteoffset]. For the purposes of this pass
1609 canonicalize the base_addr to MEM_REF [ptr] and take
1610 byteoffset into account in the bitpos. This occurs in
1611 PR 23684 and this way we can catch more chains. */
1612 else if (TREE_CODE (base_addr) == MEM_REF)
1613 {
1614 offset_int bit_off, byte_off = mem_ref_offset (base_addr);
1615 bit_off = byte_off << LOG2_BITS_PER_UNIT;
1616 bit_off += bitpos;
1617 if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
1618 {
1619 bitpos = bit_off.to_shwi ();
1620 if (bitregion_end)
1621 {
1622 bit_off = byte_off << LOG2_BITS_PER_UNIT;
1623 bit_off += bitregion_start;
1624 if (wi::fits_uhwi_p (bit_off))
1625 {
1626 bitregion_start = bit_off.to_uhwi ();
1627 bit_off = byte_off << LOG2_BITS_PER_UNIT;
1628 bit_off += bitregion_end;
1629 if (wi::fits_uhwi_p (bit_off))
1630 bitregion_end = bit_off.to_uhwi ();
1631 else
1632 bitregion_end = 0;
1633 }
1634 else
1635 bitregion_end = 0;
1636 }
1637 }
1638 else
1639 invalid = true;
1640 base_addr = TREE_OPERAND (base_addr, 0);
1641 }
1642 /* get_inner_reference returns the base object, get at its
1643 address now. */
1644 else
1645 {
1646 if (bitpos < 0)
1647 invalid = true;
1648 base_addr = build_fold_addr_expr (base_addr);
1649 }
1650
1651 if (!bitregion_end)
1652 {
1653 bitregion_start = ROUND_DOWN (bitpos, BITS_PER_UNIT);
1654 bitregion_end = ROUND_UP (bitpos + bitsize, BITS_PER_UNIT);
1655 }
1656
1657 if (! invalid
1658 && offset != NULL_TREE)
1659 {
1660 /* If the access is variable offset then a base
1661 decl has to be address-taken to be able to
1662 emit pointer-based stores to it.
1663 ??? We might be able to get away with
1664 re-using the original base up to the first
1665 variable part and then wrapping that inside
1666 a BIT_FIELD_REF. */
1667 tree base = get_base_address (base_addr);
1668 if (! base
1669 || (DECL_P (base)
1670 && ! TREE_ADDRESSABLE (base)))
1671 invalid = true;
1672 else
1673 base_addr = build2 (POINTER_PLUS_EXPR,
1674 TREE_TYPE (base_addr),
1675 base_addr, offset);
1676 }
1677
1678 struct imm_store_chain_info **chain_info
1679 = m_stores.get (base_addr);
1680
1681 if (!invalid)
1682 {
1683 store_immediate_info *info;
1684 if (chain_info)
1685 {
1686 unsigned int ord = (*chain_info)->m_store_info.length ();
1687 info = new store_immediate_info (bitsize, bitpos,
1688 bitregion_start,
1689 bitregion_end,
1690 stmt, ord);
1691 if (dump_file && (dump_flags & TDF_DETAILS))
1692 {
1693 fprintf (dump_file,
1694 "Recording immediate store from stmt:\n");
1695 print_gimple_stmt (dump_file, stmt, 0);
1696 }
1697 (*chain_info)->m_store_info.safe_push (info);
1698 /* If we reach the limit of stores to merge in a chain
1699 terminate and process the chain now. */
1700 if ((*chain_info)->m_store_info.length ()
1701 == (unsigned int)
1702 PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
1703 {
1704 if (dump_file && (dump_flags & TDF_DETAILS))
1705 fprintf (dump_file,
1706 "Reached maximum number of statements"
1707 " to merge:\n");
1708 terminate_and_release_chain (*chain_info);
1709 }
1710 continue;
1711 }
1712
1713 /* Store aliases any existing chain? */
1714 terminate_all_aliasing_chains (chain_info, false, stmt);
1715 /* Start a new chain. */
1716 struct imm_store_chain_info *new_chain
1717 = new imm_store_chain_info (m_stores_head, base_addr);
1718 info = new store_immediate_info (bitsize, bitpos,
1719 bitregion_start,
1720 bitregion_end,
1721 stmt, 0);
1722 new_chain->m_store_info.safe_push (info);
1723 m_stores.put (base_addr, new_chain);
1724 if (dump_file && (dump_flags & TDF_DETAILS))
1725 {
1726 fprintf (dump_file,
1727 "Starting new chain with statement:\n");
1728 print_gimple_stmt (dump_file, stmt, 0);
1729 fprintf (dump_file, "The base object is:\n");
1730 print_generic_expr (dump_file, base_addr);
1731 fprintf (dump_file, "\n");
1732 }
1733 }
1734 else
1735 terminate_all_aliasing_chains (chain_info,
1736 offset != NULL_TREE, stmt);
1737
1738 continue;
1739 }
1740
1741 terminate_all_aliasing_chains (NULL, false, stmt);
1742 }
1743 terminate_and_process_all_chains ();
1744 }
1745 return 0;
1746 }
1747
1748 } // anon namespace
1749
1750 /* Construct and return a store merging pass object. */
1751
1752 gimple_opt_pass *
1753 make_pass_store_merging (gcc::context *ctxt)
1754 {
1755 return new pass_store_merging (ctxt);
1756 }
1757
1758 #if CHECKING_P
1759
1760 namespace selftest {
1761
1762 /* Selftests for store merging helpers. */
1763
1764 /* Assert that all elements of the byte arrays X and Y, both of length N
1765 are equal. */
1766
1767 static void
1768 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
1769 {
1770 for (unsigned int i = 0; i < n; i++)
1771 {
1772 if (x[i] != y[i])
1773 {
1774 fprintf (stderr, "Arrays do not match. X:\n");
1775 dump_char_array (stderr, x, n);
1776 fprintf (stderr, "Y:\n");
1777 dump_char_array (stderr, y, n);
1778 }
1779 ASSERT_EQ (x[i], y[i]);
1780 }
1781 }
1782
1783 /* Test that shift_bytes_in_array carries bits across adjacent
1784 bytes correctly. */
1785
1786 static void
1787 verify_shift_bytes_in_array (void)
1788 {
1789 /* byte 1 | byte 0
1790 00011111 | 11100000. */
1791 unsigned char orig[2] = { 0xe0, 0x1f };
1792 unsigned char in[2];
1793 memcpy (in, orig, sizeof orig);
1794
1795 unsigned char expected[2] = { 0x80, 0x7f };
1796 shift_bytes_in_array (in, sizeof (in), 2);
1797 verify_array_eq (in, expected, sizeof (in));
1798
1799 memcpy (in, orig, sizeof orig);
1800 memcpy (expected, orig, sizeof orig);
1801 /* Check that shifting by zero doesn't change anything. */
1802 shift_bytes_in_array (in, sizeof (in), 0);
1803 verify_array_eq (in, expected, sizeof (in));
1804
1805 }
1806
1807 /* Test that shift_bytes_in_array_right carries bits across adjacent
1808 bytes correctly. */
1809
1810 static void
1811 verify_shift_bytes_in_array_right (void)
1812 {
1813 /* byte 0 | byte 1
1814 00011111 | 11100000. */
1815 unsigned char orig[2] = { 0x1f, 0xe0};
1816 unsigned char in[2];
1817 memcpy (in, orig, sizeof orig);
1818 unsigned char expected[2] = { 0x07, 0xf8};
1819 shift_bytes_in_array_right (in, sizeof (in), 2);
1820 verify_array_eq (in, expected, sizeof (in));
1821
1822 memcpy (in, orig, sizeof orig);
1823 memcpy (expected, orig, sizeof orig);
1824 /* Check that shifting by zero doesn't change anything. */
1825 shift_bytes_in_array_right (in, sizeof (in), 0);
1826 verify_array_eq (in, expected, sizeof (in));
1827 }
1828
1829 /* Test that clear_bit_region clears exactly the bits asked for and
1830 nothing more. */
1831
1832 static void
1833 verify_clear_bit_region (void)
1834 {
1835 /* Start with all bits set and test clearing various patterns in them. */
1836 unsigned char orig[3] = { 0xff, 0xff, 0xff};
1837 unsigned char in[3];
1838 unsigned char expected[3];
1839 memcpy (in, orig, sizeof in);
1840
1841 /* Check zeroing out all the bits. */
1842 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
1843 expected[0] = expected[1] = expected[2] = 0;
1844 verify_array_eq (in, expected, sizeof in);
1845
1846 memcpy (in, orig, sizeof in);
1847 /* Leave the first and last bits intact. */
1848 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
1849 expected[0] = 0x1;
1850 expected[1] = 0;
1851 expected[2] = 0x80;
1852 verify_array_eq (in, expected, sizeof in);
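
memcpy (in, orig, sizeof in);
/* An extra illustrative check (a sketch): clearing 10 bits starting at bit 3
spans the byte 0/byte 1 boundary and should leave bits 0-2 of byte 0,
bits 5-7 of byte 1 and all of byte 2 intact. */
clear_bit_region (in, 3, 10);
expected[0] = 0x07;
expected[1] = 0xe0;
expected[2] = 0xff;
verify_array_eq (in, expected, sizeof in);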
1853 }
1854
1855 /* Test that clear_bit_region_be clears exactly the bits asked for and
1856 nothing more. */
1857
1858 static void
1859 verify_clear_bit_region_be (void)
1860 {
1861 /* Start with all bits set and test clearing various patterns in them. */
1862 unsigned char orig[3] = { 0xff, 0xff, 0xff};
1863 unsigned char in[3];
1864 unsigned char expected[3];
1865 memcpy (in, orig, sizeof in);
1866
1867 /* Check zeroing out all the bits. */
1868 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
1869 expected[0] = expected[1] = expected[2] = 0;
1870 verify_array_eq (in, expected, sizeof in);
1871
1872 memcpy (in, orig, sizeof in);
1873 /* Leave the first and last bits intact. */
1874 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
1875 expected[0] = 0x80;
1876 expected[1] = 0;
1877 expected[2] = 0x1;
1878 verify_array_eq (in, expected, sizeof in);
1879 }
1880
1881
1882 /* Run all of the selftests within this file. */
1883
1884 void
1885 store_merging_c_tests (void)
1886 {
1887 verify_shift_bytes_in_array ();
1888 verify_shift_bytes_in_array_right ();
1889 verify_clear_bit_region ();
1890 verify_clear_bit_region_be ();
1891 }
1892
1893 } // namespace selftest
1894 #endif /* CHECKING_P. */