-/* GIMPLE store merging pass.
- Copyright (C) 2016-2017 Free Software Foundation, Inc.
+/* GIMPLE store merging and byte swapping passes.
+ Copyright (C) 2009-2020 Free Software Foundation, Inc.
Contributed by ARM Ltd.
This file is part of GCC.
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* The purpose of this pass is to combine multiple memory stores of
- constant values to consecutive memory locations into fewer wider stores.
+/* The purpose of the store merging pass is to combine multiple memory stores
+ of constant values, values loaded from memory, bitwise operations on those,
+ or bit-field values, to consecutive locations, into fewer wider stores.
+
For example, if we have a sequence performing four byte stores to
consecutive memory locations:
[p ] := imm1;
[p + 1B] := imm2;
[p + 2B] := imm3;
[p + 3B] := imm4;
we can transform this into a single 4-byte store if the target supports it:
- [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness.
+ [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
+
+ Or:
+ [p ] := [q ];
+ [p + 1B] := [q + 1B];
+ [p + 2B] := [q + 2B];
+ [p + 3B] := [q + 3B];
+ if there is no overlap, this can be transformed into a single 4-byte
+ load followed by a single 4-byte store.
+
+ Or:
+ [p ] := [q ] ^ imm1;
+ [p + 1B] := [q + 1B] ^ imm2;
+ [p + 2B] := [q + 2B] ^ imm3;
+ [p + 3B] := [q + 3B] ^ imm4;
+ if there is no overlap, this can be transformed into a single 4-byte
+ load, XORed with imm1:imm2:imm3:imm4, and stored using a single 4-byte store.
+
+ Or:
+ [p:1 ] := imm;
+ [p:31] := val & 0x7FFFFFFF;
+ we can transform this into a single 4-byte store if the target supports it:
+ [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
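+
+ As an illustration (not part of the pass itself), a source fragment like
+   struct S { char a, b, c, d; };
+   void f (struct S *p) { p->a = 1; p->b = 2; p->c = 3; p->d = 4; }
+ is a candidate for being rewritten into one 4-byte store of the
+ concatenated immediates.
+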
The algorithm is applied to each basic block in three phases:
- 1) Scan through the basic block recording constant assignments to
- destinations that can be expressed as a store to memory of a certain size
- at a certain bit offset. Record store chains to different bases in a
- hash_map (m_stores) and make sure to terminate such chains when appropriate
- (for example when when the stored values get used subsequently).
+ 1) Scan through the basic block and record assignments to destinations
+ that can be expressed as a store to memory of a certain size at a certain
+ bit offset from base expressions we can handle. For bit-fields we also
+ record the surrounding bit region, i.e. bits that could be stored in
+ a read-modify-write operation when storing the bit-field. Record store
+ chains to different bases in a hash_map (m_stores) and make sure to
+ terminate such chains when appropriate (for example when the stored
+ values get used subsequently).
These stores can be a result of structure element initializers, array stores
etc. A store_immediate_info object is recorded for every such store.
Record as many such assignments to a single base as possible until a
statement that interferes with the store sequence is encountered.
+ Each store has up to 2 operands, which can be either a constant, a memory
+ load or an SSA name, from which the value to be stored can be computed.
+ At most one of the operands can be a constant. The operands are recorded
+ in store_operand_info struct.
- 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
+ 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
store_immediate_info objects) and coalesce contiguous stores into
- merged_store_group objects.
+ merged_store_group objects. For bit-field stores, we don't need to
+ require the stores to be contiguous, just their surrounding bit regions
+ have to be contiguous. If the expression being stored is different
+ between adjacent stores, such as one store storing a constant and
+ the following one storing a value loaded from memory, or if the loaded memory
+ objects are not adjacent, a new merged_store_group is created as well.
For example, given the stores:
[p ] := 0;
multiple stores per store group to handle contiguous stores that are not
of a size that is a power of 2. For example it can try to emit a 40-bit
store as a 32-bit store followed by an 8-bit store.
- We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or
- SLOW_UNALIGNED_ACCESS rules.
+ We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
+ or TARGET_SLOW_UNALIGNED_ACCESS settings.
Note on endianness and example:
Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
-#include "params.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
+#include "gimple-fold.h"
#include "stor-layout.h"
#include "timevar.h"
+#include "cfganal.h"
+#include "cfgcleanup.h"
#include "tree-cfg.h"
+#include "except.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
+#include "rtl.h"
+#include "expr.h" /* For get_bit_range. */
+#include "optabs-tree.h"
+#include "dbgcnt.h"
#include "selftest.h"
/* The maximum size (in bits) of the stores this pass should generate. */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
-namespace {
+/* Limit to bound the number of aliasing checks for loads with the same
+ vuse as the corresponding store. */
+#define MAX_STORE_ALIAS_CHECKS 64
-/* Struct recording the information about a single store of an immediate
- to memory. These are created in the first phase and coalesced into
- merged_store_group objects in the second phase. */
+namespace {
-struct store_immediate_info
+struct bswap_stat
{
- unsigned HOST_WIDE_INT bitsize;
- unsigned HOST_WIDE_INT bitpos;
- gimple *stmt;
- unsigned int order;
- store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
- gimple *, unsigned int);
+ /* Number of hand-written 16-bit nop / bswaps found. */
+ int found_16bit;
+
+ /* Number of hand-written 32-bit nop / bswaps found. */
+ int found_32bit;
+
+ /* Number of hand-written 64-bit nop / bswaps found. */
+ int found_64bit;
+} nop_stats, bswap_stats;
+
+/* A symbolic number structure is used to detect byte permutation and selection
+ patterns of a source. To achieve that, its field N contains an artificial
+ number consisting of BITS_PER_MARKER sized markers tracking where each
+ byte comes from in the source:
+
+ 0 - target byte has the value 0
+ FF - target byte has an unknown value (eg. due to sign extension)
+ 1..size - marker value is the byte index in the source (0 for lsb).
+
+ To detect permutations on memory sources (arrays and structures), the
+ following information is also associated with a symbolic number:
+ - a base address BASE_ADDR and an OFFSET giving the address of the source;
+ - a range which gives the difference between the highest and lowest accessed
+ memory location to make such a symbolic number;
+ - the address SRC of the source element of lowest address as a convenience
+ to easily get BASE_ADDR + offset + lowest bytepos;
+ - number of expressions N_OPS bitwise ored together to represent
+ approximate cost of the computation.
+
+ Note 1: the range is different from size as size reflects the size of the
+ type of the current expression. For instance, for an array char a[],
+ (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
+ (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
+ time a range of 1.
+
+ Note 2: for non-memory sources, range holds the same value as size.
+
+ Note 3: SRC points to the SSA_NAME in case of non-memory source. */
+
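+/* For illustration (assuming BITS_PER_MARKER == 8 and a little-endian
+   target): a plain 4-byte load yields N == 0x04030201, i.e. CMPNOP
+   truncated to four markers, whereas the same four bytes reassembled in
+   reverse order yield N == 0x01020304, which matches CMPXCHG truncated to
+   four markers and is therefore recognized as a byte swap.  */
+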
+struct symbolic_number {
+ uint64_t n;
+ tree type;
+ tree base_addr;
+ tree offset;
+ poly_int64_pod bytepos;
+ tree src;
+ tree alias_set;
+ tree vuse;
+ unsigned HOST_WIDE_INT range;
+ int n_ops;
};
-store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
- unsigned HOST_WIDE_INT bp,
- gimple *st,
- unsigned int ord)
- : bitsize (bs), bitpos (bp), stmt (st), order (ord)
+#define BITS_PER_MARKER 8
+#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
+#define MARKER_BYTE_UNKNOWN MARKER_MASK
+#define HEAD_MARKER(n, size) \
+ ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
+
+/* The number which the find_bswap_or_nop_1 result should match in
+ order to have a nop. The number is masked according to the size of
+ the symbolic number before using it. */
+#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
+ (uint64_t)0x08070605 << 32 | 0x04030201)
+
+/* The number which the find_bswap_or_nop_1 result should match in
+ order to have a byte swap. The number is masked according to the
+ size of the symbolic number before using it. */
+#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
+ (uint64_t)0x01020304 << 32 | 0x05060708)
+
+/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
+ number N. Return false if the requested operation is not permitted
+ on a symbolic number. */
+
+inline bool
+do_shift_rotate (enum tree_code code,
+ struct symbolic_number *n,
+ int count)
{
+ int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ unsigned head_marker;
+
+ if (count < 0
+ || count >= TYPE_PRECISION (n->type)
+ || count % BITS_PER_UNIT != 0)
+ return false;
+ count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
+
+ /* Zero out the extra bits of N in order to avoid them being shifted
+ into the significant bits. */
+ if (size < 64 / BITS_PER_MARKER)
+ n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+
+ switch (code)
+ {
+ case LSHIFT_EXPR:
+ n->n <<= count;
+ break;
+ case RSHIFT_EXPR:
+ head_marker = HEAD_MARKER (n->n, size);
+ n->n >>= count;
+ /* Arithmetic shift of signed type: result is dependent on the value. */
+ if (!TYPE_UNSIGNED (n->type) && head_marker)
+ for (i = 0; i < count / BITS_PER_MARKER; i++)
+ n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
+ << ((size - 1 - i) * BITS_PER_MARKER);
+ break;
+ case LROTATE_EXPR:
+ n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
+ break;
+ case RROTATE_EXPR:
+ n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
+ break;
+ default:
+ return false;
+ }
+ /* Zero unused bits for size. */
+ if (size < 64 / BITS_PER_MARKER)
+ n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+ return true;
}
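+
+/* For instance, for a 4-byte unsigned symbolic number with N == 0x04030201,
+   do_shift_rotate with RSHIFT_EXPR by 8 bits yields 0x00040302, while
+   LROTATE_EXPR by 8 bits yields 0x03020104.  */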
-/* Struct representing a group of stores to contiguous memory locations.
- These are produced by the second phase (coalescing) and consumed in the
- third phase that outputs the widened stores. */
+/* Perform sanity checking for the symbolic number N and the gimple
+ statement STMT. */
-struct merged_store_group
+inline bool
+verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
- unsigned HOST_WIDE_INT start;
- unsigned HOST_WIDE_INT width;
- /* The size of the allocated memory for val. */
- unsigned HOST_WIDE_INT buf_size;
+ tree lhs_type;
- unsigned int align;
- unsigned int first_order;
- unsigned int last_order;
+ lhs_type = gimple_expr_type (stmt);
- auto_vec<struct store_immediate_info *> stores;
- /* We record the first and last original statements in the sequence because
- we'll need their vuse/vdef and replacement position. It's easier to keep
- track of them separately as 'stores' is reordered by apply_stores. */
- gimple *last_stmt;
- gimple *first_stmt;
- unsigned char *val;
+ if (TREE_CODE (lhs_type) != INTEGER_TYPE
+ && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
+ return false;
- merged_store_group (store_immediate_info *);
- ~merged_store_group ();
- void merge_into (store_immediate_info *);
- void merge_overlapping (store_immediate_info *);
- bool apply_stores ();
-};
+ if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
+ return false;
-/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
+ return true;
+}
-static void
-dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
+/* Initialize the symbolic number N for the bswap pass from the base element
+ SRC manipulated by the bitwise OR expression. */
+
+bool
+init_symbolic_number (struct symbolic_number *n, tree src)
{
- if (!fd)
- return;
+ int size;
- for (unsigned int i = 0; i < len; i++)
- fprintf (fd, "%x ", ptr[i]);
- fprintf (fd, "\n");
+ if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
+ return false;
+
+ n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
+ n->src = src;
+
+ /* Set up the symbolic number N by setting each byte to a value between 1 and
+ the byte size of SRC. The highest order byte is set to the byte size and
+ the lowest order byte to 1. */
+ n->type = TREE_TYPE (src);
+ size = TYPE_PRECISION (n->type);
+ if (size % BITS_PER_UNIT != 0)
+ return false;
+ size /= BITS_PER_UNIT;
+ if (size > 64 / BITS_PER_MARKER)
+ return false;
+ n->range = size;
+ n->n = CMPNOP;
+ n->n_ops = 1;
+
+ if (size < 64 / BITS_PER_MARKER)
+ n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+
+ return true;
}
-/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
- bits between adjacent elements. AMNT should be within
- [0, BITS_PER_UNIT).
- Example, AMNT = 2:
- 00011111|11100000 << 2 = 01111111|10000000
- PTR[1] | PTR[0] PTR[1] | PTR[0]. */
+/* Check if STMT might be a byte swap or a nop from a memory source and return
+ the answer. If so, REF is that memory source and the base of the memory area
+ accessed and the offset of the access from that base are recorded in N. */
-static void
-shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
+bool
+find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
- if (amnt == 0)
- return;
+ /* Leaf node is an array or component ref. Memorize its base and
+ offset from base to compare to other such leaf node. */
+ poly_int64 bitsize, bitpos, bytepos;
+ machine_mode mode;
+ int unsignedp, reversep, volatilep;
+ tree offset, base_addr;
+
+ /* Not prepared to handle PDP endian. */
+ if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+ return false;
+
+ if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
+ return false;
- unsigned char carry_over = 0U;
- unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
- unsigned char clear_mask = (~0U) << amnt;
+ base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
- for (unsigned int i = 0; i < sz; i++)
+ if (TREE_CODE (base_addr) == TARGET_MEM_REF)
+ /* Do not rewrite TARGET_MEM_REF. */
+ return false;
+ else if (TREE_CODE (base_addr) == MEM_REF)
{
- unsigned prev_carry_over = carry_over;
- carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
+ poly_offset_int bit_offset = 0;
+ tree off = TREE_OPERAND (base_addr, 1);
- ptr[i] <<= amnt;
- if (i != 0)
+ if (!integer_zerop (off))
{
- ptr[i] &= clear_mask;
- ptr[i] |= prev_carry_over;
+ poly_offset_int boff = mem_ref_offset (base_addr);
+ boff <<= LOG2_BITS_PER_UNIT;
+ bit_offset += boff;
}
- }
-}
-/* Like shift_bytes_in_array but for big-endian.
- Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
- bits between adjacent elements. AMNT should be within
- [0, BITS_PER_UNIT).
- Example, AMNT = 2:
- 00011111|11100000 >> 2 = 00000111|11111000
- PTR[0] | PTR[1] PTR[0] | PTR[1]. */
+ base_addr = TREE_OPERAND (base_addr, 0);
-static void
-shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
- unsigned int amnt)
-{
- if (amnt == 0)
- return;
+ /* Avoid returning a negative bitpos as this may wreak havoc later. */
+ if (maybe_lt (bit_offset, 0))
+ {
+ tree byte_offset = wide_int_to_tree
+ (sizetype, bits_to_bytes_round_down (bit_offset));
+ bit_offset = num_trailing_bits (bit_offset);
+ if (offset)
+ offset = size_binop (PLUS_EXPR, offset, byte_offset);
+ else
+ offset = byte_offset;
+ }
- unsigned char carry_over = 0U;
- unsigned char carry_mask = ~(~0U << amnt);
+ bitpos += bit_offset.force_shwi ();
+ }
+ else
+ base_addr = build_fold_addr_expr (base_addr);
- for (unsigned int i = 0; i < sz; i++)
- {
- unsigned prev_carry_over = carry_over;
- carry_over = ptr[i] & carry_mask;
+ if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
+ return false;
+ if (!multiple_p (bitsize, BITS_PER_UNIT))
+ return false;
+ if (reversep)
+ return false;
- carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
- ptr[i] >>= amnt;
- ptr[i] |= prev_carry_over;
- }
+ if (!init_symbolic_number (n, ref))
+ return false;
+ n->base_addr = base_addr;
+ n->offset = offset;
+ n->bytepos = bytepos;
+ n->alias_set = reference_alias_ptr_type (ref);
+ n->vuse = gimple_vuse (stmt);
+ return true;
}
-/* Clear out LEN bits starting from bit START in the byte array
- PTR. This clears the bits to the *right* from START.
- START must be within [0, BITS_PER_UNIT) and counts starting from
- the least significant bit. */
+/* Compute the symbolic number N representing the result of a bitwise OR on 2
+ symbolic numbers N1 and N2 whose source statements are respectively
+ SOURCE_STMT1 and SOURCE_STMT2. */
-static void
-clear_bit_region_be (unsigned char *ptr, unsigned int start,
- unsigned int len)
+gimple *
+perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
+ gimple *source_stmt2, struct symbolic_number *n2,
+ struct symbolic_number *n)
{
- if (len == 0)
- return;
- /* Clear len bits to the right of start. */
- else if (len <= start + 1)
+ int i, size;
+ uint64_t mask;
+ gimple *source_stmt;
+ struct symbolic_number *n_start;
+
+ tree rhs1 = gimple_assign_rhs1 (source_stmt1);
+ if (TREE_CODE (rhs1) == BIT_FIELD_REF
+ && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
+ rhs1 = TREE_OPERAND (rhs1, 0);
+ tree rhs2 = gimple_assign_rhs1 (source_stmt2);
+ if (TREE_CODE (rhs2) == BIT_FIELD_REF
+ && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
+ rhs2 = TREE_OPERAND (rhs2, 0);
+
+ /* Sources are different, cancel bswap if they are not memory locations with
+ the same base (array, structure, ...). */
+ if (rhs1 != rhs2)
{
- unsigned char mask = (~(~0U << len));
- mask = mask << (start + 1U - len);
- ptr[0] &= ~mask;
+ uint64_t inc;
+ HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
+ struct symbolic_number *toinc_n_ptr, *n_end;
+ basic_block bb1, bb2;
+
+ if (!n1->base_addr || !n2->base_addr
+ || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
+ return NULL;
+
+ if (!n1->offset != !n2->offset
+ || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
+ return NULL;
+
+ start1 = 0;
+ if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
+ return NULL;
+
+ if (start1 < start2)
+ {
+ n_start = n1;
+ start_sub = start2 - start1;
+ }
+ else
+ {
+ n_start = n2;
+ start_sub = start1 - start2;
+ }
+
+ bb1 = gimple_bb (source_stmt1);
+ bb2 = gimple_bb (source_stmt2);
+ if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
+ source_stmt = source_stmt1;
+ else
+ source_stmt = source_stmt2;
+
+ /* Find the highest address at which a load is performed and
+ compute related info. */
+ end1 = start1 + (n1->range - 1);
+ end2 = start2 + (n2->range - 1);
+ if (end1 < end2)
+ {
+ end = end2;
+ end_sub = end2 - end1;
+ }
+ else
+ {
+ end = end1;
+ end_sub = end1 - end2;
+ }
+ n_end = (end2 > end1) ? n2 : n1;
+
+ /* Find symbolic number whose lsb is the most significant. */
+ if (BYTES_BIG_ENDIAN)
+ toinc_n_ptr = (n_end == n1) ? n2 : n1;
+ else
+ toinc_n_ptr = (n_start == n1) ? n2 : n1;
+
+ n->range = end - MIN (start1, start2) + 1;
+
+ /* Check that the range of memory covered can be represented by
+ a symbolic number. */
+ if (n->range > 64 / BITS_PER_MARKER)
+ return NULL;
+
+ /* Reinterpret byte marks in symbolic number holding the value of
+ bigger weight according to target endianness. */
+ inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
+ size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
+ for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
+ {
+ unsigned marker
+ = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
+ if (marker && marker != MARKER_BYTE_UNKNOWN)
+ toinc_n_ptr->n += inc;
+ }
}
- else if (start != BITS_PER_UNIT - 1)
+ else
{
- clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
- clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
- len - (start % BITS_PER_UNIT) - 1);
+ n->range = n1->range;
+ n_start = n1;
+ source_stmt = source_stmt1;
}
- else if (start == BITS_PER_UNIT - 1
- && len > BITS_PER_UNIT)
+
+ if (!n1->alias_set
+ || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
+ n->alias_set = n1->alias_set;
+ else
+ n->alias_set = ptr_type_node;
+ n->vuse = n_start->vuse;
+ n->base_addr = n_start->base_addr;
+ n->offset = n_start->offset;
+ n->src = n_start->src;
+ n->bytepos = n_start->bytepos;
+ n->type = n_start->type;
+ size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+
+ for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
{
- unsigned int nbytes = len / BITS_PER_UNIT;
- for (unsigned int i = 0; i < nbytes; i++)
- ptr[i] = 0U;
- if (len % BITS_PER_UNIT != 0)
- clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
- len % BITS_PER_UNIT);
+ uint64_t masked1, masked2;
+
+ masked1 = n1->n & mask;
+ masked2 = n2->n & mask;
+ if (masked1 && masked2 && masked1 != masked2)
+ return NULL;
}
- else
- gcc_unreachable ();
+ n->n = n1->n | n2->n;
+ n->n_ops = n1->n_ops + n2->n_ops;
+
+ return source_stmt;
}
-/* In the byte array PTR clear the bit region starting at bit
- START and is LEN bits wide.
- For regions spanning multiple bytes do this recursively until we reach
- zero LEN or a region contained within a single byte. */
+/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
+ the operation given by the rhs of STMT on the result. If the operation
+ could successfully be executed the function returns a gimple stmt whose
+ rhs's first tree is the expression of the source operand, and NULL
+ otherwise. */
-static void
-clear_bit_region (unsigned char *ptr, unsigned int start,
- unsigned int len)
+gimple *
+find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
- /* Degenerate base case. */
- if (len == 0)
- return;
- else if (start >= BITS_PER_UNIT)
- clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
- /* Second base case. */
- else if ((start + len) <= BITS_PER_UNIT)
- {
- unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
- mask >>= BITS_PER_UNIT - (start + len);
+ enum tree_code code;
+ tree rhs1, rhs2 = NULL;
+ gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
+ enum gimple_rhs_class rhs_class;
- ptr[0] &= ~mask;
+ if (!limit || !is_gimple_assign (stmt))
+ return NULL;
- return;
- }
- /* Clear most significant bits in a byte and proceed with the next byte. */
- else if (start != 0)
+ rhs1 = gimple_assign_rhs1 (stmt);
+
+ if (find_bswap_or_nop_load (stmt, rhs1, n))
+ return stmt;
+
+ /* Handle BIT_FIELD_REF. */
+ if (TREE_CODE (rhs1) == BIT_FIELD_REF
+ && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
{
- clear_bit_region (ptr, start, BITS_PER_UNIT - start);
- clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
+ if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
+ || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
+ return NULL;
+
+ unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
+ unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
+ if (bitpos % BITS_PER_UNIT == 0
+ && bitsize % BITS_PER_UNIT == 0
+ && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
+ {
+ /* Handle big-endian bit numbering in BIT_FIELD_REF. */
+ if (BYTES_BIG_ENDIAN)
+ bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
+
+ /* Shift. */
+ if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
+ return NULL;
+
+ /* Mask. */
+ uint64_t mask = 0;
+ uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
+ for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
+ i++, tmp <<= BITS_PER_UNIT)
+ mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
+ n->n &= mask;
+
+ /* Convert. */
+ n->type = TREE_TYPE (rhs1);
+ if (!n->base_addr)
+ n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+
+ return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
+ }
+
+ return NULL;
}
- /* Whole bytes need to be cleared. */
- else if (start == 0 && len > BITS_PER_UNIT)
+
+ if (TREE_CODE (rhs1) != SSA_NAME)
+ return NULL;
+
+ code = gimple_assign_rhs_code (stmt);
+ rhs_class = gimple_assign_rhs_class (stmt);
+ rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
+
+ if (rhs_class == GIMPLE_BINARY_RHS)
+ rhs2 = gimple_assign_rhs2 (stmt);
+
+ /* Handle unary rhs and binary rhs with integer constants as second
+ operand. */
+
+ if (rhs_class == GIMPLE_UNARY_RHS
+ || (rhs_class == GIMPLE_BINARY_RHS
+ && TREE_CODE (rhs2) == INTEGER_CST))
{
- unsigned int nbytes = len / BITS_PER_UNIT;
- /* We could recurse on each byte but we clear whole bytes, so a simple
- memset will do. */
- memset (ptr, '\0', nbytes);
- /* Clear the remaining sub-byte region if there is one. */
- if (len % BITS_PER_UNIT != 0)
- clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
+ if (code != BIT_AND_EXPR
+ && code != LSHIFT_EXPR
+ && code != RSHIFT_EXPR
+ && code != LROTATE_EXPR
+ && code != RROTATE_EXPR
+ && !CONVERT_EXPR_CODE_P (code))
+ return NULL;
+
+ source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
+
+ /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
+ we have to initialize the symbolic number. */
+ if (!source_stmt1)
+ {
+ if (gimple_assign_load_p (stmt)
+ || !init_symbolic_number (n, rhs1))
+ return NULL;
+ source_stmt1 = stmt;
+ }
+
+ switch (code)
+ {
+ case BIT_AND_EXPR:
+ {
+ int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ uint64_t val = int_cst_value (rhs2), mask = 0;
+ uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
+
+ /* Only constants masking full bytes are allowed. */
+ for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
+ if ((val & tmp) != 0 && (val & tmp) != tmp)
+ return NULL;
+ else if (val & tmp)
+ mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
+
+ n->n &= mask;
+ }
+ break;
+ case LSHIFT_EXPR:
+ case RSHIFT_EXPR:
+ case LROTATE_EXPR:
+ case RROTATE_EXPR:
+ if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
+ return NULL;
+ break;
+ CASE_CONVERT:
+ {
+ int i, type_size, old_type_size;
+ tree type;
+
+ type = gimple_expr_type (stmt);
+ type_size = TYPE_PRECISION (type);
+ if (type_size % BITS_PER_UNIT != 0)
+ return NULL;
+ type_size /= BITS_PER_UNIT;
+ if (type_size > 64 / BITS_PER_MARKER)
+ return NULL;
+
+ /* Sign extension: result is dependent on the value. */
+ old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
+ && HEAD_MARKER (n->n, old_type_size))
+ for (i = 0; i < type_size - old_type_size; i++)
+ n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
+ << ((type_size - 1 - i) * BITS_PER_MARKER);
+
+ if (type_size < 64 / BITS_PER_MARKER)
+ {
+ /* If STMT casts to a smaller type mask out the bits not
+ belonging to the target type. */
+ n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
+ }
+ n->type = type;
+ if (!n->base_addr)
+ n->range = type_size;
+ }
+ break;
+ default:
+ return NULL;
+ };
+ return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
}
- else
- gcc_unreachable ();
-}
-/* Write BITLEN bits of EXPR to the byte array PTR at
- bit position BITPOS. PTR should contain TOTAL_BYTES elements.
- Return true if the operation succeeded. */
+ /* Handle binary rhs. */
-static bool
-encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
- unsigned int total_bytes)
-{
- unsigned int first_byte = bitpos / BITS_PER_UNIT;
- tree tmp_int = expr;
- bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
- || (bitpos % BITS_PER_UNIT)
- || !int_mode_for_size (bitlen, 0).exists ());
+ if (rhs_class == GIMPLE_BINARY_RHS)
+ {
+ struct symbolic_number n1, n2;
+ gimple *source_stmt, *source_stmt2;
- if (!sub_byte_op_p)
- return (native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
- != 0);
+ if (code != BIT_IOR_EXPR)
+ return NULL;
- /* LITTLE-ENDIAN
- We are writing a non byte-sized quantity or at a position that is not
- at a byte boundary.
- |--------|--------|--------| ptr + first_byte
- ^ ^
- xxx xxxxxxxx xxx< bp>
- |______EXPR____|
+ if (TREE_CODE (rhs2) != SSA_NAME)
+ return NULL;
- First native_encode_expr EXPR into a temporary buffer and shift each
- byte in the buffer by 'bp' (carrying the bits over as necessary).
- |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
- <------bitlen---->< bp>
- Then we clear the destination bits:
- |---00000|00000000|000-----| ptr + first_byte
- <-------bitlen--->< bp>
+ rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
- Finally we ORR the bytes of the shifted EXPR into the cleared region:
- |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
+ switch (code)
+ {
+ case BIT_IOR_EXPR:
+ source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
- BIG-ENDIAN
- We are writing a non byte-sized quantity or at a position that is not
- at a byte boundary.
- ptr + first_byte |--------|--------|--------|
- ^ ^
- <bp >xxx xxxxxxxx xxx
- |_____EXPR_____|
+ if (!source_stmt1)
+ return NULL;
- First native_encode_expr EXPR into a temporary buffer and shift each
- byte in the buffer to the right by (carrying the bits over as necessary).
- We shift by as much as needed to align the most significant bit of EXPR
- with bitpos:
- |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
- <---bitlen----> <bp ><-----bitlen----->
- Then we clear the destination bits:
- ptr + first_byte |-----000||00000000||00000---|
- <bp ><-------bitlen----->
+ source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
- Finally we ORR the bytes of the shifted EXPR into the cleared region:
- ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
- The awkwardness comes from the fact that bitpos is counted from the
- most significant bit of a byte. */
+ if (!source_stmt2)
+ return NULL;
- /* Allocate an extra byte so that we have space to shift into. */
- unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
- unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
- memset (tmpbuf, '\0', byte_size);
- /* The store detection code should only have allowed constants that are
- accepted by native_encode_expr. */
- if (native_encode_expr (expr, tmpbuf, byte_size - 1, 0) == 0)
- gcc_unreachable ();
+ if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
+ return NULL;
- /* The native_encode_expr machinery uses TYPE_MODE to determine how many
- bytes to write. This means it can write more than
- ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
+ if (n1.vuse != n2.vuse)
+ return NULL;
+
+ source_stmt
+ = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
+
+ if (!source_stmt)
+ return NULL;
+
+ if (!verify_symbolic_number_p (n, stmt))
+ return NULL;
+
+ break;
+ default:
+ return NULL;
+ }
+ return source_stmt;
+ }
+ return NULL;
+}
+
+/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
+ *CMPXCHG, *CMPNOP and adjust *N. */
+
+void
+find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
+ uint64_t *cmpnop)
+{
+ unsigned rsize;
+ uint64_t tmpn, mask;
+
+ /* The number which the find_bswap_or_nop_1 result should match in order
+ to have a full byte swap. The number is shifted to the right
+ according to the size of the symbolic number before using it. */
+ *cmpxchg = CMPXCHG;
+ *cmpnop = CMPNOP;
+
+ /* Find real size of result (highest non-zero byte). */
+ if (n->base_addr)
+ for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
+ else
+ rsize = n->range;
+
+ /* Zero out the bits corresponding to untouched bytes in original gimple
+ expression. */
+ if (n->range < (int) sizeof (int64_t))
+ {
+ mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
+ *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
+ *cmpnop &= mask;
+ }
+
+ /* Zero out the bits corresponding to unused bytes in the result of the
+ gimple expression. */
+ if (rsize < n->range)
+ {
+ if (BYTES_BIG_ENDIAN)
+ {
+ mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
+ *cmpxchg &= mask;
+ *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
+ }
+ else
+ {
+ mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
+ *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
+ *cmpnop &= mask;
+ }
+ n->range = rsize;
+ }
+
+ n->range *= BITS_PER_UNIT;
+}
+
+/* Check if STMT completes a bswap implementation or a read in a given
+ endianness consisting of ORs, SHIFTs and ANDs and set *BSWAP
+ accordingly. It also sets N to represent the kind of operations
+ performed: size of the resulting expression and whether it works on
+ a memory source, and if so alias-set and vuse. Finally, the
+ function returns a stmt whose rhs's first tree is the source
+ expression. */
+
+gimple *
+find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
+{
+ /* The last parameter determines the depth search limit. It usually
+ correlates directly to the number n of bytes to be touched. We
+ increase that number by 2 * (log2(n) + 1) here in order to also
+ cover signed -> unsigned conversions of the src operand as can be seen
+ in libgcc, and for initial shift/and operation of the src operand. */
+ int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
+ limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
+ gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
+
+ if (!ins_stmt)
+ return NULL;
+
+ uint64_t cmpxchg, cmpnop;
+ find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
+
+ /* A complete byte swap should make the symbolic number start with
+ the largest digit in the highest order byte. An unchanged symbolic
+ number indicates a read with the same endianness as the target. */
+ if (n->n == cmpnop)
+ *bswap = false;
+ else if (n->n == cmpxchg)
+ *bswap = true;
+ else
+ return NULL;
+
+ /* Useless bit manipulation performed by code. */
+ if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
+ return NULL;
+
+ return ins_stmt;
+}
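+
+/* A typical pattern handled by the machinery above is, for unsigned char *p,
+     x = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
+   which on a little-endian target can be replaced by a 4-byte load followed
+   by a __builtin_bswap32 call, and on a big-endian target by a plain 4-byte
+   load (provided the required optab/builtin is available).  */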
+
+const pass_data pass_data_optimize_bswap =
+{
+ GIMPLE_PASS, /* type */
+ "bswap", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_optimize_bswap : public gimple_opt_pass
+{
+public:
+ pass_optimize_bswap (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
+ }
+
+ virtual unsigned int execute (function *);
+
+}; // class pass_optimize_bswap
+
+/* Perform the bswap optimization: replace the expression computed in the rhs
+ of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
+ bswap, load or load + bswap expression.
+ Which of these alternatives replaces the rhs is given by N->base_addr (non
+ null if a load is needed) and BSWAP. The type, VUSE and alias set of the
+ load to perform are also given in N while the builtin bswap to invoke is
+ given in FNDECL. Finally, if a load is involved, INS_STMT refers to one of the
+ load statements involved to construct the rhs in gsi_stmt (GSI) and
+ N->range gives the size of the rhs expression for maintaining some
+ statistics.
+
+ Note that if the replacement involves a load and if gsi_stmt (GSI) is
+ non-NULL, that stmt is moved just before INS_STMT to do the load with the
+ same VUSE, which can lead to gsi_stmt (GSI) moving to a different basic
+ block. */
+
+tree
+bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
+ tree bswap_type, tree load_type, struct symbolic_number *n,
+ bool bswap)
+{
+ tree src, tmp, tgt = NULL_TREE;
+ gimple *bswap_stmt;
+
+ gimple *cur_stmt = gsi_stmt (gsi);
+ src = n->src;
+ if (cur_stmt)
+ tgt = gimple_assign_lhs (cur_stmt);
+
+ /* Need to load the value from memory first. */
+ if (n->base_addr)
+ {
+ gimple_stmt_iterator gsi_ins = gsi;
+ if (ins_stmt)
+ gsi_ins = gsi_for_stmt (ins_stmt);
+ tree addr_expr, addr_tmp, val_expr, val_tmp;
+ tree load_offset_ptr, aligned_load_type;
+ gimple *load_stmt;
+ unsigned align = get_object_alignment (src);
+ poly_int64 load_offset = 0;
+
+ if (cur_stmt)
+ {
+ basic_block ins_bb = gimple_bb (ins_stmt);
+ basic_block cur_bb = gimple_bb (cur_stmt);
+ if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
+ return NULL_TREE;
+
+ /* Move cur_stmt just before one of the loads of the original
+ to ensure it has the same VUSE. See PR61517 for what could
+ go wrong. */
+ if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
+ reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
+ gsi_move_before (&gsi, &gsi_ins);
+ gsi = gsi_for_stmt (cur_stmt);
+ }
+ else
+ gsi = gsi_ins;
+
+ /* Compute address to load from and cast according to the size
+ of the load. */
+ addr_expr = build_fold_addr_expr (src);
+ if (is_gimple_mem_ref_addr (addr_expr))
+ addr_tmp = unshare_expr (addr_expr);
+ else
+ {
+ addr_tmp = unshare_expr (n->base_addr);
+ if (!is_gimple_mem_ref_addr (addr_tmp))
+ addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
+ is_gimple_mem_ref_addr,
+ NULL_TREE, true,
+ GSI_SAME_STMT);
+ load_offset = n->bytepos;
+ if (n->offset)
+ {
+ tree off
+ = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+ gimple *stmt
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
+ POINTER_PLUS_EXPR, addr_tmp, off);
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+ addr_tmp = gimple_assign_lhs (stmt);
+ }
+ }
+
+ /* Perform the load. */
+ aligned_load_type = load_type;
+ if (align < TYPE_ALIGN (load_type))
+ aligned_load_type = build_aligned_type (load_type, align);
+ load_offset_ptr = build_int_cst (n->alias_set, load_offset);
+ val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
+ load_offset_ptr);
+
+ if (!bswap)
+ {
+ if (n->range == 16)
+ nop_stats.found_16bit++;
+ else if (n->range == 32)
+ nop_stats.found_32bit++;
+ else
+ {
+ gcc_assert (n->range == 64);
+ nop_stats.found_64bit++;
+ }
+
+ /* Convert the result of load if necessary. */
+ if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
+ {
+ val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
+ "load_dst");
+ load_stmt = gimple_build_assign (val_tmp, val_expr);
+ gimple_set_vuse (load_stmt, n->vuse);
+ gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+ gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
+ update_stmt (cur_stmt);
+ }
+ else if (cur_stmt)
+ {
+ gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
+ gimple_set_vuse (cur_stmt, n->vuse);
+ update_stmt (cur_stmt);
+ }
+ else
+ {
+ tgt = make_ssa_name (load_type);
+ cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
+ gimple_set_vuse (cur_stmt, n->vuse);
+ gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "%d bit load in target endianness found at: ",
+ (int) n->range);
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ }
+ return tgt;
+ }
+ else
+ {
+ val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
+ load_stmt = gimple_build_assign (val_tmp, val_expr);
+ gimple_set_vuse (load_stmt, n->vuse);
+ gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+ }
+ src = val_tmp;
+ }
+ else if (!bswap)
+ {
+ gimple *g = NULL;
+ if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
+ {
+ if (!is_gimple_val (src))
+ return NULL_TREE;
+ g = gimple_build_assign (tgt, NOP_EXPR, src);
+ }
+ else if (cur_stmt)
+ g = gimple_build_assign (tgt, src);
+ else
+ tgt = src;
+ if (n->range == 16)
+ nop_stats.found_16bit++;
+ else if (n->range == 32)
+ nop_stats.found_32bit++;
+ else
+ {
+ gcc_assert (n->range == 64);
+ nop_stats.found_64bit++;
+ }
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "%d bit reshuffle in target endianness found at: ",
+ (int) n->range);
+ if (cur_stmt)
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ else
+ {
+ print_generic_expr (dump_file, tgt, TDF_NONE);
+ fprintf (dump_file, "\n");
+ }
+ }
+ if (cur_stmt)
+ gsi_replace (&gsi, g, true);
+ return tgt;
+ }
+ else if (TREE_CODE (src) == BIT_FIELD_REF)
+ src = TREE_OPERAND (src, 0);
+
+ if (n->range == 16)
+ bswap_stats.found_16bit++;
+ else if (n->range == 32)
+ bswap_stats.found_32bit++;
+ else
+ {
+ gcc_assert (n->range == 64);
+ bswap_stats.found_64bit++;
+ }
+
+ tmp = src;
+
+ /* Convert the src expression if necessary. */
+ if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
+ {
+ gimple *convert_stmt;
+
+ tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
+ convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
+ gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
+ }
+
+ /* Canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
+ values are considered, as rotation of 2N-bit values by N bits is generally not
+ equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
+ gives 0x03040102 while a bswap for that value is 0x04030201. */
+ if (bswap && n->range == 16)
+ {
+ tree count = build_int_cst (NULL, BITS_PER_UNIT);
+ src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
+ bswap_stmt = gimple_build_assign (NULL, src);
+ }
+ else
+ bswap_stmt = gimple_build_call (fndecl, 1, tmp);
+
+ if (tgt == NULL_TREE)
+ tgt = make_ssa_name (bswap_type);
+ tmp = tgt;
+
+ /* Convert the result if necessary. */
+ if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
+ {
+ gimple *convert_stmt;
+
+ tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
+ convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
+ gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
+ }
+
+ gimple_set_lhs (bswap_stmt, tmp);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "%d bit bswap implementation found at: ",
+ (int) n->range);
+ if (cur_stmt)
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ else
+ {
+ print_generic_expr (dump_file, tgt, TDF_NONE);
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ if (cur_stmt)
+ {
+ gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
+ }
+ else
+ gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
+ return tgt;
+}
+
+/* Find manual byte swap implementations as well as loads in a given
+ endianness. Byte swaps are turned into a bswap builtin invocation
+ while endian loads are converted to a bswap builtin invocation or
+ a simple load according to the target endianness. */
+
+unsigned int
+pass_optimize_bswap::execute (function *fun)
+{
+ basic_block bb;
+ bool bswap32_p, bswap64_p;
+ bool changed = false;
+ tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
+
+ bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
+ bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
+ && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
+ || (bswap32_p && word_mode == SImode)));
+
+ /* Determine the argument type of the builtins. The code later on
+ assumes that the return and argument type are the same. */
+ if (bswap32_p)
+ {
+ tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+ bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+
+ if (bswap64_p)
+ {
+ tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+ bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+
+ memset (&nop_stats, 0, sizeof (nop_stats));
+ memset (&bswap_stats, 0, sizeof (bswap_stats));
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ gimple_stmt_iterator gsi;
+
+ /* We do a reverse scan for bswap patterns to make sure we get the
+ widest match. As bswap pattern matching doesn't handle previously
+ inserted smaller bswap replacements as sub-patterns, the wider
+ variant wouldn't be detected. */
+ for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
+ {
+ gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
+ tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+ enum tree_code code;
+ struct symbolic_number n;
+ bool bswap;
+
+ /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
+ might be moved to a different basic block by bswap_replace and gsi
+ must not point to it if that's the case. Moving the gsi_prev
+ there makes sure that gsi points to the statement previous to
+ cur_stmt while still making sure that all statements are
+ considered in this basic block. */
+ gsi_prev (&gsi);
+
+ if (!is_gimple_assign (cur_stmt))
+ continue;
+
+ code = gimple_assign_rhs_code (cur_stmt);
+ switch (code)
+ {
+ case LROTATE_EXPR:
+ case RROTATE_EXPR:
+ if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
+ || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
+ % BITS_PER_UNIT)
+ continue;
+ /* Fall through. */
+ case BIT_IOR_EXPR:
+ break;
+ default:
+ continue;
+ }
+
+ ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+
+ if (!ins_stmt)
+ continue;
+
+ switch (n.range)
+ {
+ case 16:
+ /* Already in canonical form, nothing to do. */
+ if (code == LROTATE_EXPR || code == RROTATE_EXPR)
+ continue;
+ load_type = bswap_type = uint16_type_node;
+ break;
+ case 32:
+ load_type = uint32_type_node;
+ if (bswap32_p)
+ {
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+ bswap_type = bswap32_type;
+ }
+ break;
+ case 64:
+ load_type = uint64_type_node;
+ if (bswap64_p)
+ {
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+ bswap_type = bswap64_type;
+ }
+ break;
+ default:
+ continue;
+ }
+
+ if (bswap && !fndecl && n.range != 16)
+ continue;
+
+ if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
+ bswap_type, load_type, &n, bswap))
+ changed = true;
+ }
+ }
+
+ statistics_counter_event (fun, "16-bit nop implementations found",
+ nop_stats.found_16bit);
+ statistics_counter_event (fun, "32-bit nop implementations found",
+ nop_stats.found_32bit);
+ statistics_counter_event (fun, "64-bit nop implementations found",
+ nop_stats.found_64bit);
+ statistics_counter_event (fun, "16-bit bswap implementations found",
+ bswap_stats.found_16bit);
+ statistics_counter_event (fun, "32-bit bswap implementations found",
+ bswap_stats.found_32bit);
+ statistics_counter_event (fun, "64-bit bswap implementations found",
+ bswap_stats.found_64bit);
+
+ return (changed ? TODO_update_ssa : 0);
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_optimize_bswap (gcc::context *ctxt)
+{
+ return new pass_optimize_bswap (ctxt);
+}
+
+namespace {
+
+/* Struct recording one operand for the store, which is either a constant,
+ then VAL represents the constant and all the other fields are zero, or
+ a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
+ and the other fields also reflect the memory load, or an SSA name, then
+ VAL represents the SSA name and all the other fields are zero. */
+
+class store_operand_info
+{
+public:
+ tree val;
+ tree base_addr;
+ poly_uint64 bitsize;
+ poly_uint64 bitpos;
+ poly_uint64 bitregion_start;
+ poly_uint64 bitregion_end;
+ gimple *stmt;
+ bool bit_not_p;
+ store_operand_info ();
+};
+
+store_operand_info::store_operand_info ()
+ : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
+ bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
+{
+}
+
+/* Struct recording the information about a single store of an immediate
+ to memory. These are created in the first phase and coalesced into
+ merged_store_group objects in the second phase. */
+
+class store_immediate_info
+{
+public:
+ unsigned HOST_WIDE_INT bitsize;
+ unsigned HOST_WIDE_INT bitpos;
+ unsigned HOST_WIDE_INT bitregion_start;
+ /* This is one past the last bit of the bit region. */
+ unsigned HOST_WIDE_INT bitregion_end;
+ gimple *stmt;
+ unsigned int order;
+ /* INTEGER_CST for constant store, STRING_CST for string store,
+ MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
+ BIT_INSERT_EXPR for bit insertion.
+ LROTATE_EXPR if it can be only bswap optimized and
+ ops are not really meaningful.
+ NOP_EXPR if bswap optimization detected identity, ops
+ are not meaningful. */
+ enum tree_code rhs_code;
+ /* Two fields for bswap optimization purposes. */
+ struct symbolic_number n;
+ gimple *ins_stmt;
+ /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
+ bool bit_not_p;
+ /* True if ops have been swapped and thus ops[1] represents
+ rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
+ bool ops_swapped_p;
+ /* The index number of the landing pad, or 0 if there is none. */
+ int lp_nr;
+ /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
+ just the first one. */
+ store_operand_info ops[2];
+ store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
+ gimple *, unsigned int, enum tree_code,
+ struct symbolic_number &, gimple *, bool, int,
+ const store_operand_info &,
+ const store_operand_info &);
+};
+
+store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
+ unsigned HOST_WIDE_INT bp,
+ unsigned HOST_WIDE_INT brs,
+ unsigned HOST_WIDE_INT bre,
+ gimple *st,
+ unsigned int ord,
+ enum tree_code rhscode,
+ struct symbolic_number &nr,
+ gimple *ins_stmtp,
+ bool bitnotp,
+ int nr2,
+ const store_operand_info &op0r,
+ const store_operand_info &op1r)
+ : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
+ stmt (st), order (ord), rhs_code (rhscode), n (nr),
+ ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
+ lp_nr (nr2)
+#if __cplusplus >= 201103L
+ , ops { op0r, op1r }
+{
+}
+#else
+{
+ ops[0] = op0r;
+ ops[1] = op1r;
+}
+#endif
+
+/* Struct representing a group of stores to contiguous memory locations.
+ These are produced by the second phase (coalescing) and consumed in the
+ third phase that outputs the widened stores. */
+
+class merged_store_group
+{
+public:
+ unsigned HOST_WIDE_INT start;
+ unsigned HOST_WIDE_INT width;
+ unsigned HOST_WIDE_INT bitregion_start;
+ unsigned HOST_WIDE_INT bitregion_end;
+ /* The size of the allocated memory for val and mask. */
+ unsigned HOST_WIDE_INT buf_size;
+ unsigned HOST_WIDE_INT align_base;
+ poly_uint64 load_align_base[2];
+
+ unsigned int align;
+ unsigned int load_align[2];
+ unsigned int first_order;
+ unsigned int last_order;
+ bool bit_insertion;
+ bool string_concatenation;
+ bool only_constants;
+ unsigned int first_nonmergeable_order;
+ int lp_nr;
+
+ auto_vec<store_immediate_info *> stores;
+ /* We record the first and last original statements in the sequence because
+ we'll need their vuse/vdef and replacement position. It's easier to keep
+ track of them separately as 'stores' is reordered by apply_stores. */
+ gimple *last_stmt;
+ gimple *first_stmt;
+ unsigned char *val;
+ unsigned char *mask;
+
+ merged_store_group (store_immediate_info *);
+ ~merged_store_group ();
+ bool can_be_merged_into (store_immediate_info *);
+ void merge_into (store_immediate_info *);
+ void merge_overlapping (store_immediate_info *);
+ bool apply_stores ();
+private:
+ void do_merge (store_immediate_info *);
+};
+
+/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
+
+static void
+dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
+{
+ if (!fd)
+ return;
+
+ for (unsigned int i = 0; i < len; i++)
+ fprintf (fd, "%02x ", ptr[i]);
+ fprintf (fd, "\n");
+}
+
+/* Clear out LEN bits starting from bit START in the byte array
+ PTR. This clears the bits to the *right* from START.
+ START must be within [0, BITS_PER_UNIT) and counts starting from
+ the least significant bit. */
+
+static void
+clear_bit_region_be (unsigned char *ptr, unsigned int start,
+ unsigned int len)
+{
+ if (len == 0)
+ return;
+ /* Clear len bits to the right of start. */
+ else if (len <= start + 1)
+ {
+ unsigned char mask = (~(~0U << len));
+ mask = mask << (start + 1U - len);
+ ptr[0] &= ~mask;
+ }
+ else if (start != BITS_PER_UNIT - 1)
+ {
+ clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
+ clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
+ len - (start % BITS_PER_UNIT) - 1);
+ }
+ else if (start == BITS_PER_UNIT - 1
+ && len > BITS_PER_UNIT)
+ {
+ unsigned int nbytes = len / BITS_PER_UNIT;
+ memset (ptr, 0, nbytes);
+ if (len % BITS_PER_UNIT != 0)
+ clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
+ len % BITS_PER_UNIT);
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* In the byte array PTR clear the bit region starting at bit
+ START and is LEN bits wide.
+ For regions spanning multiple bytes do this recursively until we reach
+ zero LEN or a region contained within a single byte. */
+
+static void
+clear_bit_region (unsigned char *ptr, unsigned int start,
+ unsigned int len)
+{
+ /* Degenerate base case. */
+ if (len == 0)
+ return;
+ else if (start >= BITS_PER_UNIT)
+ clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
+ /* Second base case. */
+ else if ((start + len) <= BITS_PER_UNIT)
+ {
+ unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
+ mask >>= BITS_PER_UNIT - (start + len);
+
+ ptr[0] &= ~mask;
+
+ return;
+ }
+ /* Clear most significant bits in a byte and proceed with the next byte. */
+ else if (start != 0)
+ {
+ clear_bit_region (ptr, start, BITS_PER_UNIT - start);
+ clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
+ }
+ /* Whole bytes need to be cleared. */
+ else if (start == 0 && len > BITS_PER_UNIT)
+ {
+ unsigned int nbytes = len / BITS_PER_UNIT;
+ /* We could recurse on each byte but we clear whole bytes, so a simple
+ memset will do. */
+ memset (ptr, '\0', nbytes);
+ /* Clear the remaining sub-byte region if there is one. */
+ if (len % BITS_PER_UNIT != 0)
+ clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
+ }
+ else
+ gcc_unreachable ();
+}
+
+/* Write BITLEN bits of EXPR to the byte array PTR at
+ bit position BITPOS. PTR should contain TOTAL_BYTES elements.
+ Return true if the operation succeeded. */
+
+static bool
+encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
+ unsigned int total_bytes)
+{
+ unsigned int first_byte = bitpos / BITS_PER_UNIT;
+ bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
+ || (bitpos % BITS_PER_UNIT)
+ || !int_mode_for_size (bitlen, 0).exists ());
+ bool empty_ctor_p
+ = (TREE_CODE (expr) == CONSTRUCTOR
+ && CONSTRUCTOR_NELTS (expr) == 0
+ && TYPE_SIZE_UNIT (TREE_TYPE (expr))
+ && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
+
+ if (!sub_byte_op_p)
+ {
+ if (first_byte >= total_bytes)
+ return false;
+ total_bytes -= first_byte;
+ if (empty_ctor_p)
+ {
+ unsigned HOST_WIDE_INT rhs_bytes
+ = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
+ if (rhs_bytes > total_bytes)
+ return false;
+ memset (ptr + first_byte, '\0', rhs_bytes);
+ return true;
+ }
+ return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
+ }
+
+ /* LITTLE-ENDIAN
+ We are writing a non byte-sized quantity or at a position that is not
+ at a byte boundary.
+ |--------|--------|--------| ptr + first_byte
+ ^ ^
+ xxx xxxxxxxx xxx< bp>
+ |______EXPR____|
+
+ First native_encode_expr EXPR into a temporary buffer and shift each
+ byte in the buffer by 'bp' (carrying the bits over as necessary).
+ |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
+ <------bitlen---->< bp>
+ Then we clear the destination bits:
+ |---00000|00000000|000-----| ptr + first_byte
+ <-------bitlen--->< bp>
+
+ Finally we ORR the bytes of the shifted EXPR into the cleared region:
+ |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
+
+ BIG-ENDIAN
+ We are writing a non byte-sized quantity or at a position that is not
+ at a byte boundary.
+ ptr + first_byte |--------|--------|--------|
+ ^ ^
+ <bp >xxx xxxxxxxx xxx
+ |_____EXPR_____|
+
+ First native_encode_expr EXPR into a temporary buffer and shift each
+ byte in the buffer to the right by (carrying the bits over as necessary).
+ We shift by as much as needed to align the most significant bit of EXPR
+ with bitpos:
+ |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
+ <---bitlen----> <bp ><-----bitlen----->
+ Then we clear the destination bits:
+ ptr + first_byte |-----000||00000000||00000---|
+ <bp ><-------bitlen----->
+
+ Finally we ORR the bytes of the shifted EXPR into the cleared region:
+ ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
+ The awkwardness comes from the fact that bitpos is counted from the
+ most significant bit of a byte. */
+
+ /* We must be dealing with fixed-size data at this point, since the
+ total size is also fixed. */
+ unsigned int byte_size;
+ if (empty_ctor_p)
+ {
+ unsigned HOST_WIDE_INT rhs_bytes
+ = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
+ if (rhs_bytes > total_bytes)
+ return false;
+ byte_size = rhs_bytes;
+ }
+ else
+ {
+ fixed_size_mode mode
+ = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
+ byte_size
+ = mode == BLKmode
+ ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
+ : GET_MODE_SIZE (mode);
+ }
+ /* Allocate an extra byte so that we have space to shift into. */
+ byte_size++;
+ unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
+ memset (tmpbuf, '\0', byte_size);
+ /* The store detection code should only have allowed constants that are
+ accepted by native_encode_expr or empty ctors. */
+ if (!empty_ctor_p
+ && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
+ gcc_unreachable ();
+
+ /* The native_encode_expr machinery uses TYPE_MODE to determine how many
+ bytes to write. This means it can write more than
+ ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
write 8 bytes for a bitlen of 40). Skip the bytes that are not within
bitlen and zero out the bits that are not relevant as well (that may
contain a sign bit due to sign-extension). */
/* Create the shifted version of EXPR. */
if (!BYTES_BIG_ENDIAN)
{
- shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
+ shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
if (shift_amnt == 0)
byte_size--;
}
else if ((*tmp)->bitpos > (*tmp2)->bitpos)
return 1;
else
- return 0;
+ /* If they are the same, use the order, which is guaranteed to
+ be different. */
+ return (*tmp)->order - (*tmp2)->order;
}
/* Sorting function for store_immediate_info objects.
{
start = info->bitpos;
width = info->bitsize;
+ bitregion_start = info->bitregion_start;
+ bitregion_end = info->bitregion_end;
/* VAL has memory allocated for it in apply_stores once the group
width has been finalized. */
val = NULL;
- align = get_object_alignment (gimple_assign_lhs (info->stmt));
+ mask = NULL;
+ bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
+ string_concatenation = info->rhs_code == STRING_CST;
+ only_constants = info->rhs_code == INTEGER_CST;
+ first_nonmergeable_order = ~0U;
+ lp_nr = info->lp_nr;
+ unsigned HOST_WIDE_INT align_bitpos = 0;
+ get_object_alignment_1 (gimple_assign_lhs (info->stmt),
+ &align, &align_bitpos);
+ align_base = start - align_bitpos;
+ for (int i = 0; i < 2; ++i)
+ {
+ store_operand_info &op = info->ops[i];
+ if (op.base_addr == NULL_TREE)
+ {
+ load_align[i] = 0;
+ load_align_base[i] = 0;
+ }
+ else
+ {
+ get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
+ load_align_base[i] = op.bitpos - align_bitpos;
+ }
+ }
stores.create (1);
stores.safe_push (info);
last_stmt = info->stmt;
XDELETEVEC (val);
}
+/* Return true if the store described by INFO can be merged into the group. */
+
+bool
+merged_store_group::can_be_merged_into (store_immediate_info *info)
+{
+ /* Do not merge bswap patterns. */
+ if (info->rhs_code == LROTATE_EXPR)
+ return false;
+
+ if (info->lp_nr != lp_nr)
+ return false;
+
+ /* The canonical case. */
+ if (info->rhs_code == stores[0]->rhs_code)
+ return true;
+
+ /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
+ if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
+ return !string_concatenation;
+
+ if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
+ return !string_concatenation;
+
+ /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
+ only for small regions since this can generate a lot of instructions. */
+ if (info->rhs_code == MEM_REF
+ && (stores[0]->rhs_code == INTEGER_CST
+ || stores[0]->rhs_code == BIT_INSERT_EXPR)
+ && info->bitregion_start == stores[0]->bitregion_start
+ && info->bitregion_end == stores[0]->bitregion_end
+ && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
+ return !string_concatenation;
+
+ if (stores[0]->rhs_code == MEM_REF
+ && (info->rhs_code == INTEGER_CST
+ || info->rhs_code == BIT_INSERT_EXPR)
+ && info->bitregion_start == stores[0]->bitregion_start
+ && info->bitregion_end == stores[0]->bitregion_end
+ && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
+ return !string_concatenation;
+
+ /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
+ if (info->rhs_code == STRING_CST
+ && stores[0]->rhs_code == INTEGER_CST
+ && stores[0]->bitsize == CHAR_BIT)
+ return !bit_insertion;
+
+ if (stores[0]->rhs_code == STRING_CST
+ && info->rhs_code == INTEGER_CST
+ && info->bitsize == CHAR_BIT)
+ return !bit_insertion;
+
+ return false;
+}
+
+/* Helper method for merge_into and merge_overlapping to do
+ the common part. */
+
+void
+merged_store_group::do_merge (store_immediate_info *info)
+{
+ bitregion_start = MIN (bitregion_start, info->bitregion_start);
+ bitregion_end = MAX (bitregion_end, info->bitregion_end);
+
+ unsigned int this_align;
+ unsigned HOST_WIDE_INT align_bitpos = 0;
+ get_object_alignment_1 (gimple_assign_lhs (info->stmt),
+ &this_align, &align_bitpos);
+ if (this_align > align)
+ {
+ align = this_align;
+ align_base = info->bitpos - align_bitpos;
+ }
+ for (int i = 0; i < 2; ++i)
+ {
+ store_operand_info &op = info->ops[i];
+ if (!op.base_addr)
+ continue;
+
+ get_object_alignment_1 (op.val, &this_align, &align_bitpos);
+ if (this_align > load_align[i])
+ {
+ load_align[i] = this_align;
+ load_align_base[i] = op.bitpos - align_bitpos;
+ }
+ }
+
+ gimple *stmt = info->stmt;
+ stores.safe_push (info);
+ if (info->order > last_order)
+ {
+ last_order = info->order;
+ last_stmt = stmt;
+ }
+ else if (info->order < first_order)
+ {
+ first_order = info->order;
+ first_stmt = stmt;
+ }
+
+ /* We need to use extraction if there is any bit-field. */
+ if (info->rhs_code == BIT_INSERT_EXPR)
+ {
+ bit_insertion = true;
+ gcc_assert (!string_concatenation);
+ }
+
+ /* We need to use concatenation if there is any string. */
+ if (info->rhs_code == STRING_CST)
+ {
+ string_concatenation = true;
+ gcc_assert (!bit_insertion);
+ }
+
+ if (info->rhs_code != INTEGER_CST)
+ only_constants = false;
+}
+
/* Merge a store recorded by INFO into this merged store.
The store is not overlapping with the existing recorded
stores. */
void
merged_store_group::merge_into (store_immediate_info *info)
{
- unsigned HOST_WIDE_INT wid = info->bitsize;
/* Make sure we're inserting in the position we think we're inserting. */
- gcc_assert (info->bitpos == start + width);
+ gcc_assert (info->bitpos >= start + width
+ && info->bitregion_start <= bitregion_end);
- width += wid;
- gimple *stmt = info->stmt;
- stores.safe_push (info);
- if (info->order > last_order)
- {
- last_order = info->order;
- last_stmt = stmt;
- }
- else if (info->order < first_order)
- {
- first_order = info->order;
- first_stmt = stmt;
- }
+ width = info->bitpos + info->bitsize - start;
+ do_merge (info);
}
/* Merge a store described by INFO into this merged store.
void
merged_store_group::merge_overlapping (store_immediate_info *info)
{
- gimple *stmt = info->stmt;
- stores.safe_push (info);
-
/* If the store extends the size of the group, extend the width. */
- if ((info->bitpos + info->bitsize) > (start + width))
- width += info->bitpos + info->bitsize - (start + width);
+ if (info->bitpos + info->bitsize > start + width)
+ width = info->bitpos + info->bitsize - start;
- if (info->order > last_order)
- {
- last_order = info->order;
- last_stmt = stmt;
- }
- else if (info->order < first_order)
- {
- first_order = info->order;
- first_stmt = stmt;
- }
+ do_merge (info);
}
/* Go through all the recorded stores in this group in program order and
bool
merged_store_group::apply_stores ()
{
- /* The total width of the stores must add up to a whole number of bytes
- and start at a byte boundary. We don't support emitting bitfield
- references for now. Also, make sure we have more than one store
- in the group, otherwise we cannot merge anything. */
- if (width % BITS_PER_UNIT != 0
- || start % BITS_PER_UNIT != 0
+ store_immediate_info *info;
+ unsigned int i;
+
+ /* Make sure we have more than one store in the group, otherwise we cannot
+ merge anything. */
+ if (bitregion_start % BITS_PER_UNIT != 0
+ || bitregion_end % BITS_PER_UNIT != 0
|| stores.length () == 1)
return false;
+ buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
+
+ /* Really do string concatenation for large strings only. */
+ if (buf_size <= MOVE_MAX)
+ string_concatenation = false;
+
+ /* Create a power-of-2-sized buffer for native_encode_expr. */
+ if (!string_concatenation)
+ buf_size = 1 << ceil_log2 (buf_size);
+
+ val = XNEWVEC (unsigned char, 2 * buf_size);
+ mask = val + buf_size;
+ memset (val, 0, buf_size);
+ memset (mask, ~0U, buf_size);
+
stores.qsort (sort_by_order);
- struct store_immediate_info *info;
- unsigned int i;
- /* Create a buffer of a size that is 2 times the number of bytes we're
- storing. That way native_encode_expr can write power-of-2-sized
- chunks without overrunning. */
- buf_size = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT);
- val = XCNEWVEC (unsigned char, buf_size);
FOR_EACH_VEC_ELT (stores, i, info)
{
- unsigned int pos_in_buffer = info->bitpos - start;
- bool ret = encode_tree_to_bitpos (gimple_assign_rhs1 (info->stmt),
- val, info->bitsize,
- pos_in_buffer, buf_size);
- if (dump_file && (dump_flags & TDF_DETAILS))
+ unsigned int pos_in_buffer = info->bitpos - bitregion_start;
+ tree cst;
+ if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
+ cst = info->ops[0].val;
+ else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
+ cst = info->ops[1].val;
+ else
+ cst = NULL_TREE;
+ bool ret = true;
+ if (cst && info->rhs_code != BIT_INSERT_EXPR)
+ ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
+ buf_size);
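+ /* Also clear the written bits in MASK: any bit still set in MASK
+ once all the stores have been processed is a bit that no store in
+ the group writes. */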
+ unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
+ if (BYTES_BIG_ENDIAN)
+ clear_bit_region_be (m, (BITS_PER_UNIT - 1
+ - (pos_in_buffer % BITS_PER_UNIT)),
+ info->bitsize);
+ else
+ clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
+ if (cst && dump_file && (dump_flags & TDF_DETAILS))
{
if (ret)
{
- fprintf (dump_file, "After writing ");
- print_generic_expr (dump_file,
- gimple_assign_rhs1 (info->stmt), 0);
+ fputs ("After writing ", dump_file);
+ print_generic_expr (dump_file, cst, TDF_NONE);
fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
- " at position %d the merged region contains:\n",
- info->bitsize, pos_in_buffer);
+ " at position %d\n", info->bitsize, pos_in_buffer);
+ fputs (" the merged value contains ", dump_file);
dump_char_array (dump_file, val, buf_size);
+ fputs (" the merged mask contains ", dump_file);
+ dump_char_array (dump_file, mask, buf_size);
+ if (bit_insertion)
+ fputs (" bit insertion is required\n", dump_file);
+ if (string_concatenation)
+ fputs (" string concatenation is required\n", dump_file);
}
else
fprintf (dump_file, "Failed to merge stores\n");
- }
+ }
if (!ret)
return false;
}
+ stores.qsort (sort_by_bitpos);
return true;
}
/* Structure describing the store chain. */
-struct imm_store_chain_info
+class imm_store_chain_info
{
+public:
/* Doubly-linked list that imposes an order on chain processing.
PNXP (prev's next pointer) points to the head of a list, or to
the next field in the previous chain in the list.
See pass_store_merging::m_stores_head for more rationale. */
imm_store_chain_info *next, **pnxp;
tree base_addr;
- auto_vec<struct store_immediate_info *> m_store_info;
+ auto_vec<store_immediate_info *> m_store_info;
auto_vec<merged_store_group *> m_merged_store_groups;
imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
}
}
bool terminate_and_process_chain ();
+ bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
bool coalesce_immediate_stores ();
bool output_merged_store (merged_store_group *);
bool output_merged_stores ();
{
}
- /* Pass not supported for PDP-endianness. */
+ /* Pass not supported for PDP-endian, nor for insane hosts or
+ target character sizes where native_{encode,interpret}_expr
+ doesn't work properly. */
virtual bool
gate (function *)
{
- return flag_store_merging && (WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN);
+ return flag_store_merging
+ && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
+ && CHAR_BIT == 8
+ && BITS_PER_UNIT == 8;
}
virtual unsigned int execute (function *);
private:
- hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;
+ hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
/* Form a doubly-linked stack of the elements of m_stores, so that
we can iterate over them in a predictable way. Using this order
decisions when going out of SSA). */
imm_store_chain_info *m_stores_head;
+ bool process_store (gimple *);
+ bool terminate_and_process_chain (imm_store_chain_info *);
+ bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
bool terminate_and_process_all_chains ();
- bool terminate_all_aliasing_chains (imm_store_chain_info **,
- bool, gimple *);
- bool terminate_and_release_chain (imm_store_chain_info *);
}; // class pass_store_merging
/* Terminate and process all recorded chains. Return true if any changes
{
bool ret = false;
while (m_stores_head)
- ret |= terminate_and_release_chain (m_stores_head);
- gcc_assert (m_stores.elements () == 0);
- gcc_assert (m_stores_head == NULL);
-
+ ret |= terminate_and_process_chain (m_stores_head);
+ gcc_assert (m_stores.is_empty ());
return ret;
}
-/* Terminate all chains that are affected by the assignment to DEST, appearing
- in statement STMT and ultimately points to the object BASE. Return true if
- at least one aliasing chain was terminated. BASE and DEST are allowed to
- be NULL_TREE. In that case the aliasing checks are performed on the whole
- statement rather than a particular operand in it. VAR_OFFSET_P signifies
- whether STMT represents a store to BASE offset by a variable amount.
- If that is the case we have to terminate any chain anchored at BASE. */
+/* Terminate all chains that are affected by the statement STMT.
+ CHAIN_INFO is the chain we should ignore from the checks if
+ non-NULL. Return true if any changes were made. */
bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
**chain_info,
- bool var_offset_p,
gimple *stmt)
{
bool ret = false;
if (!gimple_vuse (stmt))
return false;
- /* Check if the assignment destination (BASE) is part of a store chain.
- This is to catch non-constant stores to destinations that may be part
- of a chain. */
- if (chain_info)
- {
- /* We have a chain at BASE and we're writing to [BASE + <variable>].
- This can interfere with any of the stores so terminate
- the chain. */
- if (var_offset_p)
- {
- terminate_and_release_chain (*chain_info);
- ret = true;
- }
- /* Otherwise go through every store in the chain to see if it
- aliases with any of them. */
- else
- {
- struct store_immediate_info *info;
- unsigned int i;
- FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
- {
- if (ref_maybe_used_by_stmt_p (stmt,
- gimple_assign_lhs (info->stmt))
- || stmt_may_clobber_ref_p (stmt,
- gimple_assign_lhs (info->stmt)))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file,
- "stmt causes chain termination:\n");
- print_gimple_stmt (dump_file, stmt, 0);
- }
- terminate_and_release_chain (*chain_info);
- ret = true;
- break;
- }
- }
- }
- }
-
- /* Check for aliasing with all other store chains. */
+ tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
+ ao_ref store_lhs_ref;
+ ao_ref_init (&store_lhs_ref, store_lhs);
for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
{
next = cur->next;
/* We already checked all the stores in chain_info and terminated the
chain if necessary. Skip it here. */
- if (chain_info && (*chain_info) == cur)
+ if (chain_info && *chain_info == cur)
continue;
- /* We can't use the base object here as that does not reliably exist.
- Build a ao_ref from the base object address (if we know the
- minimum and maximum offset and the maximum size we could improve
- things here). */
- ao_ref chain_ref;
- ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
- if (ref_maybe_used_by_stmt_p (stmt, &chain_ref)
- || stmt_may_clobber_ref_p_1 (stmt, &chain_ref))
+ store_immediate_info *info;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
{
- terminate_and_release_chain (cur);
- ret = true;
+ tree lhs = gimple_assign_lhs (info->stmt);
+ ao_ref lhs_ref;
+ ao_ref_init (&lhs_ref, lhs);
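+ /* Terminate the chain if STMT may read the destination of this
+ recorded store, may clobber it, or, when STMT is itself a store,
+ if the two destinations may alias. */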
+ if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
+ || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
+ || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
+ &lhs_ref, false)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "stmt causes chain termination:\n");
+ print_gimple_stmt (dump_file, stmt, 0);
+ }
+ ret |= terminate_and_process_chain (cur);
+ break;
+ }
}
}
entry is removed after the processing in any case. */
bool
-pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info)
+pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
{
bool ret = chain_info->terminate_and_process_chain ();
m_stores.remove (chain_info->base_addr);
return ret;
}
+/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
+ may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
+ be able to sink load of REF across stores between FIRST and LAST, up
+ to right before LAST. */
+
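+/* The walk below goes backwards through the virtual use-def chain:
+ starting from LAST's gimple_vdef it follows the gimple_vuse links
+ until FIRST is reached, giving up conservatively after
+ MAX_STORE_ALIAS_CHECKS statements to bound compile time. */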
+bool
+stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
+{
+ ao_ref r;
+ ao_ref_init (&r, ref);
+ unsigned int count = 0;
+ tree vop = gimple_vdef (last);
+ gimple *stmt;
+
+ /* Return true conservatively if the basic blocks are different. */
+ if (gimple_bb (first) != gimple_bb (last))
+ return true;
+
+ do
+ {
+ stmt = SSA_NAME_DEF_STMT (vop);
+ if (stmt_may_clobber_ref_p_1 (stmt, &r))
+ return true;
+ if (gimple_store_p (stmt)
+ && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
+ return true;
+ /* Avoid quadratic compile time by bounding the number of checks
+ we perform. */
+ if (++count > MAX_STORE_ALIAS_CHECKS)
+ return true;
+ vop = gimple_vuse (stmt);
+ }
+ while (stmt != first);
+
+ return false;
+}
+
+/* Return true if INFO->ops[IDX] is mergeable with the
+ corresponding loads already in MERGED_STORE group.
+ BASE_ADDR is the base address of the whole store group. */
+
+bool
+compatible_load_p (merged_store_group *merged_store,
+ store_immediate_info *info,
+ tree base_addr, int idx)
+{
+ store_immediate_info *infof = merged_store->stores[0];
+ if (!info->ops[idx].base_addr
+ || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
+ info->bitpos - infof->bitpos)
+ || !operand_equal_p (info->ops[idx].base_addr,
+ infof->ops[idx].base_addr, 0))
+ return false;
+
+ store_immediate_info *infol = merged_store->stores.last ();
+ tree load_vuse = gimple_vuse (info->ops[idx].stmt);
+ /* In this case all vuses should be the same, e.g.
+ _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
+ or
+ _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
+ and we can emit the coalesced load next to any of those loads. */
+ if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
+ && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
+ return true;
+
+ /* Otherwise, at least for now require that the load has the same
+ vuse as the store. See following examples. */
+ if (gimple_vuse (info->stmt) != load_vuse)
+ return false;
+
+ if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
+ || (infof != infol
+ && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
+ return false;
+
+ /* If the load is from the same location as the store, already
+ the construction of the immediate chain info guarantees no intervening
+ stores, so no further checks are needed. Example:
+ _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
+ if (known_eq (info->ops[idx].bitpos, info->bitpos)
+ && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
+ return true;
+
+ /* Otherwise, we need to punt if any of the loads can be clobbered by any
+ of the stores in the group, or any other stores in between those.
+ Previous calls to compatible_load_p ensured that for all the
+ merged_store->stores IDX loads, no stmts starting with
+ merged_store->first_stmt and ending right before merged_store->last_stmt
+ clobber those loads. */
+ gimple *first = merged_store->first_stmt;
+ gimple *last = merged_store->last_stmt;
+ unsigned int i;
+ store_immediate_info *infoc;
+ /* The stores are sorted by increasing store bitpos, so if info->stmt store
+ comes before the so far first load, we'll be changing
+ merged_store->first_stmt. In that case we need to give up if
+ any of the earlier processed loads are clobbered by the stmts in the
+ new range. */
+ if (info->order < merged_store->first_order)
+ {
+ FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
+ if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
+ return false;
+ first = info->stmt;
+ }
+ /* Similarly, we could change merged_store->last_stmt, so ensure
+ in that case no stmts in the new range clobber any of the earlier
+ processed loads. */
+ else if (info->order > merged_store->last_order)
+ {
+ FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
+ if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
+ return false;
+ last = info->stmt;
+ }
+ /* And finally, we'd be adding a new load to the set, ensure it isn't
+ clobbered in the new range. */
+ if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
+ return false;
+
+ /* Otherwise, we are looking for:
+ _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
+ or
+ _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
+ return true;
+}
+
+/* Add all refs loaded to compute VAL to REFS vector. */
+
+void
+gather_bswap_load_refs (vec<tree> *refs, tree val)
+{
+ if (TREE_CODE (val) != SSA_NAME)
+ return;
+
+ gimple *stmt = SSA_NAME_DEF_STMT (val);
+ if (!is_gimple_assign (stmt))
+ return;
+
+ if (gimple_assign_load_p (stmt))
+ {
+ refs->safe_push (gimple_assign_rhs1 (stmt));
+ return;
+ }
+
+ switch (gimple_assign_rhs_class (stmt))
+ {
+ case GIMPLE_BINARY_RHS:
+ gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
+ /* FALLTHRU */
+ case GIMPLE_UNARY_RHS:
+ gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Check if there are any stores in M_STORE_INFO after index I
+ (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
+ a potential group ending with END that have their order
+ smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
+ all the stores already merged and the one under consideration
+ have rhs_code of INTEGER_CST. Return true if there are no such stores.
+ Consider:
+ MEM[(long long int *)p_28] = 0;
+ MEM[(long long int *)p_28 + 8B] = 0;
+ MEM[(long long int *)p_28 + 16B] = 0;
+ MEM[(long long int *)p_28 + 24B] = 0;
+ _129 = (int) _130;
+ MEM[(int *)p_28 + 8B] = _129;
+ MEM[(int *)p_28].a = -1;
+ We already have
+ MEM[(long long int *)p_28] = 0;
+ MEM[(int *)p_28].a = -1;
+ stmts in the current group and need to consider if it is safe to
+ add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
+ There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
+ store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
+ into the group and merging of those 3 stores is successful, merged
+ stmts will be emitted at the latest store from that group, i.e.
+ LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
+ The MEM[(int *)p_28 + 8B] = _129; store that originally follows
+ the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
+ so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
+ into the group. That way it will be its own store group and will
+ not be touched. If ALL_INTEGER_CST_P and there are overlapping
+ INTEGER_CST stores, those are mergeable using merge_overlapping,
+ so don't return false for those. */
+
+static bool
+check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
+ bool all_integer_cst_p, unsigned int last_order,
+ unsigned HOST_WIDE_INT end)
+{
+ unsigned int len = m_store_info.length ();
+ for (++i; i < len; ++i)
+ {
+ store_immediate_info *info = m_store_info[i];
+ if (info->bitpos >= end)
+ break;
+ if (info->order < last_order
+ && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
+ return false;
+ }
+ return true;
+}
+
+/* Return true if m_store_info[first] and at least one following store
+ form a group which stores a TRY_SIZE bitsize value that is byte swapped
+ from a memory load or from some value, or is the identity of some value.
+ This uses the bswap pass APIs.
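+
+ For example, on a little-endian target the sequence
+ p[0] = data >> 24;
+ p[1] = data >> 16;
+ p[2] = data >> 8;
+ p[3] = data;
+ (the same example used in coalesce_immediate_stores below) forms such
+ a group: it stores the 32-bit value DATA byte swapped and can be
+ emitted as a single store of a bswap of DATA. */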
+
+bool
+imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
+ unsigned int first,
+ unsigned int try_size)
+{
+ unsigned int len = m_store_info.length (), last = first;
+ unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
+ if (width >= try_size)
+ return false;
+ for (unsigned int i = first + 1; i < len; ++i)
+ {
+ if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
+ || m_store_info[i]->lp_nr != merged_store->lp_nr
+ || m_store_info[i]->ins_stmt == NULL)
+ return false;
+ width += m_store_info[i]->bitsize;
+ if (width >= try_size)
+ {
+ last = i;
+ break;
+ }
+ }
+ if (width != try_size)
+ return false;
+
+ bool allow_unaligned
+ = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
+ /* Punt if the combined store would not be aligned and we need alignment. */
+ if (!allow_unaligned)
+ {
+ unsigned int align = merged_store->align;
+ unsigned HOST_WIDE_INT align_base = merged_store->align_base;
+ for (unsigned int i = first + 1; i <= last; ++i)
+ {
+ unsigned int this_align;
+ unsigned HOST_WIDE_INT align_bitpos = 0;
+ get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
+ &this_align, &align_bitpos);
+ if (this_align > align)
+ {
+ align = this_align;
+ align_base = m_store_info[i]->bitpos - align_bitpos;
+ }
+ }
+ unsigned HOST_WIDE_INT align_bitpos
+ = (m_store_info[first]->bitpos - align_base) & (align - 1);
+ if (align_bitpos)
+ align = least_bit_hwi (align_bitpos);
+ if (align < try_size)
+ return false;
+ }
+
+ tree type;
+ switch (try_size)
+ {
+ case 16: type = uint16_type_node; break;
+ case 32: type = uint32_type_node; break;
+ case 64: type = uint64_type_node; break;
+ default: gcc_unreachable ();
+ }
+ struct symbolic_number n;
+ gimple *ins_stmt = NULL;
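+ /* Track how the loads feeding the stores relate to the stores'
+ virtual operands: -1 means not yet determined, 0 means the loads
+ all share a single vuse, 1 means each load has the vuse of its
+ corresponding store. */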
+ int vuse_store = -1;
+ unsigned int first_order = merged_store->first_order;
+ unsigned int last_order = merged_store->last_order;
+ gimple *first_stmt = merged_store->first_stmt;
+ gimple *last_stmt = merged_store->last_stmt;
+ unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
+ store_immediate_info *infof = m_store_info[first];
+
+ for (unsigned int i = first; i <= last; ++i)
+ {
+ store_immediate_info *info = m_store_info[i];
+ struct symbolic_number this_n = info->n;
+ this_n.type = type;
+ if (!this_n.base_addr)
+ this_n.range = try_size / BITS_PER_UNIT;
+ else
+ /* Update vuse in case it has changed by output_merged_stores. */
+ this_n.vuse = gimple_vuse (info->ins_stmt);
+ unsigned int bitpos = info->bitpos - infof->bitpos;
+ if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
+ BYTES_BIG_ENDIAN
+ ? try_size - info->bitsize - bitpos
+ : bitpos))
+ return false;
+ if (this_n.base_addr && vuse_store)
+ {
+ unsigned int j;
+ for (j = first; j <= last; ++j)
+ if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
+ break;
+ if (j > last)
+ {
+ if (vuse_store == 1)
+ return false;
+ vuse_store = 0;
+ }
+ }
+ if (i == first)
+ {
+ n = this_n;
+ ins_stmt = info->ins_stmt;
+ }
+ else
+ {
+ if (n.base_addr && n.vuse != this_n.vuse)
+ {
+ if (vuse_store == 0)
+ return false;
+ vuse_store = 1;
+ }
+ if (info->order > last_order)
+ {
+ last_order = info->order;
+ last_stmt = info->stmt;
+ }
+ else if (info->order < first_order)
+ {
+ first_order = info->order;
+ first_stmt = info->stmt;
+ }
+ end = MAX (end, info->bitpos + info->bitsize);
+
+ ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
+ &this_n, &n);
+ if (ins_stmt == NULL)
+ return false;
+ }
+ }
+
+ uint64_t cmpxchg, cmpnop;
+ find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
+
+ /* A complete byte swap should make the symbolic number start with
+ the largest digit in the highest order byte. An unchanged symbolic
+ number indicates a read with the same endianness as the target
+ architecture.
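+ E.g. with the byte markers used by the bswap machinery, for a full
+ 64-bit value the symbolic number 0x0102030405060708 denotes a byte
+ swap, while 0x0807060504030201 denotes an identity (native-endian)
+ read. */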
+ if (n.n != cmpnop && n.n != cmpxchg)
+ return false;
+
+ if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
+ return false;
+
+ if (!check_no_overlap (m_store_info, last, false, last_order, end))
+ return false;
+
+ /* Don't handle memory copy this way if normal non-bswap processing
+ would handle it too. */
+ if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
+ {
+ unsigned int i;
+ for (i = first; i <= last; ++i)
+ if (m_store_info[i]->rhs_code != MEM_REF)
+ break;
+ if (i == last + 1)
+ return false;
+ }
+
+ if (n.n == cmpxchg)
+ switch (try_size)
+ {
+ case 16:
+ /* Will emit LROTATE_EXPR. */
+ break;
+ case 32:
+ if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
+ break;
+ return false;
+ case 64:
+ if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
+ && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
+ break;
+ return false;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!allow_unaligned && n.base_addr)
+ {
+ unsigned int align = get_object_alignment (n.src);
+ if (align < try_size)
+ return false;
+ }
+
+ /* If each load has the vuse of the corresponding store, we need to
+ verify that the loads can be sunk right before the last store. */
+ if (vuse_store == 1)
+ {
+ auto_vec<tree, 64> refs;
+ for (unsigned int i = first; i <= last; ++i)
+ gather_bswap_load_refs (&refs,
+ gimple_assign_rhs1 (m_store_info[i]->stmt));
+
+ unsigned int i;
+ tree ref;
+ FOR_EACH_VEC_ELT (refs, i, ref)
+ if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
+ return false;
+ n.vuse = NULL_TREE;
+ }
+
+ infof->n = n;
+ infof->ins_stmt = ins_stmt;
+ for (unsigned int i = first; i <= last; ++i)
+ {
+ m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
+ m_store_info[i]->ops[0].base_addr = NULL_TREE;
+ m_store_info[i]->ops[1].base_addr = NULL_TREE;
+ if (i != first)
+ merged_store->merge_into (m_store_info[i]);
+ }
+
+ return true;
+}
+
/* Go through the candidate stores recorded in m_store_info and merge them
into merged_store_group objects recorded into m_merged_store_groups
representing the widened stores. Return true if coalescing was successful
return false;
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
+ fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
m_store_info.length ());
store_immediate_info *info;
- unsigned int i;
+ unsigned int i, ignore = 0;
/* Order the stores by the bitposition they write to. */
m_store_info.qsort (sort_by_bitpos);
info = m_store_info[0];
merged_store_group *merged_store = new merged_store_group (info);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fputs ("New store group\n", dump_file);
FOR_EACH_VEC_ELT (m_store_info, i, info)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
+ unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
+
+ if (i <= ignore)
+ goto done;
+
+ /* First try to handle group of stores like:
+ p[0] = data >> 24;
+ p[1] = data >> 16;
+ p[2] = data >> 8;
+ p[3] = data;
+ using the bswap framework. */
+ if (info->bitpos == merged_store->start + merged_store->width
+ && merged_store->stores.length () == 1
+ && merged_store->stores[0]->ins_stmt != NULL
+ && info->lp_nr == merged_store->lp_nr
+ && info->ins_stmt != NULL)
{
- fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
- " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
- i, info->bitsize, info->bitpos);
- print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
- fprintf (dump_file, "\n------------\n");
+ unsigned int try_size;
+ for (try_size = 64; try_size >= 16; try_size >>= 1)
+ if (try_coalesce_bswap (merged_store, i - 1, try_size))
+ break;
+
+ if (try_size >= 16)
+ {
+ ignore = i + merged_store->stores.length () - 1;
+ m_merged_store_groups.safe_push (merged_store);
+ if (ignore < m_store_info.length ())
+ merged_store = new merged_store_group (m_store_info[ignore]);
+ else
+ merged_store = NULL;
+ goto done;
+ }
}
- if (i == 0)
- continue;
+ new_bitregion_start
+ = MIN (merged_store->bitregion_start, info->bitregion_start);
+ new_bitregion_end
+ = MAX (merged_store->bitregion_end, info->bitregion_end);
+
+ if (info->order >= merged_store->first_nonmergeable_order
+ || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
+ > (unsigned) param_store_merging_max_size))
+ ;
/* |---store 1---|
|---store 2---|
- Overlapping stores. */
- unsigned HOST_WIDE_INT start = info->bitpos;
- if (IN_RANGE (start, merged_store->start,
- merged_store->start + merged_store->width - 1))
+ Overlapping stores. */
+ else if (IN_RANGE (info->bitpos, merged_store->start,
+ merged_store->start + merged_store->width - 1)
+ /* |---store 1---||---store 2---|
+ Handle also the consecutive INTEGER_CST stores case here,
+ as we have here the code to deal with overlaps. */
+ || (info->bitregion_start <= merged_store->bitregion_end
+ && info->rhs_code == INTEGER_CST
+ && merged_store->only_constants
+ && merged_store->can_be_merged_into (info)))
{
- merged_store->merge_overlapping (info);
- continue;
+ /* Only allow overlapping stores of constants. */
+ if (info->rhs_code == INTEGER_CST
+ && merged_store->only_constants
+ && info->lp_nr == merged_store->lp_nr)
+ {
+ unsigned int last_order
+ = MAX (merged_store->last_order, info->order);
+ unsigned HOST_WIDE_INT end
+ = MAX (merged_store->start + merged_store->width,
+ info->bitpos + info->bitsize);
+ if (check_no_overlap (m_store_info, i, true, last_order, end))
+ {
+ /* check_no_overlap call above made sure there are no
+ overlapping stores with non-INTEGER_CST rhs_code
+ in between the first and last of the stores we've
+ just merged. If there are any INTEGER_CST rhs_code
+ stores in between, we need to merge_overlapping them
+ even if in the sort_by_bitpos order there are other
+ overlapping stores in between. Keep those stores as is.
+ Example:
+ MEM[(int *)p_28] = 0;
+ MEM[(char *)p_28 + 3B] = 1;
+ MEM[(char *)p_28 + 1B] = 2;
+ MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
+ We can't merge the zero store with the store of two and
+ not merge anything else, because the store of one is
+ in the original order in between those two, but in
+ sort_by_bitpos order it comes after the last store that
+ we can't merge with them. We can merge the first 3 stores
+ and keep the last store as is though. */
+ unsigned int len = m_store_info.length ();
+ unsigned int try_order = last_order;
+ unsigned int first_nonmergeable_order;
+ unsigned int k;
+ bool last_iter = false;
+ int attempts = 0;
+ do
+ {
+ unsigned int max_order = 0;
+ unsigned first_nonmergeable_int_order = ~0U;
+ unsigned HOST_WIDE_INT this_end = end;
+ k = i;
+ first_nonmergeable_order = ~0U;
+ for (unsigned int j = i + 1; j < len; ++j)
+ {
+ store_immediate_info *info2 = m_store_info[j];
+ if (info2->bitpos >= this_end)
+ break;
+ if (info2->order < try_order)
+ {
+ if (info2->rhs_code != INTEGER_CST
+ || info2->lp_nr != merged_store->lp_nr)
+ {
+ /* Normally check_no_overlap makes sure this
+ doesn't happen, but if end grows below,
+ then we need to process more stores than
+ check_no_overlap verified. Example:
+ MEM[(int *)p_5] = 0;
+ MEM[(short *)p_5 + 3B] = 1;
+ MEM[(char *)p_5 + 4B] = _9;
+ MEM[(char *)p_5 + 2B] = 2; */
+ k = 0;
+ break;
+ }
+ k = j;
+ this_end = MAX (this_end,
+ info2->bitpos + info2->bitsize);
+ }
+ else if (info2->rhs_code == INTEGER_CST
+ && info2->lp_nr == merged_store->lp_nr
+ && !last_iter)
+ {
+ max_order = MAX (max_order, info2->order + 1);
+ first_nonmergeable_int_order
+ = MIN (first_nonmergeable_int_order,
+ info2->order);
+ }
+ else
+ first_nonmergeable_order
+ = MIN (first_nonmergeable_order, info2->order);
+ }
+ if (k == 0)
+ {
+ if (last_order == try_order)
+ break;
+ /* If this failed, but only because we grew
+ try_order, retry with the last working one,
+ so that we merge at least something. */
+ try_order = last_order;
+ last_iter = true;
+ continue;
+ }
+ last_order = try_order;
+ /* Retry with a larger try_order to see if we could
+ merge some further INTEGER_CST stores. */
+ if (max_order
+ && (first_nonmergeable_int_order
+ < first_nonmergeable_order))
+ {
+ try_order = MIN (max_order,
+ first_nonmergeable_order);
+ try_order
+ = MIN (try_order,
+ merged_store->first_nonmergeable_order);
+ if (try_order > last_order && ++attempts < 16)
+ continue;
+ }
+ first_nonmergeable_order
+ = MIN (first_nonmergeable_order,
+ first_nonmergeable_int_order);
+ end = this_end;
+ break;
+ }
+ while (1);
+
+ if (k != 0)
+ {
+ merged_store->merge_overlapping (info);
+
+ merged_store->first_nonmergeable_order
+ = MIN (merged_store->first_nonmergeable_order,
+ first_nonmergeable_order);
+
+ for (unsigned int j = i + 1; j <= k; j++)
+ {
+ store_immediate_info *info2 = m_store_info[j];
+ gcc_assert (info2->bitpos < end);
+ if (info2->order < last_order)
+ {
+ gcc_assert (info2->rhs_code == INTEGER_CST);
+ if (info != info2)
+ merged_store->merge_overlapping (info2);
+ }
+ /* Other stores are kept and not merged in any
+ way. */
+ }
+ ignore = k;
+ goto done;
+ }
+ }
+ }
+ }
+ /* |---store 1---||---store 2---|
+ This store is consecutive to the previous one.
+ Merge it into the current store group. There can be gaps in between
+ the stores, but there can't be gaps in between bitregions. */
+ else if (info->bitregion_start <= merged_store->bitregion_end
+ && merged_store->can_be_merged_into (info))
+ {
+ store_immediate_info *infof = merged_store->stores[0];
+
+ /* All the rhs_code ops that take 2 operands are commutative,
+ swap the operands if it could make the operands compatible. */
+ if (infof->ops[0].base_addr
+ && infof->ops[1].base_addr
+ && info->ops[0].base_addr
+ && info->ops[1].base_addr
+ && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
+ info->bitpos - infof->bitpos)
+ && operand_equal_p (info->ops[1].base_addr,
+ infof->ops[0].base_addr, 0))
+ {
+ std::swap (info->ops[0], info->ops[1]);
+ info->ops_swapped_p = true;
+ }
+ if (check_no_overlap (m_store_info, i, false,
+ MAX (merged_store->last_order, info->order),
+ MAX (merged_store->start + merged_store->width,
+ info->bitpos + info->bitsize)))
+ {
+ /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
+ if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
+ {
+ info->rhs_code = BIT_INSERT_EXPR;
+ info->ops[0].val = gimple_assign_rhs1 (info->stmt);
+ info->ops[0].base_addr = NULL_TREE;
+ }
+ else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
+ {
+ store_immediate_info *infoj;
+ unsigned int j;
+ FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
+ {
+ infoj->rhs_code = BIT_INSERT_EXPR;
+ infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
+ infoj->ops[0].base_addr = NULL_TREE;
+ }
+ merged_store->bit_insertion = true;
+ }
+ if ((infof->ops[0].base_addr
+ ? compatible_load_p (merged_store, info, base_addr, 0)
+ : !info->ops[0].base_addr)
+ && (infof->ops[1].base_addr
+ ? compatible_load_p (merged_store, info, base_addr, 1)
+ : !info->ops[1].base_addr))
+ {
+ merged_store->merge_into (info);
+ goto done;
+ }
+ }
}
/* |---store 1---| <gap> |---store 2---|.
- Gap between stores. Start a new group. */
- if (start != merged_store->start + merged_store->width)
- {
- /* Try to apply all the stores recorded for the group to determine
- the bitpattern they write and discard it if that fails.
- This will also reject single-store groups. */
- if (!merged_store->apply_stores ())
- delete merged_store;
- else
- m_merged_store_groups.safe_push (merged_store);
+ Gap between stores or the rhs not compatible. Start a new group. */
- merged_store = new merged_store_group (info);
+ /* Try to apply all the stores recorded for the group to determine
+ the bitpattern they write and discard it if that fails.
+ This will also reject single-store groups. */
+ if (merged_store->apply_stores ())
+ m_merged_store_groups.safe_push (merged_store);
+ else
+ delete merged_store;
- continue;
+ merged_store = new merged_store_group (info);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fputs ("New store group\n", dump_file);
+
+ done:
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
+ " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
+ i, info->bitsize, info->bitpos);
+ print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
+ fputc ('\n', dump_file);
}
-
- /* |---store 1---||---store 2---|
- This store is consecutive to the previous one.
- Merge it into the current store group. */
- merged_store->merge_into (info);
}
- /* Record or discard the last store group. */
- if (!merged_store->apply_stores ())
- delete merged_store;
- else
- m_merged_store_groups.safe_push (merged_store);
+ /* Record or discard the last store group. */
+ if (merged_store)
+ {
+ if (merged_store->apply_stores ())
+ m_merged_store_groups.safe_push (merged_store);
+ else
+ delete merged_store;
+ }
gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
+
bool success
= !m_merged_store_groups.is_empty ()
&& m_merged_store_groups.length () < m_store_info.length ();
if (success && dump_file)
- fprintf (dump_file, "Coalescing successful!\n"
- "Merged into %u stores\n",
- m_merged_store_groups.length ());
+ fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
+ m_merged_store_groups.length ());
return success;
}
-/* Return the type to use for the merged stores described by STMTS.
- This is needed to get the alias sets right. */
+/* Return the type to use for the merged stores or loads described by STMTS.
+ This is needed to get the alias sets right. If IS_LOAD, look for rhs,
+ otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
+ of the MEM_REFs if any. */
static tree
-get_alias_type_for_stmts (auto_vec<gimple *> &stmts)
+get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
+ unsigned short *cliquep, unsigned short *basep)
{
gimple *stmt;
unsigned int i;
- tree lhs = gimple_assign_lhs (stmts[0]);
- tree type = reference_alias_ptr_type (lhs);
+ tree type = NULL_TREE;
+ tree ret = NULL_TREE;
+ *cliquep = 0;
+ *basep = 0;
FOR_EACH_VEC_ELT (stmts, i, stmt)
{
- if (i == 0)
- continue;
+ tree ref = is_load ? gimple_assign_rhs1 (stmt)
+ : gimple_assign_lhs (stmt);
+ tree type1 = reference_alias_ptr_type (ref);
+ tree base = get_base_address (ref);
- lhs = gimple_assign_lhs (stmt);
- tree type1 = reference_alias_ptr_type (lhs);
+ if (i == 0)
+ {
+ if (TREE_CODE (base) == MEM_REF)
+ {
+ *cliquep = MR_DEPENDENCE_CLIQUE (base);
+ *basep = MR_DEPENDENCE_BASE (base);
+ }
+ ret = type = type1;
+ continue;
+ }
if (!alias_ptr_types_compatible_p (type, type1))
- return ptr_type_node;
+ ret = ptr_type_node;
+ if (TREE_CODE (base) != MEM_REF
+ || *cliquep != MR_DEPENDENCE_CLIQUE (base)
+ || *basep != MR_DEPENDENCE_BASE (base))
+ {
+ *cliquep = 0;
+ *basep = 0;
+ }
}
- return type;
+ return ret;
}
/* Return the location_t information we can find among the statements
in STMTS. */
static location_t
-get_location_for_stmts (auto_vec<gimple *> &stmts)
+get_location_for_stmts (vec<gimple *> &stmts)
{
gimple *stmt;
unsigned int i;
/* Used to describe a store resulting from splitting a wide store into smaller
regularly-sized stores in split_group. */
-struct split_store
+class split_store
{
+public:
unsigned HOST_WIDE_INT bytepos;
unsigned HOST_WIDE_INT size;
unsigned HOST_WIDE_INT align;
- auto_vec<gimple *> orig_stmts;
+ auto_vec<store_immediate_info *> orig_stores;
+ /* True if there is a single orig stmt covering the whole split store. */
+ bool orig;
split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
};
split_store::split_store (unsigned HOST_WIDE_INT bp,
unsigned HOST_WIDE_INT sz,
unsigned HOST_WIDE_INT al)
- : bytepos (bp), size (sz), align (al)
+ : bytepos (bp), size (sz), align (al), orig (false)
{
- orig_stmts.create (0);
+ orig_stores.create (0);
}
-/* Record all statements corresponding to stores in GROUP that write to
- the region starting at BITPOS and is of size BITSIZE. Record such
- statements in STMTS. The stores in GROUP must be sorted by
- bitposition. */
+/* Record all stores in GROUP that write to the region starting at BITPOS
+ and of size BITSIZE. Record infos for such statements in STORES if
+ non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
+ if there is exactly one original store in the range (in that case ignore
+ clobber stmts, unless there are only clobber stmts). */
-static void
-find_constituent_stmts (struct merged_store_group *group,
- auto_vec<gimple *> &stmts,
+static store_immediate_info *
+find_constituent_stores (class merged_store_group *group,
+ vec<store_immediate_info *> *stores,
+ unsigned int *first,
unsigned HOST_WIDE_INT bitpos,
unsigned HOST_WIDE_INT bitsize)
{
- struct store_immediate_info *info;
+ store_immediate_info *info, *ret = NULL;
unsigned int i;
+ bool second = false;
+ bool update_first = true;
unsigned HOST_WIDE_INT end = bitpos + bitsize;
- FOR_EACH_VEC_ELT (group->stores, i, info)
+ for (i = *first; group->stores.iterate (i, &info); ++i)
{
unsigned HOST_WIDE_INT stmt_start = info->bitpos;
unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
- if (stmt_end < bitpos)
- continue;
+ if (stmt_end <= bitpos)
+ {
+ /* BITPOS passed to this function never decreases from within the
+ same split_group call, so optimize and don't scan info records
+ which are known to end before or at BITPOS next time.
+ Only do it if all stores before this one also pass this. */
+ if (update_first)
+ *first = i + 1;
+ continue;
+ }
+ else
+ update_first = false;
+
/* The stores in GROUP are ordered by bitposition so if we're past
- the region for this group return early. */
- if (stmt_start > end)
- return;
-
- if (IN_RANGE (stmt_start, bitpos, bitpos + bitsize)
- || IN_RANGE (stmt_end, bitpos, end)
- /* The statement writes a region that completely encloses the region
- that this group writes. Unlikely to occur but let's
- handle it. */
- || IN_RANGE (bitpos, stmt_start, stmt_end))
- stmts.safe_push (info->stmt);
+ the region for this group return early. */
+ if (stmt_start >= end)
+ return ret;
+
+ if (gimple_clobber_p (info->stmt))
+ {
+ if (stores)
+ stores->safe_push (info);
+ if (ret == NULL)
+ ret = info;
+ continue;
+ }
+ if (stores)
+ {
+ stores->safe_push (info);
+ if (ret && !gimple_clobber_p (ret->stmt))
+ {
+ ret = NULL;
+ second = true;
+ }
+ }
+ else if (ret && !gimple_clobber_p (ret->stmt))
+ return NULL;
+ if (!second)
+ ret = info;
+ }
+ return ret;
+}
+
+/* Return how many SSA_NAMEs used to compute value to store in the INFO
+ store have multiple uses. If any SSA_NAME has multiple uses, also
+ count statements needed to compute it. */
+
+static unsigned
+count_multiple_uses (store_immediate_info *info)
+{
+ gimple *stmt = info->stmt;
+ unsigned ret = 0;
+ switch (info->rhs_code)
+ {
+ case INTEGER_CST:
+ case STRING_CST:
+ return 0;
+ case BIT_AND_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ if (info->bit_not_p)
+ {
+ if (!has_single_use (gimple_assign_rhs1 (stmt)))
+ ret = 1; /* Fall through below to return
+ the BIT_NOT_EXPR stmt and then
+ BIT_{AND,IOR,XOR}_EXPR and anything it
+ uses. */
+ else
+ /* After this, stmt is the BIT_NOT_EXPR. */
+ stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+ }
+ if (!has_single_use (gimple_assign_rhs1 (stmt)))
+ {
+ ret += 1 + info->ops[0].bit_not_p;
+ if (info->ops[1].base_addr)
+ ret += 1 + info->ops[1].bit_not_p;
+ return ret + 1;
+ }
+ stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+ /* stmt is now the BIT_*_EXPR. */
+ if (!has_single_use (gimple_assign_rhs1 (stmt)))
+ ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
+ else if (info->ops[info->ops_swapped_p].bit_not_p)
+ {
+ gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+ if (!has_single_use (gimple_assign_rhs1 (stmt2)))
+ ++ret;
+ }
+ if (info->ops[1].base_addr == NULL_TREE)
+ {
+ gcc_checking_assert (!info->ops_swapped_p);
+ return ret;
+ }
+ if (!has_single_use (gimple_assign_rhs2 (stmt)))
+ ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
+ else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
+ {
+ gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
+ if (!has_single_use (gimple_assign_rhs1 (stmt2)))
+ ++ret;
+ }
+ return ret;
+ case MEM_REF:
+ if (!has_single_use (gimple_assign_rhs1 (stmt)))
+ return 1 + info->ops[0].bit_not_p;
+ else if (info->ops[0].bit_not_p)
+ {
+ stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
+ if (!has_single_use (gimple_assign_rhs1 (stmt)))
+ return 1;
+ }
+ return 0;
+ case BIT_INSERT_EXPR:
+ return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
+ default:
+ gcc_unreachable ();
}
}
/* Split a merged store described by GROUP by populating the SPLIT_STORES
- vector with split_store structs describing the byte offset (from the base),
- the bit size and alignment of each store as well as the original statements
- involved in each such split group.
+ vector (if non-NULL) with split_store structs describing the byte offset
+ (from the base), the bit size and alignment of each store as well as the
+ original statements involved in each such split group.
This is to separate the splitting strategy from the statement
building/emission/linking done in output_merged_store.
- At the moment just start with the widest possible size and keep emitting
- the widest we can until we have emitted all the bytes, halving the size
- when appropriate. */
-
-static bool
-split_group (merged_store_group *group,
- auto_vec<struct split_store *> &split_stores)
+ Return number of new stores.
+ If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
+ If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
+ BZERO_FIRST may be true only when the first store covers the whole group
+ and clears it; if BZERO_FIRST is true, keep that first store in the set
+ unmodified and emit further stores for the overrides only.
+ If SPLIT_STORES is NULL, it is just a dry run to count number of
+ new stores. */
+
+static unsigned int
+split_group (merged_store_group *group, bool allow_unaligned_store,
+ bool allow_unaligned_load, bool bzero_first,
+ vec<split_store *> *split_stores,
+ unsigned *total_orig,
+ unsigned *total_new)
{
- unsigned HOST_WIDE_INT pos = group->start;
- unsigned HOST_WIDE_INT size = group->width;
+ unsigned HOST_WIDE_INT pos = group->bitregion_start;
+ unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
- unsigned HOST_WIDE_INT align = group->align;
+ unsigned HOST_WIDE_INT group_align = group->align;
+ unsigned HOST_WIDE_INT align_base = group->align_base;
+ unsigned HOST_WIDE_INT group_load_align = group_align;
+ bool any_orig = false;
- /* We don't handle partial bitfields for now. We shouldn't have
- reached this far. */
gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
- bool allow_unaligned
- = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
-
- unsigned int try_size = MAX_STORE_BITSIZE;
- while (try_size > size
- || (!allow_unaligned
- && try_size > align))
+ /* For bswap framework using sets of stores, all the checking has been done
+ earlier in try_coalesce_bswap and the result always needs to be emitted
+ as a single store. Likewise for string concatenation. */
+ if (group->stores[0]->rhs_code == LROTATE_EXPR
+ || group->stores[0]->rhs_code == NOP_EXPR
+ || group->string_concatenation)
{
- try_size /= 2;
- if (try_size < BITS_PER_UNIT)
- return false;
+ gcc_assert (!bzero_first);
+ if (total_orig)
+ {
+ /* Avoid the old/new stmt count heuristics. It should always
+ be beneficial. */
+ total_new[0] = 1;
+ total_orig[0] = 2;
+ }
+
+ if (split_stores)
+ {
+ unsigned HOST_WIDE_INT align_bitpos
+ = (group->start - align_base) & (group_align - 1);
+ unsigned HOST_WIDE_INT align = group_align;
+ if (align_bitpos)
+ align = least_bit_hwi (align_bitpos);
+ bytepos = group->start / BITS_PER_UNIT;
+ split_store *store
+ = new split_store (bytepos, group->width, align);
+ unsigned int first = 0;
+ find_constituent_stores (group, &store->orig_stores,
+ &first, group->start, group->width);
+ split_stores->safe_push (store);
+ }
+
+ return 1;
}
+ unsigned int ret = 0, first = 0;
unsigned HOST_WIDE_INT try_pos = bytepos;
- group->stores.qsort (sort_by_bitpos);
+
+ if (total_orig)
+ {
+ unsigned int i;
+ store_immediate_info *info = group->stores[0];
+
+ total_new[0] = 0;
+ total_orig[0] = 1; /* The orig store. */
+ info = group->stores[0];
+ if (info->ops[0].base_addr)
+ total_orig[0]++;
+ if (info->ops[1].base_addr)
+ total_orig[0]++;
+ switch (info->rhs_code)
+ {
+ case BIT_AND_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
+ break;
+ default:
+ break;
+ }
+ total_orig[0] *= group->stores.length ();
+
+ FOR_EACH_VEC_ELT (group->stores, i, info)
+ {
+ total_new[0] += count_multiple_uses (info);
+ total_orig[0] += (info->bit_not_p
+ + info->ops[0].bit_not_p
+ + info->ops[1].bit_not_p);
+ }
+ }
+
+ if (!allow_unaligned_load)
+ for (int i = 0; i < 2; ++i)
+ if (group->load_align[i])
+ group_load_align = MIN (group_load_align, group->load_align[i]);
+
+ if (bzero_first)
+ {
+ store_immediate_info *gstore;
+ FOR_EACH_VEC_ELT (group->stores, first, gstore)
+ if (!gimple_clobber_p (gstore->stmt))
+ break;
+ ++first;
+ ret = 1;
+ if (split_stores)
+ {
+ split_store *store
+ = new split_store (bytepos, gstore->bitsize, align_base);
+ store->orig_stores.safe_push (gstore);
+ store->orig = true;
+ any_orig = true;
+ split_stores->safe_push (store);
+ }
+ }
while (size > 0)
{
- struct split_store *store = new split_store (try_pos, try_size, align);
+ if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
+ && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
+ || (bzero_first && group->val[try_pos - bytepos] == 0)))
+ {
+ /* Skip padding bytes. */
+ ++try_pos;
+ size -= BITS_PER_UNIT;
+ continue;
+ }
+
unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
- find_constituent_stmts (group, store->orig_stmts, try_bitpos, try_size);
- split_stores.safe_push (store);
+ unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
+ unsigned HOST_WIDE_INT align_bitpos
+ = (try_bitpos - align_base) & (group_align - 1);
+ unsigned HOST_WIDE_INT align = group_align;
+ bool found_orig = false;
+ if (align_bitpos)
+ align = least_bit_hwi (align_bitpos);
+ if (!allow_unaligned_store)
+ try_size = MIN (try_size, align);
+ if (!allow_unaligned_load)
+ {
+ /* If we can't do or don't want to do unaligned stores
+ as well as loads, we need to take the loads into account
+ as well. */
+ unsigned HOST_WIDE_INT load_align = group_load_align;
+ align_bitpos = (try_bitpos - align_base) & (load_align - 1);
+ if (align_bitpos)
+ load_align = least_bit_hwi (align_bitpos);
+ for (int i = 0; i < 2; ++i)
+ if (group->load_align[i])
+ {
+ align_bitpos
+ = known_alignment (try_bitpos
+ - group->stores[0]->bitpos
+ + group->stores[0]->ops[i].bitpos
+ - group->load_align_base[i]);
+ if (align_bitpos & (group_load_align - 1))
+ {
+ unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
+ load_align = MIN (load_align, a);
+ }
+ }
+ try_size = MIN (try_size, load_align);
+ }
+ store_immediate_info *info
+ = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
+ if (info && !gimple_clobber_p (info->stmt))
+ {
+ /* If there is just one original statement for the range, see if
+ we can just reuse the original store which could be even larger
+ than try_size. */
+ unsigned HOST_WIDE_INT stmt_end
+ = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
+ info = find_constituent_stores (group, NULL, &first, try_bitpos,
+ stmt_end - try_bitpos);
+ if (info && info->bitpos >= try_bitpos)
+ {
+ store_immediate_info *info2 = NULL;
+ unsigned int first_copy = first;
+ if (info->bitpos > try_bitpos
+ && stmt_end - try_bitpos <= try_size)
+ {
+ info2 = find_constituent_stores (group, NULL, &first_copy,
+ try_bitpos,
+ info->bitpos - try_bitpos);
+ gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
+ }
+ if (info2 == NULL && stmt_end - try_bitpos < try_size)
+ {
+ info2 = find_constituent_stores (group, NULL, &first_copy,
+ stmt_end,
+ (try_bitpos + try_size)
+ - stmt_end);
+ gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
+ }
+ if (info2 == NULL)
+ {
+ try_size = stmt_end - try_bitpos;
+ found_orig = true;
+ goto found;
+ }
+ }
+ }
- try_pos += try_size / BITS_PER_UNIT;
+ /* Approximate store bitsize for the case when there are no padding
+ bits. */
+ while (try_size > size)
+ try_size /= 2;
+ /* Now look for whole padding bytes at the end of that bitsize. */
+ for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
+ if (group->mask[try_pos - bytepos + nonmasked - 1]
+ != (unsigned char) ~0U
+ && (!bzero_first
+ || group->val[try_pos - bytepos + nonmasked - 1] != 0))
+ break;
+ if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
+ {
+ /* If entire try_size range is padding, skip it. */
+ try_pos += try_size / BITS_PER_UNIT;
+ size -= try_size;
+ continue;
+ }
+ /* Otherwise try to decrease try_size if the second half, the last 3
+ quarters, etc. are padding. */
+ nonmasked *= BITS_PER_UNIT;
+ while (nonmasked <= try_size / 2)
+ try_size /= 2;
+ if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
+ {
+ /* Now look for whole padding bytes at the start of that bitsize. */
+ unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
+ for (masked = 0; masked < try_bytesize; ++masked)
+ if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
+ && (!bzero_first
+ || group->val[try_pos - bytepos + masked] != 0))
+ break;
+ masked *= BITS_PER_UNIT;
+ gcc_assert (masked < try_size);
+ if (masked >= try_size / 2)
+ {
+ while (masked >= try_size / 2)
+ {
+ try_size /= 2;
+ try_pos += try_size / BITS_PER_UNIT;
+ size -= try_size;
+ masked -= try_size;
+ }
+ /* Need to recompute the alignment, so just retry at the new
+ position. */
+ continue;
+ }
+ }
+
+ found:
+ ++ret;
+
+ if (split_stores)
+ {
+ split_store *store
+ = new split_store (try_pos, try_size, align);
+ info = find_constituent_stores (group, &store->orig_stores,
+ &first, try_bitpos, try_size);
+ if (info
+ && !gimple_clobber_p (info->stmt)
+ && info->bitpos >= try_bitpos
+ && info->bitpos + info->bitsize <= try_bitpos + try_size
+ && (store->orig_stores.length () == 1
+ || found_orig
+ || (info->bitpos == try_bitpos
+ && (info->bitpos + info->bitsize
+ == try_bitpos + try_size))))
+ {
+ store->orig = true;
+ any_orig = true;
+ }
+ split_stores->safe_push (store);
+ }
+ try_pos += try_size / BITS_PER_UNIT;
size -= try_size;
- align = try_size;
- while (size < try_size)
- try_size /= 2;
}
- return true;
+
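+ /* Estimate the number of new statements that will be needed: the
+ stores themselves plus any loads, bitwise operations and inversions
+ feeding them, minus corrections for reused original stores. */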
+ if (total_orig)
+ {
+ unsigned int i;
+ split_store *store;
+ /* If we are reusing some original stores and any of the
+ original SSA_NAMEs had multiple uses, we need to subtract
+ those now before we add the new ones. */
+ if (total_new[0] && any_orig)
+ {
+ FOR_EACH_VEC_ELT (*split_stores, i, store)
+ if (store->orig)
+ total_new[0] -= count_multiple_uses (store->orig_stores[0]);
+ }
+ total_new[0] += ret; /* The new store. */
+ store_immediate_info *info = group->stores[0];
+ if (info->ops[0].base_addr)
+ total_new[0] += ret;
+ if (info->ops[1].base_addr)
+ total_new[0] += ret;
+ switch (info->rhs_code)
+ {
+ case BIT_AND_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
+ break;
+ default:
+ break;
+ }
+ FOR_EACH_VEC_ELT (*split_stores, i, store)
+ {
+ unsigned int j;
+ bool bit_not_p[3] = { false, false, false };
+ /* If all orig_stores have certain bit_not_p set, then
+ we'd use a BIT_NOT_EXPR stmt and need to account for it.
+ If some orig_stores have certain bit_not_p set, then
+ we'd use a BIT_XOR_EXPR with a mask and need to account for
+ it. */
+ FOR_EACH_VEC_ELT (store->orig_stores, j, info)
+ {
+ if (info->ops[0].bit_not_p)
+ bit_not_p[0] = true;
+ if (info->ops[1].bit_not_p)
+ bit_not_p[1] = true;
+ if (info->bit_not_p)
+ bit_not_p[2] = true;
+ }
+ total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
+ }
+
+ }
+
+ return ret;
+}
+
+/* Return the operation through which the operand IDX (if < 2) or
+ result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
+ is done; if BIT_NOT_EXPR, all bits are inverted; if BIT_XOR_EXPR,
+ the bits should be xored with MASK. */
+
+static enum tree_code
+invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
+{
+ unsigned int i;
+ store_immediate_info *info;
+ unsigned int cnt = 0;
+ bool any_paddings = false;
+ FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
+ {
+ bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
+ if (bit_not_p)
+ {
+ ++cnt;
+ tree lhs = gimple_assign_lhs (info->stmt);
+ if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+ && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
+ any_paddings = true;
+ }
+ }
+ mask = NULL_TREE;
+ if (cnt == 0)
+ return NOP_EXPR;
+ if (cnt == split_store->orig_stores.length () && !any_paddings)
+ return BIT_NOT_EXPR;
+
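+ /* Only some of the original stores need inversion (or some of the
+ stored values have padding bits): build a mask with ones exactly in
+ the bit ranges that need inverting and return BIT_XOR_EXPR. */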
+ unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
+ unsigned buf_size = split_store->size / BITS_PER_UNIT;
+ unsigned char *buf
+ = XALLOCAVEC (unsigned char, buf_size);
+ memset (buf, ~0U, buf_size);
+ FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
+ {
+ bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
+ if (!bit_not_p)
+ continue;
+ /* Clear regions with bit_not_p and invert afterwards, rather than
+ clear regions with !bit_not_p, so that gaps in between stores aren't
+ set in the mask. */
+ unsigned HOST_WIDE_INT bitsize = info->bitsize;
+ unsigned HOST_WIDE_INT prec = bitsize;
+ unsigned int pos_in_buffer = 0;
+ if (any_paddings)
+ {
+ tree lhs = gimple_assign_lhs (info->stmt);
+ if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+ && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
+ prec = TYPE_PRECISION (TREE_TYPE (lhs));
+ }
+ if (info->bitpos < try_bitpos)
+ {
+ gcc_assert (info->bitpos + bitsize > try_bitpos);
+ if (!BYTES_BIG_ENDIAN)
+ {
+ if (prec <= try_bitpos - info->bitpos)
+ continue;
+ prec -= try_bitpos - info->bitpos;
+ }
+ bitsize -= try_bitpos - info->bitpos;
+ if (BYTES_BIG_ENDIAN && prec > bitsize)
+ prec = bitsize;
+ }
+ else
+ pos_in_buffer = info->bitpos - try_bitpos;
+ if (prec < bitsize)
+ {
+ /* If this is a bool inversion, invert just the least significant
+ prec bits rather than all bits of it. */
+ if (BYTES_BIG_ENDIAN)
+ {
+ pos_in_buffer += bitsize - prec;
+ if (pos_in_buffer >= split_store->size)
+ continue;
+ }
+ bitsize = prec;
+ }
+ if (pos_in_buffer + bitsize > split_store->size)
+ bitsize = split_store->size - pos_in_buffer;
+ unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
+ if (BYTES_BIG_ENDIAN)
+ clear_bit_region_be (p, (BITS_PER_UNIT - 1
+ - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
+ else
+ clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
+ }
+ for (unsigned int i = 0; i < buf_size; ++i)
+ buf[i] = ~buf[i];
+ mask = native_interpret_expr (int_type, buf, buf_size);
+ return BIT_XOR_EXPR;
}
/* Given a merged store group GROUP output the widened version of it.
bool
imm_store_chain_info::output_merged_store (merged_store_group *group)
{
- unsigned HOST_WIDE_INT start_byte_pos = group->start / BITS_PER_UNIT;
-
+ const unsigned HOST_WIDE_INT start_byte_pos
+ = group->bitregion_start / BITS_PER_UNIT;
unsigned int orig_num_stmts = group->stores.length ();
if (orig_num_stmts < 2)
return false;
- auto_vec<struct split_store *> split_stores;
- split_stores.create (0);
- if (!split_group (group, split_stores))
- return false;
+ bool allow_unaligned_store
+ = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
+ bool allow_unaligned_load = allow_unaligned_store;
+ bool bzero_first = false;
+ store_immediate_info *store;
+ unsigned int num_clobber_stmts = 0;
+ if (group->stores[0]->rhs_code == INTEGER_CST)
+ {
+ unsigned int i;
+ FOR_EACH_VEC_ELT (group->stores, i, store)
+ if (gimple_clobber_p (store->stmt))
+ num_clobber_stmts++;
+ else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
+ && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
+ && group->start == store->bitpos
+ && group->width == store->bitsize
+ && (group->start % BITS_PER_UNIT) == 0
+ && (group->width % BITS_PER_UNIT) == 0)
+ {
+ bzero_first = true;
+ break;
+ }
+ else
+ break;
+ FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
+ if (gimple_clobber_p (store->stmt))
+ num_clobber_stmts++;
+ if (num_clobber_stmts == orig_num_stmts)
+ return false;
+ orig_num_stmts -= num_clobber_stmts;
+ }
+ if (allow_unaligned_store || bzero_first)
+ {
+ /* If unaligned stores are allowed, see how many stores we'd emit
+ for unaligned and how many stores we'd emit for aligned stores.
+ Only use unaligned stores if that needs fewer stores than aligned.
+ Similarly, if the whole region is cleared first, prefer expanding
+ that clear together with the merged stores over expanding the clear
+ first followed by further merged stores. */
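+ /* Bit 0 of the pass number enables unaligned stores, bit 1 enables
+ treating zero bytes as handled by the leading clearing store; keep the
+ combination that results in the fewest stores. */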
+ unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
+ int pass_min = 0;
+ for (int pass = 0; pass < 4; ++pass)
+ {
+ if (!allow_unaligned_store && (pass & 1) != 0)
+ continue;
+ if (!bzero_first && (pass & 2) != 0)
+ continue;
+ cnt[pass] = split_group (group, (pass & 1) != 0,
+ allow_unaligned_load, (pass & 2) != 0,
+ NULL, NULL, NULL);
+ if (cnt[pass] < cnt[pass_min])
+ pass_min = pass;
+ }
+ if ((pass_min & 1) == 0)
+ allow_unaligned_store = false;
+ if ((pass_min & 2) == 0)
+ bzero_first = false;
+ }
+
+ auto_vec<class split_store *, 32> split_stores;
+ split_store *split_store;
+ unsigned total_orig, total_new, i;
+ split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
+ &split_stores, &total_orig, &total_new);
+
+ /* Determine if there is a clobber covering the whole group at the start,
+ followed by proposed split stores that cover the whole group. In that
+ case, prefer the transformation even if
+ split_stores.length () == orig_num_stmts. */
+ bool clobber_first = false;
+ if (num_clobber_stmts
+ && gimple_clobber_p (group->stores[0]->stmt)
+ && group->start == group->stores[0]->bitpos
+ && group->width == group->stores[0]->bitsize
+ && (group->start % BITS_PER_UNIT) == 0
+ && (group->width % BITS_PER_UNIT) == 0)
+ {
+ clobber_first = true;
+ unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ if (split_store->bytepos != pos)
+ {
+ clobber_first = false;
+ break;
+ }
+ else
+ pos += split_store->size / BITS_PER_UNIT;
+ if (pos != (group->start + group->width) / BITS_PER_UNIT)
+ clobber_first = false;
+ }
+
+ if (split_stores.length () >= orig_num_stmts + clobber_first)
+ {
+
+ /* We didn't manage to reduce the number of statements. Bail out. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Exceeded original number of stmts (%u)."
+ " Not profitable to emit new sequence.\n",
+ orig_num_stmts);
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ delete split_store;
+ return false;
+ }
+ if (total_orig <= total_new)
+ {
+ /* If the estimated number of new statements is not smaller than the
+ estimated number of original statements, bail out too. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Estimated number of original stmts (%u)"
+ " not larger than estimated number of new"
+ " stmts (%u).\n",
+ total_orig, total_new);
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ delete split_store;
+ return false;
+ }
+ if (group->stores[0]->rhs_code == INTEGER_CST)
+ {
+ bool all_orig = true;
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ if (!split_store->orig)
+ {
+ all_orig = false;
+ break;
+ }
+ if (all_orig)
+ {
+ unsigned int cnt = split_stores.length ();
+ store_immediate_info *store;
+ FOR_EACH_VEC_ELT (group->stores, i, store)
+ if (gimple_clobber_p (store->stmt))
+ ++cnt;
+ /* Punt if we wouldn't make any real changes, i.e. keep all
+ orig stmts + all clobbers. */
+ if (cnt == group->stores.length ())
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Exceeded original number of stmts (%u)."
+ " Not profitable to emit new sequence.\n",
+ orig_num_stmts);
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ delete split_store;
+ return false;
+ }
+ }
+ }
gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
gimple_seq seq = NULL;
- unsigned int num_stmts = 0;
tree last_vdef, new_vuse;
last_vdef = gimple_vdef (group->last_stmt);
new_vuse = gimple_vuse (group->last_stmt);
+ tree bswap_res = NULL_TREE;
+
+ /* Clobbers are not removed. */
+ if (gimple_clobber_p (group->last_stmt))
+ {
+ new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
+ gimple_set_vdef (group->last_stmt, new_vuse);
+ }
+
+ if (group->stores[0]->rhs_code == LROTATE_EXPR
+ || group->stores[0]->rhs_code == NOP_EXPR)
+ {
+ tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+ gimple *ins_stmt = group->stores[0]->ins_stmt;
+ struct symbolic_number *n = &group->stores[0]->n;
+ bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
+
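+ /* Pick the integer type of the wide load and, for 32-bit and 64-bit
+ byte swaps, the builtin to call. */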
+ switch (n->range)
+ {
+ case 16:
+ load_type = bswap_type = uint16_type_node;
+ break;
+ case 32:
+ load_type = uint32_type_node;
+ if (bswap)
+ {
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+ bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+ break;
+ case 64:
+ load_type = uint64_type_node;
+ if (bswap)
+ {
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+ bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* If each load has the vuse of the corresponding store,
+ we've checked the aliasing already in try_coalesce_bswap and
+ we want to sink the new load into SEQ. So we need to use NEW_VUSE
+ on the load. */
+ if (n->base_addr)
+ {
+ if (n->vuse == NULL)
+ {
+ n->vuse = new_vuse;
+ ins_stmt = NULL;
+ }
+ else
+ /* Update the vuse in case it has been changed by output_merged_stores. */
+ n->vuse = gimple_vuse (ins_stmt);
+ }
+ bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
+ bswap_type, load_type, n, bswap);
+ gcc_assert (bswap_res);
+ }
gimple *stmt = NULL;
- /* The new SSA names created. Keep track of them so that we can free them
- if we decide to not use the new sequence. */
- auto_vec<tree> new_ssa_names;
- split_store *split_store;
- unsigned int i;
- bool fail = false;
+ auto_vec<gimple *, 32> orig_stmts;
+ gimple_seq this_seq;
+ tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
+ is_gimple_mem_ref_addr, NULL_TREE);
+ gimple_seq_add_seq_without_update (&seq, this_seq);
+
+ tree load_addr[2] = { NULL_TREE, NULL_TREE };
+ gimple_seq load_seq[2] = { NULL, NULL };
+ gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
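+ /* Compute the base addresses of the (at most two) load operands and
+ decide where the loads will be emitted. */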
+ for (int j = 0; j < 2; ++j)
+ {
+ store_operand_info &op = group->stores[0]->ops[j];
+ if (op.base_addr == NULL_TREE)
+ continue;
+
+ store_immediate_info *infol = group->stores.last ();
+ if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
+ {
+ /* We can't pick the location randomly; while we've verified
+ all the loads have the same vuse, they can still be in different
+ basic blocks and we need to pick the one from the last bb:
+ int x = q[0];
+ if (x == N) return;
+ int y = q[1];
+ p[0] = x;
+ p[1] = y;
+ otherwise if we put the wider load at the q[0] load, we might
+ segfault if q[1] is not mapped. */
+ basic_block bb = gimple_bb (op.stmt);
+ gimple *ostmt = op.stmt;
+ store_immediate_info *info;
+ FOR_EACH_VEC_ELT (group->stores, i, info)
+ {
+ gimple *tstmt = info->ops[j].stmt;
+ basic_block tbb = gimple_bb (tstmt);
+ if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
+ {
+ ostmt = tstmt;
+ bb = tbb;
+ }
+ }
+ load_gsi[j] = gsi_for_stmt (ostmt);
+ load_addr[j]
+ = force_gimple_operand_1 (unshare_expr (op.base_addr),
+ &load_seq[j], is_gimple_mem_ref_addr,
+ NULL_TREE);
+ }
+ else if (operand_equal_p (base_addr, op.base_addr, 0))
+ load_addr[j] = addr;
+ else
+ {
+ load_addr[j]
+ = force_gimple_operand_1 (unshare_expr (op.base_addr),
+ &this_seq, is_gimple_mem_ref_addr,
+ NULL_TREE);
+ gimple_seq_add_seq_without_update (&seq, this_seq);
+ }
+ }
+
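+ /* Emit one wide store, together with any loads and bitwise operations
+ feeding it, for each proposed split store. */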
+ FOR_EACH_VEC_ELT (split_stores, i, split_store)
+ {
+ const unsigned HOST_WIDE_INT try_size = split_store->size;
+ const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
+ const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
+ const unsigned HOST_WIDE_INT try_align = split_store->align;
+ const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
+ tree dest, src;
+ location_t loc;
+
+ if (split_store->orig)
+ {
+ /* If there is just a single non-clobber constituent store
+ which covers the whole area, just reuse the lhs and rhs. */
+ gimple *orig_stmt = NULL;
+ store_immediate_info *store;
+ unsigned int j;
+ FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
+ if (!gimple_clobber_p (store->stmt))
+ {
+ orig_stmt = store->stmt;
+ break;
+ }
+ dest = gimple_assign_lhs (orig_stmt);
+ src = gimple_assign_rhs1 (orig_stmt);
+ loc = gimple_location (orig_stmt);
+ }
+ else
+ {
+ store_immediate_info *info;
+ unsigned short clique, base;
+ unsigned int k;
+ FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
+ orig_stmts.safe_push (info->stmt);
+ tree offset_type
+ = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
+ tree dest_type;
+ loc = get_location_for_stmts (orig_stmts);
+ orig_stmts.truncate (0);
+
+ if (group->string_concatenation)
+ dest_type
+ = build_array_type_nelts (char_type_node,
+ try_size / BITS_PER_UNIT);
+ else
+ {
+ dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
+ dest_type = build_aligned_type (dest_type, try_align);
+ }
+ dest = fold_build2 (MEM_REF, dest_type, addr,
+ build_int_cst (offset_type, try_pos));
+ if (TREE_CODE (dest) == MEM_REF)
+ {
+ MR_DEPENDENCE_CLIQUE (dest) = clique;
+ MR_DEPENDENCE_BASE (dest) = base;
+ }
- tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &seq,
- is_gimple_mem_ref_addr, NULL_TREE);
- FOR_EACH_VEC_ELT (split_stores, i, split_store)
- {
- unsigned HOST_WIDE_INT try_size = split_store->size;
- unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
- unsigned HOST_WIDE_INT align = split_store->align;
- tree offset_type = get_alias_type_for_stmts (split_store->orig_stmts);
- location_t loc = get_location_for_stmts (split_store->orig_stmts);
+ tree mask;
+ if (bswap_res || group->string_concatenation)
+ mask = integer_zero_node;
+ else
+ mask = native_interpret_expr (dest_type,
+ group->mask + try_offset,
+ group->buf_size);
+
+ tree ops[2];
+ for (int j = 0;
+ j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
+ ++j)
+ {
+ store_operand_info &op = split_store->orig_stores[0]->ops[j];
+ if (bswap_res)
+ ops[j] = bswap_res;
+ else if (group->string_concatenation)
+ {
+ ops[j] = build_string (try_size / BITS_PER_UNIT,
+ (const char *) group->val + try_offset);
+ TREE_TYPE (ops[j]) = dest_type;
+ }
+ else if (op.base_addr)
+ {
+ FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
+ orig_stmts.safe_push (info->ops[j].stmt);
+
+ offset_type = get_alias_type_for_stmts (orig_stmts, true,
+ &clique, &base);
+ location_t load_loc = get_location_for_stmts (orig_stmts);
+ orig_stmts.truncate (0);
+
+ unsigned HOST_WIDE_INT load_align = group->load_align[j];
+ unsigned HOST_WIDE_INT align_bitpos
+ = known_alignment (try_bitpos
+ - split_store->orig_stores[0]->bitpos
+ + op.bitpos);
+ if (align_bitpos & (load_align - 1))
+ load_align = least_bit_hwi (align_bitpos);
+
+ tree load_int_type
+ = build_nonstandard_integer_type (try_size, UNSIGNED);
+ load_int_type
+ = build_aligned_type (load_int_type, load_align);
+
+ poly_uint64 load_pos
+ = exact_div (try_bitpos
+ - split_store->orig_stores[0]->bitpos
+ + op.bitpos,
+ BITS_PER_UNIT);
+ ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
+ build_int_cst (offset_type, load_pos));
+ if (TREE_CODE (ops[j]) == MEM_REF)
+ {
+ MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
+ MR_DEPENDENCE_BASE (ops[j]) = base;
+ }
+ if (!integer_zerop (mask))
+ /* The load might load some bits (that will be masked off
+ later on) uninitialized; avoid -W*uninitialized
+ warnings in that case. */
+ TREE_NO_WARNING (ops[j]) = 1;
+
+ stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
+ gimple_set_location (stmt, load_loc);
+ if (gsi_bb (load_gsi[j]))
+ {
+ gimple_set_vuse (stmt, gimple_vuse (op.stmt));
+ gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
+ }
+ else
+ {
+ gimple_set_vuse (stmt, new_vuse);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ }
+ ops[j] = gimple_assign_lhs (stmt);
+ tree xor_mask;
+ enum tree_code inv_op
+ = invert_op (split_store, j, dest_type, xor_mask);
+ if (inv_op != NOP_EXPR)
+ {
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
+ inv_op, ops[j], xor_mask);
+ gimple_set_location (stmt, load_loc);
+ ops[j] = gimple_assign_lhs (stmt);
+
+ if (gsi_bb (load_gsi[j]))
+ gimple_seq_add_stmt_without_update (&load_seq[j],
+ stmt);
+ else
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ }
+ }
+ else
+ ops[j] = native_interpret_expr (dest_type,
+ group->val + try_offset,
+ group->buf_size);
+ }
+
+ switch (split_store->orig_stores[0]->rhs_code)
+ {
+ case BIT_AND_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
+ {
+ tree rhs1 = gimple_assign_rhs1 (info->stmt);
+ orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
+ }
+ location_t bit_loc;
+ bit_loc = get_location_for_stmts (orig_stmts);
+ orig_stmts.truncate (0);
+
+ stmt
+ = gimple_build_assign (make_ssa_name (dest_type),
+ split_store->orig_stores[0]->rhs_code,
+ ops[0], ops[1]);
+ gimple_set_location (stmt, bit_loc);
+ /* If there is just one load and there is a separate
+ load_seq[0], emit the bitwise op right after it. */
+ if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
+ gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
+ /* Otherwise, if at least one load is in seq, we need to
+ emit the bitwise op right before the store. If there
+ are two loads and are emitted somewhere else, it would
+ be better to emit the bitwise op as early as possible;
+ we don't track where that would be possible right now
+ though. */
+ else
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+ tree xor_mask;
+ enum tree_code inv_op;
+ inv_op = invert_op (split_store, 2, dest_type, xor_mask);
+ if (inv_op != NOP_EXPR)
+ {
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
+ inv_op, src, xor_mask);
+ gimple_set_location (stmt, bit_loc);
+ if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
+ gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
+ else
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+ }
+ break;
+ case LROTATE_EXPR:
+ case NOP_EXPR:
+ src = ops[0];
+ if (!is_gimple_val (src))
+ {
+ stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
+ src);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+ }
+ if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
+ {
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
+ NOP_EXPR, src);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+ }
+ inv_op = invert_op (split_store, 2, dest_type, xor_mask);
+ if (inv_op != NOP_EXPR)
+ {
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
+ inv_op, src, xor_mask);
+ gimple_set_location (stmt, loc);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+ }
+ break;
+ default:
+ src = ops[0];
+ break;
+ }
- tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
- int_type = build_aligned_type (int_type, align);
- tree dest = fold_build2 (MEM_REF, int_type, addr,
- build_int_cst (offset_type, try_pos));
+ /* If bit insertion is required, we use the source as an accumulator
+ into which the successive bit-field values are manually inserted.
+ FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
+ if (group->bit_insertion)
+ FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
+ if (info->rhs_code == BIT_INSERT_EXPR
+ && info->bitpos < try_bitpos + try_size
+ && info->bitpos + info->bitsize > try_bitpos)
+ {
+ /* Mask, truncate, convert to final type, shift and ior into
+ the accumulator. Note that every step can be a no-op. */
+ const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
+ const HOST_WIDE_INT end_gap
+ = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
+ tree tem = info->ops[0].val;
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
+ {
+ const unsigned HOST_WIDE_INT size
+ = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
+ tree integer_type
+ = build_nonstandard_integer_type (size, UNSIGNED);
+ tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
+ integer_type, tem);
+ }
+ if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
+ {
+ tree bitfield_type
+ = build_nonstandard_integer_type (info->bitsize,
+ UNSIGNED);
+ tem = gimple_convert (&seq, loc, bitfield_type, tem);
+ }
+ else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
+ {
+ const unsigned HOST_WIDE_INT imask
+ = (HOST_WIDE_INT_1U << info->bitsize) - 1;
+ tem = gimple_build (&seq, loc,
+ BIT_AND_EXPR, TREE_TYPE (tem), tem,
+ build_int_cst (TREE_TYPE (tem),
+ imask));
+ }
+ const HOST_WIDE_INT shift
+ = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
+ if (shift < 0)
+ tem = gimple_build (&seq, loc,
+ RSHIFT_EXPR, TREE_TYPE (tem), tem,
+ build_int_cst (NULL_TREE, -shift));
+ tem = gimple_convert (&seq, loc, dest_type, tem);
+ if (shift > 0)
+ tem = gimple_build (&seq, loc,
+ LSHIFT_EXPR, dest_type, tem,
+ build_int_cst (NULL_TREE, shift));
+ src = gimple_build (&seq, loc,
+ BIT_IOR_EXPR, dest_type, tem, src);
+ }
- tree src = native_interpret_expr (int_type,
- group->val + try_pos - start_byte_pos,
- group->buf_size);
+ if (!integer_zerop (mask))
+ {
+ tree tem = make_ssa_name (dest_type);
+ tree load_src = unshare_expr (dest);
+ /* The load might load some or all bits uninitialized;
+ avoid -W*uninitialized warnings in that case.
+ As an optimization, if all the bits are provably uninitialized
+ (no stores at all yet, or the previous store was a CLOBBER), it
+ would be nice to optimize away the load and replace it e.g.
+ with 0. */
+ TREE_NO_WARNING (load_src) = 1;
+ stmt = gimple_build_assign (tem, load_src);
+ gimple_set_location (stmt, loc);
+ gimple_set_vuse (stmt, new_vuse);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+
+ /* FIXME: If there is a single chunk of zero bits in mask,
+ perhaps use BIT_INSERT_EXPR instead? */
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
+ BIT_AND_EXPR, tem, mask);
+ gimple_set_location (stmt, loc);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ tem = gimple_assign_lhs (stmt);
+
+ if (TREE_CODE (src) == INTEGER_CST)
+ src = wide_int_to_tree (dest_type,
+ wi::bit_and_not (wi::to_wide (src),
+ wi::to_wide (mask)));
+ else
+ {
+ tree nmask
+ = wide_int_to_tree (dest_type,
+ wi::bit_not (wi::to_wide (mask)));
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
+ BIT_AND_EXPR, src, nmask);
+ gimple_set_location (stmt, loc);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+ }
+ stmt = gimple_build_assign (make_ssa_name (dest_type),
+ BIT_IOR_EXPR, tem, src);
+ gimple_set_location (stmt, loc);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+ }
+ }
stmt = gimple_build_assign (dest, src);
gimple_set_location (stmt, loc);
gimple_set_vuse (stmt, new_vuse);
gimple_seq_add_stmt_without_update (&seq, stmt);
- /* We didn't manage to reduce the number of statements. Bail out. */
- if (++num_stmts == orig_num_stmts)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Exceeded original number of stmts (%u)."
- " Not profitable to emit new sequence.\n",
- orig_num_stmts);
- }
- unsigned int ssa_count;
- tree ssa_name;
- /* Don't forget to cleanup the temporary SSA names. */
- FOR_EACH_VEC_ELT (new_ssa_names, ssa_count, ssa_name)
- release_ssa_name (ssa_name);
-
- fail = true;
- break;
- }
+ if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
+ add_stmt_to_eh_lp (stmt, group->lp_nr);
tree new_vdef;
if (i < split_stores.length () - 1)
- {
- new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
- new_ssa_names.safe_push (new_vdef);
- }
+ new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
else
new_vdef = last_vdef;
FOR_EACH_VEC_ELT (split_stores, i, split_store)
delete split_store;
- if (fail)
- return false;
-
gcc_assert (seq);
if (dump_file)
{
fprintf (dump_file,
- "New sequence of %u stmts to replace old one of %u stmts\n",
- num_stmts, orig_num_stmts);
+ "New sequence of %u stores to replace old one of %u stores\n",
+ split_stores.length (), orig_num_stmts);
if (dump_flags & TDF_DETAILS)
print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
}
- gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
+
+ if (gimple_clobber_p (group->last_stmt))
+ update_stmt (group->last_stmt);
+
+ if (group->lp_nr > 0)
+ {
+ /* We're going to insert a sequence of (potentially) throwing stores
+ into an active EH region. This means that we're going to create
+ new basic blocks with EH edges pointing to the post landing pad
+ and, therefore, to have to update its PHI nodes, if any. For the
+ virtual PHI node, we're going to use the VDEFs created above, but
+ for the other nodes, we need to record the original reaching defs. */
+ eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
+ basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
+ basic_block last_bb = gimple_bb (group->last_stmt);
+ edge last_edge = find_edge (last_bb, lp_bb);
+ auto_vec<tree, 16> last_defs;
+ gphi_iterator gpi;
+ for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
+ {
+ gphi *phi = gpi.phi ();
+ tree last_def;
+ if (virtual_operand_p (gimple_phi_result (phi)))
+ last_def = NULL_TREE;
+ else
+ last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
+ last_defs.safe_push (last_def);
+ }
+
+ /* Do the insertion. Then, if new basic blocks have been created in the
+ process, rewind the chain of VDEFs created above to walk the new basic
+ blocks and update the corresponding arguments of the PHI nodes. */
+ update_modified_stmts (seq);
+ if (gimple_find_sub_bbs (seq, &last_gsi))
+ while (last_vdef != gimple_vuse (group->last_stmt))
+ {
+ gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
+ if (stmt_could_throw_p (cfun, stmt))
+ {
+ edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
+ unsigned int i;
+ for (gpi = gsi_start_phis (lp_bb), i = 0;
+ !gsi_end_p (gpi);
+ gsi_next (&gpi), i++)
+ {
+ gphi *phi = gpi.phi ();
+ tree new_def;
+ if (virtual_operand_p (gimple_phi_result (phi)))
+ new_def = last_vdef;
+ else
+ new_def = last_defs[i];
+ add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
+ }
+ }
+ last_vdef = gimple_vuse (stmt);
+ }
+ }
+ else
+ gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
+
+ for (int j = 0; j < 2; ++j)
+ if (load_seq[j])
+ gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
return true;
}
bool ret = false;
FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
{
- if (output_merged_store (merged_store))
+ if (dbg_cnt (store_merging)
+ && output_merged_store (merged_store))
{
unsigned int j;
store_immediate_info *store;
{
gimple *stmt = store->stmt;
gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ /* Don't remove clobbers, they are still useful even if
+ everything is overwritten afterwards. */
+ if (gimple_clobber_p (stmt))
+ continue;
gsi_remove (&gsi, true);
+ if (store->lp_nr)
+ remove_stmt_from_eh_lp (stmt);
if (stmt != merged_store->last_stmt)
{
unlink_stmt_vdef (stmt);
static bool
lhs_valid_for_store_merging_p (tree lhs)
{
- tree_code code = TREE_CODE (lhs);
-
- if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
- || code == COMPONENT_REF || code == BIT_FIELD_REF)
+ if (DECL_P (lhs))
return true;
- return false;
+ switch (TREE_CODE (lhs))
+ {
+ case ARRAY_REF:
+ case ARRAY_RANGE_REF:
+ case BIT_FIELD_REF:
+ case COMPONENT_REF:
+ case MEM_REF:
+ case VIEW_CONVERT_EXPR:
+ return true;
+ default:
+ return false;
+ }
+
+ gcc_unreachable ();
}
/* Return true if the tree RHS is a constant we want to consider
static bool
rhs_valid_for_store_merging_p (tree rhs)
{
- tree type = TREE_TYPE (rhs);
- if (TREE_CODE_CLASS (TREE_CODE (rhs)) != tcc_constant
- || !can_native_encode_type_p (type))
+ unsigned HOST_WIDE_INT size;
+ if (TREE_CODE (rhs) == CONSTRUCTOR
+ && CONSTRUCTOR_NELTS (rhs) == 0
+ && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
+ && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
+ return true;
+ return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
+ && native_encode_expr (rhs, NULL, size) != 0);
+}
+
+/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
+ and return true on success or false on failure. */
+
+static bool
+adjust_bit_pos (poly_offset_int byte_off,
+ poly_int64 *pbitpos,
+ poly_uint64 *pbitregion_start,
+ poly_uint64 *pbitregion_end)
+{
+ poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
+ bit_off += *pbitpos;
+
+ if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
+ {
+ if (maybe_ne (*pbitregion_end, 0U))
+ {
+ bit_off = byte_off << LOG2_BITS_PER_UNIT;
+ bit_off += *pbitregion_start;
+ if (bit_off.to_uhwi (pbitregion_start))
+ {
+ bit_off = byte_off << LOG2_BITS_PER_UNIT;
+ bit_off += *pbitregion_end;
+ if (!bit_off.to_uhwi (pbitregion_end))
+ *pbitregion_end = 0;
+ }
+ else
+ *pbitregion_end = 0;
+ }
+ return true;
+ }
+ else
return false;
+}
- return true;
+/* If MEM is a memory reference usable for store merging (either as
+ store destination or for loads), return the non-NULL base_addr
+ and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
+ Otherwise return NULL; *PBITSIZE should still be valid even in that
+ case. */
+
+static tree
+mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
+ poly_uint64 *pbitpos,
+ poly_uint64 *pbitregion_start,
+ poly_uint64 *pbitregion_end)
+{
+ poly_int64 bitsize, bitpos;
+ poly_uint64 bitregion_start = 0, bitregion_end = 0;
+ machine_mode mode;
+ int unsignedp = 0, reversep = 0, volatilep = 0;
+ tree offset;
+ tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
+ *pbitsize = bitsize;
+ if (known_eq (bitsize, 0))
+ return NULL_TREE;
+
+ if (TREE_CODE (mem) == COMPONENT_REF
+ && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
+ {
+ get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
+ if (maybe_ne (bitregion_end, 0U))
+ bitregion_end += 1;
+ }
+
+ if (reversep)
+ return NULL_TREE;
+
+ /* We do not want to rewrite TARGET_MEM_REFs. */
+ if (TREE_CODE (base_addr) == TARGET_MEM_REF)
+ return NULL_TREE;
+ /* In some cases get_inner_reference may return a
+ MEM_REF [ptr + byteoffset]. For the purposes of this pass
+ canonicalize the base_addr to MEM_REF [ptr] and take
+ byteoffset into account in the bitpos. This occurs in
+ PR 23684 and this way we can catch more chains. */
+ else if (TREE_CODE (base_addr) == MEM_REF)
+ {
+ if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
+ &bitregion_start, &bitregion_end))
+ return NULL_TREE;
+ base_addr = TREE_OPERAND (base_addr, 0);
+ }
+ /* get_inner_reference returns the base object, get at its
+ address now. */
+ else
+ {
+ if (maybe_lt (bitpos, 0))
+ return NULL_TREE;
+ base_addr = build_fold_addr_expr (base_addr);
+ }
+
+ if (offset)
+ {
+ /* If the access has a variable offset then a base decl has to be
+ address-taken to be able to emit pointer-based stores to it.
+ ??? We might be able to get away with re-using the original
+ base up to the first variable part and then wrapping that inside
+ a BIT_FIELD_REF. */
+ tree base = get_base_address (base_addr);
+ if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
+ return NULL_TREE;
+
+ /* Similarly to above for the base, remove the constant part from the offset. */
+ if (TREE_CODE (offset) == PLUS_EXPR
+ && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
+ && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
+ &bitpos, &bitregion_start, &bitregion_end))
+ offset = TREE_OPERAND (offset, 0);
+
+ base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
+ base_addr, offset);
+ }
+
+ if (known_eq (bitregion_end, 0U))
+ {
+ bitregion_start = round_down_to_byte_boundary (bitpos);
+ bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
+ }
+
+ *pbitsize = bitsize;
+ *pbitpos = bitpos;
+ *pbitregion_start = bitregion_start;
+ *pbitregion_end = bitregion_end;
+ return base_addr;
+}
+
+/* Return true if STMT is a load that can be used for store merging.
+ In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
+ BITREGION_END are properties of the corresponding store. */
+
+static bool
+handled_load (gimple *stmt, store_operand_info *op,
+ poly_uint64 bitsize, poly_uint64 bitpos,
+ poly_uint64 bitregion_start, poly_uint64 bitregion_end)
+{
+ if (!is_gimple_assign (stmt))
+ return false;
+ if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
+ {
+ tree rhs1 = gimple_assign_rhs1 (stmt);
+ if (TREE_CODE (rhs1) == SSA_NAME
+ && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
+ bitregion_start, bitregion_end))
+ {
+ /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
+ been optimized earlier, but if allowed here, would confuse the
+ multiple uses counting. */
+ if (op->bit_not_p)
+ return false;
+ op->bit_not_p = !op->bit_not_p;
+ return true;
+ }
+ return false;
+ }
+ if (gimple_vuse (stmt)
+ && gimple_assign_load_p (stmt)
+ && !stmt_can_throw_internal (cfun, stmt)
+ && !gimple_has_volatile_ops (stmt))
+ {
+ tree mem = gimple_assign_rhs1 (stmt);
+ op->base_addr
+ = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
+ &op->bitregion_start,
+ &op->bitregion_end);
+ if (op->base_addr != NULL_TREE
+ && known_eq (op->bitsize, bitsize)
+ && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
+ && known_ge (op->bitpos - op->bitregion_start,
+ bitpos - bitregion_start)
+ && known_ge (op->bitregion_end - op->bitpos,
+ bitregion_end - bitpos))
+ {
+ op->stmt = stmt;
+ op->val = mem;
+ op->bit_not_p = false;
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Return the index number of the landing pad for STMT, if any. */
+
+static int
+lp_nr_for_store (gimple *stmt)
+{
+ if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
+ return 0;
+
+ if (!stmt_could_throw_p (cfun, stmt))
+ return 0;
+
+ return lookup_stmt_eh_lp (stmt);
+}
+
+/* Record the store STMT for store merging optimization if it can be
+ optimized. Return true if any changes were made. */
+
+bool
+pass_store_merging::process_store (gimple *stmt)
+{
+ tree lhs = gimple_assign_lhs (stmt);
+ tree rhs = gimple_assign_rhs1 (stmt);
+ poly_uint64 bitsize, bitpos = 0;
+ poly_uint64 bitregion_start = 0, bitregion_end = 0;
+ tree base_addr
+ = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
+ &bitregion_start, &bitregion_end);
+ if (known_eq (bitsize, 0U))
+ return false;
+
+ bool invalid = (base_addr == NULL_TREE
+ || (maybe_gt (bitsize,
+ (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
+ && TREE_CODE (rhs) != INTEGER_CST
+ && (TREE_CODE (rhs) != CONSTRUCTOR
+ || CONSTRUCTOR_NELTS (rhs) != 0)));
+ enum tree_code rhs_code = ERROR_MARK;
+ bool bit_not_p = false;
+ struct symbolic_number n;
+ gimple *ins_stmt = NULL;
+ store_operand_info ops[2];
+ if (invalid)
+ ;
+ else if (TREE_CODE (rhs) == STRING_CST)
+ {
+ rhs_code = STRING_CST;
+ ops[0].val = rhs;
+ }
+ else if (rhs_valid_for_store_merging_p (rhs))
+ {
+ rhs_code = INTEGER_CST;
+ ops[0].val = rhs;
+ }
+ else if (TREE_CODE (rhs) == SSA_NAME)
+ {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
+ if (!is_gimple_assign (def_stmt))
+ invalid = true;
+ else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
+ bitregion_start, bitregion_end))
+ rhs_code = MEM_REF;
+ else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
+ {
+ tree rhs1 = gimple_assign_rhs1 (def_stmt);
+ if (TREE_CODE (rhs1) == SSA_NAME
+ && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
+ {
+ bit_not_p = true;
+ def_stmt = SSA_NAME_DEF_STMT (rhs1);
+ }
+ }
+
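+ /* Otherwise see whether the value is a bitwise AND/IOR/XOR of a load
+ with either a constant or another load. */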
+ if (rhs_code == ERROR_MARK && !invalid)
+ switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
+ {
+ case BIT_AND_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ tree rhs1, rhs2;
+ rhs1 = gimple_assign_rhs1 (def_stmt);
+ rhs2 = gimple_assign_rhs2 (def_stmt);
+ invalid = true;
+ if (TREE_CODE (rhs1) != SSA_NAME)
+ break;
+ def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
+ if (!is_gimple_assign (def_stmt1)
+ || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
+ bitregion_start, bitregion_end))
+ break;
+ if (rhs_valid_for_store_merging_p (rhs2))
+ ops[1].val = rhs2;
+ else if (TREE_CODE (rhs2) != SSA_NAME)
+ break;
+ else
+ {
+ def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
+ if (!is_gimple_assign (def_stmt2))
+ break;
+ else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
+ bitregion_start, bitregion_end))
+ break;
+ }
+ invalid = false;
+ break;
+ default:
+ invalid = true;
+ break;
+ }
+
+ unsigned HOST_WIDE_INT const_bitsize;
+ if (bitsize.is_constant (&const_bitsize)
+ && (const_bitsize % BITS_PER_UNIT) == 0
+ && const_bitsize <= 64
+ && multiple_p (bitpos, BITS_PER_UNIT))
+ {
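+ /* See whether the stored value can be expressed as a symbolic number
+ (bswap framework above); only accept it if every stored byte is
+ known. */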
+ ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
+ if (ins_stmt)
+ {
+ uint64_t nn = n.n;
+ for (unsigned HOST_WIDE_INT i = 0;
+ i < const_bitsize;
+ i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
+ if ((nn & MARKER_MASK) == 0
+ || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
+ {
+ ins_stmt = NULL;
+ break;
+ }
+ if (ins_stmt)
+ {
+ if (invalid)
+ {
+ rhs_code = LROTATE_EXPR;
+ ops[0].base_addr = NULL_TREE;
+ ops[1].base_addr = NULL_TREE;
+ }
+ invalid = false;
+ }
+ }
+ }
+
+ if (invalid
+ && bitsize.is_constant (&const_bitsize)
+ && ((const_bitsize % BITS_PER_UNIT) != 0
+ || !multiple_p (bitpos, BITS_PER_UNIT))
+ && const_bitsize <= MAX_FIXED_MODE_SIZE)
+ {
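+ /* A store that is not a whole number of bytes, or is misaligned
+ within a byte, is handled as a bit-field insertion into its
+ surrounding bit region. */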
+ /* Bypass a conversion to the bit-field type. */
+ if (!bit_not_p
+ && is_gimple_assign (def_stmt)
+ && CONVERT_EXPR_CODE_P (rhs_code))
+ {
+ tree rhs1 = gimple_assign_rhs1 (def_stmt);
+ if (TREE_CODE (rhs1) == SSA_NAME
+ && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
+ rhs = rhs1;
+ }
+ rhs_code = BIT_INSERT_EXPR;
+ bit_not_p = false;
+ ops[0].val = rhs;
+ ops[0].base_addr = NULL_TREE;
+ ops[1].base_addr = NULL_TREE;
+ invalid = false;
+ }
+ }
+ else
+ invalid = true;
+
+ unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
+ unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
+ if (invalid
+ || !bitsize.is_constant (&const_bitsize)
+ || !bitpos.is_constant (&const_bitpos)
+ || !bitregion_start.is_constant (&const_bitregion_start)
+ || !bitregion_end.is_constant (&const_bitregion_end))
+ return terminate_all_aliasing_chains (NULL, stmt);
+
+ if (!ins_stmt)
+ memset (&n, 0, sizeof (n));
+
+ class imm_store_chain_info **chain_info = NULL;
+ bool ret = false;
+ if (base_addr)
+ chain_info = m_stores.get (base_addr);
+
+ store_immediate_info *info;
+ if (chain_info)
+ {
+ unsigned int ord = (*chain_info)->m_store_info.length ();
+ info = new store_immediate_info (const_bitsize, const_bitpos,
+ const_bitregion_start,
+ const_bitregion_end,
+ stmt, ord, rhs_code, n, ins_stmt,
+ bit_not_p, lp_nr_for_store (stmt),
+ ops[0], ops[1]);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Recording immediate store from stmt:\n");
+ print_gimple_stmt (dump_file, stmt, 0);
+ }
+ (*chain_info)->m_store_info.safe_push (info);
+ ret |= terminate_all_aliasing_chains (chain_info, stmt);
+ /* If we reach the limit of stores to merge in a chain, terminate and
+ process the chain now. */
+ if ((*chain_info)->m_store_info.length ()
+ == (unsigned int) param_max_stores_to_merge)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Reached maximum number of statements to merge:\n");
+ ret |= terminate_and_process_chain (*chain_info);
+ }
+ return ret;
+ }
+
+ /* Does the store alias any existing chain? */
+ ret |= terminate_all_aliasing_chains (NULL, stmt);
+ /* Start a new chain. */
+ class imm_store_chain_info *new_chain
+ = new imm_store_chain_info (m_stores_head, base_addr);
+ info = new store_immediate_info (const_bitsize, const_bitpos,
+ const_bitregion_start,
+ const_bitregion_end,
+ stmt, 0, rhs_code, n, ins_stmt,
+ bit_not_p, lp_nr_for_store (stmt),
+ ops[0], ops[1]);
+ new_chain->m_store_info.safe_push (info);
+ m_stores.put (base_addr, new_chain);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Starting new chain with statement:\n");
+ print_gimple_stmt (dump_file, stmt, 0);
+ fprintf (dump_file, "The base object is:\n");
+ print_generic_expr (dump_file, base_addr);
+ fprintf (dump_file, "\n");
+ }
+ return ret;
+}
+
+/* Return true if STMT is a store valid for store merging. */
+
+static bool
+store_valid_for_store_merging_p (gimple *stmt)
+{
+ return gimple_assign_single_p (stmt)
+ && gimple_vdef (stmt)
+ && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
+ && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
+}
+
+enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
+
+/* Return the status of basic block BB wrt store merging. */
+
+static enum basic_block_status
+get_status_for_store_merging (basic_block bb)
+{
+ unsigned int num_statements = 0;
+ gimple_stmt_iterator gsi;
+ edge e;
+
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (is_gimple_debug (stmt))
+ continue;
+
+ if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
+ break;
+ }
+
+ if (num_statements == 0)
+ return BB_INVALID;
+
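+ /* A block that ends in a valid store and falls through to the next one
+ can have its chains extended into that successor when non-call
+ exceptions are enabled. */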
+ if (cfun->can_throw_non_call_exceptions && cfun->eh
+ && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
+ && (e = find_fallthru_edge (bb->succs))
+ && e->dest == bb->next_bb)
+ return BB_EXTENDED_VALID;
+
+ return num_statements >= 2 ? BB_VALID : BB_INVALID;
}
/* Entry point for the pass. Go over each basic block recording chains of
- immediate stores. Upon encountering a terminating statement (as defined
- by stmt_terminates_chain_p) process the recorded stores and emit the widened
- variants. */
+ immediate stores. Upon encountering a terminating statement (as defined
+ by stmt_terminates_chain_p) process the recorded stores and emit the widened
+ variants. */
unsigned int
pass_store_merging::execute (function *fun)
{
basic_block bb;
hash_set<gimple *> orig_stmts;
+ bool changed = false, open_chains = false;
+
+ /* If the function can throw and catch non-call exceptions, we'll be trying
+ to merge stores across different basic blocks so we need to first unsplit
+ the EH edges in order to streamline the CFG of the function. */
+ if (cfun->can_throw_non_call_exceptions && cfun->eh)
+ unsplit_eh_edges ();
+
+ calculate_dominance_info (CDI_DOMINATORS);
FOR_EACH_BB_FN (bb, fun)
{
+ const basic_block_status bb_status = get_status_for_store_merging (bb);
gimple_stmt_iterator gsi;
- unsigned HOST_WIDE_INT num_statements = 0;
- /* Record the original statements so that we can keep track of
- statements emitted in this pass and not re-process new
- statements. */
- for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- if (is_gimple_debug (gsi_stmt (gsi)))
- continue;
- if (++num_statements > 2)
- break;
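+ /* Close chains left open by the previous block unless this block can
+ continue them, i.e. it is valid for merging and has a single
+ predecessor. */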
+ if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
+ {
+ changed |= terminate_and_process_all_chains ();
+ open_chains = false;
}
- if (num_statements < 2)
+ if (bb_status == BB_INVALID)
continue;
if (dump_file && (dump_flags & TDF_DETAILS))
if (is_gimple_debug (stmt))
continue;
- if (gimple_has_volatile_ops (stmt))
+ if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
{
/* Terminate all chains. */
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Volatile access terminates "
"all chains\n");
- terminate_and_process_all_chains ();
+ changed |= terminate_and_process_all_chains ();
+ open_chains = false;
continue;
}
- if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
- && !stmt_can_throw_internal (stmt)
- && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
- {
- tree lhs = gimple_assign_lhs (stmt);
- tree rhs = gimple_assign_rhs1 (stmt);
-
- HOST_WIDE_INT bitsize, bitpos;
- machine_mode mode;
- int unsignedp = 0, reversep = 0, volatilep = 0;
- tree offset, base_addr;
- base_addr
- = get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode,
- &unsignedp, &reversep, &volatilep);
- /* As a future enhancement we could handle stores with the same
- base and offset. */
- bool invalid = reversep
- || ((bitsize > MAX_BITSIZE_MODE_ANY_INT)
- && (TREE_CODE (rhs) != INTEGER_CST))
- || !rhs_valid_for_store_merging_p (rhs);
-
- /* We do not want to rewrite TARGET_MEM_REFs. */
- if (TREE_CODE (base_addr) == TARGET_MEM_REF)
- invalid = true;
- /* In some cases get_inner_reference may return a
- MEM_REF [ptr + byteoffset]. For the purposes of this pass
- canonicalize the base_addr to MEM_REF [ptr] and take
- byteoffset into account in the bitpos. This occurs in
- PR 23684 and this way we can catch more chains. */
- else if (TREE_CODE (base_addr) == MEM_REF)
- {
- offset_int bit_off, byte_off = mem_ref_offset (base_addr);
- bit_off = byte_off << LOG2_BITS_PER_UNIT;
- bit_off += bitpos;
- if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
- bitpos = bit_off.to_shwi ();
- else
- invalid = true;
- base_addr = TREE_OPERAND (base_addr, 0);
- }
- /* get_inner_reference returns the base object, get at its
- address now. */
- else
- {
- if (bitpos < 0)
- invalid = true;
- base_addr = build_fold_addr_expr (base_addr);
- }
-
- if (! invalid
- && offset != NULL_TREE)
- {
- /* If the access is variable offset then a base
- decl has to be address-taken to be able to
- emit pointer-based stores to it.
- ??? We might be able to get away with
- re-using the original base up to the first
- variable part and then wrapping that inside
- a BIT_FIELD_REF. */
- tree base = get_base_address (base_addr);
- if (! base
- || (DECL_P (base)
- && ! TREE_ADDRESSABLE (base)))
- invalid = true;
- else
- base_addr = build2 (POINTER_PLUS_EXPR,
- TREE_TYPE (base_addr),
- base_addr, offset);
- }
-
- struct imm_store_chain_info **chain_info
- = m_stores.get (base_addr);
-
- if (!invalid)
- {
- store_immediate_info *info;
- if (chain_info)
- {
- info = new store_immediate_info (
- bitsize, bitpos, stmt,
- (*chain_info)->m_store_info.length ());
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file,
- "Recording immediate store from stmt:\n");
- print_gimple_stmt (dump_file, stmt, 0);
- }
- (*chain_info)->m_store_info.safe_push (info);
- /* If we reach the limit of stores to merge in a chain
- terminate and process the chain now. */
- if ((*chain_info)->m_store_info.length ()
- == (unsigned int)
- PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
- "Reached maximum number of statements"
- " to merge:\n");
- terminate_and_release_chain (*chain_info);
- }
- continue;
- }
+ if (store_valid_for_store_merging_p (stmt))
+ changed |= process_store (stmt);
+ else
+ changed |= terminate_all_aliasing_chains (NULL, stmt);
+ }
- /* Store aliases any existing chain? */
- terminate_all_aliasing_chains (chain_info, false, stmt);
- /* Start a new chain. */
- struct imm_store_chain_info *new_chain
- = new imm_store_chain_info (m_stores_head, base_addr);
- info = new store_immediate_info (bitsize, bitpos,
- stmt, 0);
- new_chain->m_store_info.safe_push (info);
- m_stores.put (base_addr, new_chain);
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file,
- "Starting new chain with statement:\n");
- print_gimple_stmt (dump_file, stmt, 0);
- fprintf (dump_file, "The base object is:\n");
- print_generic_expr (dump_file, base_addr);
- fprintf (dump_file, "\n");
- }
- }
- else
- terminate_all_aliasing_chains (chain_info,
- offset != NULL_TREE, stmt);
+ if (bb_status == BB_EXTENDED_VALID)
+ open_chains = true;
+ else
+ {
+ changed |= terminate_and_process_all_chains ();
+ open_chains = false;
+ }
+ }
- continue;
- }
+ if (open_chains)
+ changed |= terminate_and_process_all_chains ();
- terminate_all_aliasing_chains (NULL, false, stmt);
- }
- terminate_and_process_all_chains ();
+ /* If the function can throw and catch non-call exceptions and something
+ changed during the pass, then the CFG has (very likely) changed too. */
+ if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
+ {
+ free_dominance_info (CDI_DOMINATORS);
+ return TODO_cleanup_cfg;
}
+
return 0;
}
}
}
-/* Test shift_bytes_in_array and that it carries bits across between
+/* Test that shift_bytes_in_array_left carries bits across between
bytes correctly. */
static void
-verify_shift_bytes_in_array (void)
+verify_shift_bytes_in_array_left (void)
{
/* byte 1 | byte 0
00011111 | 11100000. */
memcpy (in, orig, sizeof orig);
unsigned char expected[2] = { 0x80, 0x7f };
- shift_bytes_in_array (in, sizeof (in), 2);
+ shift_bytes_in_array_left (in, sizeof (in), 2);
verify_array_eq (in, expected, sizeof (in));
memcpy (in, orig, sizeof orig);
memcpy (expected, orig, sizeof orig);
/* Check that shifting by zero doesn't change anything. */
- shift_bytes_in_array (in, sizeof (in), 0);
+ shift_bytes_in_array_left (in, sizeof (in), 0);
verify_array_eq (in, expected, sizeof (in));
}
verify_array_eq (in, expected, sizeof in);
}
-/* Test verify_clear_bit_region_be that it clears exactly the bits asked and
+/* Test that clear_bit_region_be clears exactly the bits asked and
nothing more. */
static void
void
store_merging_c_tests (void)
{
- verify_shift_bytes_in_array ();
+ verify_shift_bytes_in_array_left ();
verify_shift_bytes_in_array_right ();
verify_clear_bit_region ();
verify_clear_bit_region_be ();