+2017-11-20 Jakub Jelinek <jakub@redhat.com>
+
+ * tree-ssa-math-opts.c (nop_stats, bswap_stats, struct symbolic_number,
+ BITS_PER_MARKER, MARKER_MASK, MARKER_BYTE_UNKNOWN, HEAD_MARKER, CMPNOP,
+ CMPXCHG, do_shift_rotate, verify_symbolic_number_p,
+ init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge,
+ find_bswap_or_nop_1, find_bswap_or_nop, pass_data_optimize_bswap,
+ class pass_optimize_bswap, bswap_replace,
+ pass_optimize_bswap::execute): Moved to ...
+ * gimple-ssa-store-merging.c: ... this file.
+ Include optabs-tree.h.
+ (nop_stats, bswap_stats, do_shift_rotate, verify_symbolic_number_p,
+ init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge,
+ find_bswap_or_nop_1, find_bswap_or_nop, bswap_replace): Put into
+ anonymous namespace, remove static keywords.
+ (pass_optimize_bswap::gate): Test BITS_PER_UNIT == 8 here...
+ (pass_optimize_bswap::execute): ... rather than here. Formatting fix.
+
2017-11-20 Jan Hubicka <hubicka@ucw.cz>
PR bootstrap/83062
-/* GIMPLE store merging pass.
- Copyright (C) 2016-2017 Free Software Foundation, Inc.
+/* GIMPLE store merging and byte swapping passes.
+ Copyright (C) 2009-2017 Free Software Foundation, Inc.
Contributed by ARM Ltd.
This file is part of GCC.
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* The purpose of this pass is to combine multiple memory stores of
- constant values, values loaded from memory or bitwise operations
+/* The purpose of the store merging pass is to combine multiple memory
+ stores of constant values, values loaded from memory or bitwise operations
on those to consecutive memory locations into fewer wider stores.
 For example, if we have a sequence performing four byte stores to
consecutive memory locations:
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h" /* For get_bit_range. */
+#include "optabs-tree.h"
#include "selftest.h"
/* The maximum size (in bits) of the stores this pass should generate. */
namespace {
+struct
+{
+ /* Number of hand-written 16-bit nop / bswaps found. */
+ int found_16bit;
+
+ /* Number of hand-written 32-bit nop / bswaps found. */
+ int found_32bit;
+
+ /* Number of hand-written 64-bit nop / bswaps found. */
+ int found_64bit;
+} nop_stats, bswap_stats;
+
+/* A symbolic number structure is used to detect byte permutation and selection
+ patterns of a source. To achieve that, its field N contains an artificial
+ number consisting of BITS_PER_MARKER sized markers tracking where each
+ byte comes from in the source:
+
+ 0 - target byte has the value 0
+ FF - target byte has an unknown value (e.g. due to sign extension)
+ 1..size - marker value is the byte index in the source (0 for lsb).
+
+ To detect permutations on memory sources (arrays and structures), a symbolic
+ number is also associated with:
+ - a base address BASE_ADDR and an OFFSET giving the address of the source;
+ - a range which gives the difference between the highest and lowest accessed
+ memory location to make such a symbolic number;
+ - the address SRC of the source element of lowest address as a convenience
+ to easily get BASE_ADDR + offset + lowest bytepos;
+ - number of expressions N_OPS bitwise ored together to represent
+ approximate cost of the computation.
+
+ Note 1: the range is different from size as size reflects the size of the
+ type of the current expression. For instance, for an array char a[],
+ (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
+ (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
+ time a range of 1.
+
+ Note 2: for non-memory sources, range holds the same value as size.
+
+ Note 3: SRC points to the SSA_NAME in case of non-memory source. */
+
+struct symbolic_number {
+ uint64_t n;
+ tree type;
+ tree base_addr;
+ tree offset;
+ HOST_WIDE_INT bytepos;
+ tree src;
+ tree alias_set;
+ tree vuse;
+ unsigned HOST_WIDE_INT range;
+ int n_ops;
+};
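+
+/* For illustration, a plain 32-bit load from bytes P[0..3] would be
+ described by N = 0x04030201 (one marker per byte, counting from 1 at
+ the lsb), TYPE the 32-bit type of the load, BASE_ADDR the base of P,
+ BYTEPOS 0, RANGE 4 and N_OPS 1. */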
+
+#define BITS_PER_MARKER 8
+#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
+#define MARKER_BYTE_UNKNOWN MARKER_MASK
+#define HEAD_MARKER(n, size) \
+ ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
+
+/* The number which the find_bswap_or_nop_1 result should match in
+ order to have a nop. The number is masked according to the size of
+ the symbolic number before using it. */
+#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
+ (uint64_t)0x08070605 << 32 | 0x04030201)
+
+/* The number which the find_bswap_or_nop_1 result should match in
+ order to have a byte swap. The number is masked according to the
+ size of the symbolic number before using it. */
+#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
+ (uint64_t)0x01020304 << 32 | 0x05060708)
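+
+/* For a 4-byte symbolic number, masking CMPNOP yields 0x04030201 (the
+ identity byte order) and shifting CMPXCHG down yields 0x01020304 (a
+ full byte swap), matching the adjustments done in find_bswap_or_nop. */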
+
+/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
+ number N. Return false if the requested operation is not permitted
+ on a symbolic number. */
+
+inline bool
+do_shift_rotate (enum tree_code code,
+ struct symbolic_number *n,
+ int count)
+{
+ int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ unsigned head_marker;
+
+ if (count % BITS_PER_UNIT != 0)
+ return false;
+ count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
+
+ /* Zero out the extra bits of N in order to avoid them being shifted
+ into the significant bits. */
+ if (size < 64 / BITS_PER_MARKER)
+ n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+
+ switch (code)
+ {
+ case LSHIFT_EXPR:
+ n->n <<= count;
+ break;
+ case RSHIFT_EXPR:
+ head_marker = HEAD_MARKER (n->n, size);
+ n->n >>= count;
+ /* Arithmetic shift of signed type: result is dependent on the value. */
+ if (!TYPE_UNSIGNED (n->type) && head_marker)
+ for (i = 0; i < count / BITS_PER_MARKER; i++)
+ n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
+ << ((size - 1 - i) * BITS_PER_MARKER);
+ break;
+ case LROTATE_EXPR:
+ n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
+ break;
+ case RROTATE_EXPR:
+ n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
+ break;
+ default:
+ return false;
+ }
+ /* Zero unused bits for size. */
+ if (size < 64 / BITS_PER_MARKER)
+ n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+ return true;
+}
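+
+/* For example, an 8-bit RSHIFT_EXPR on the 4-byte symbolic number
+ 0x04030201 yields 0x00040302 for an unsigned type; for a signed type
+ the nonzero head marker makes the shifted-in byte unknown, giving
+ 0xff040302. */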
+
+/* Perform sanity checking for the symbolic number N and the gimple
+ statement STMT. */
+
+inline bool
+verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
+{
+ tree lhs_type;
+
+ lhs_type = gimple_expr_type (stmt);
+
+ if (TREE_CODE (lhs_type) != INTEGER_TYPE)
+ return false;
+
+ if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
+ return false;
+
+ return true;
+}
+
+/* Initialize the symbolic number N for the bswap pass from the base element
+ SRC manipulated by the bitwise OR expression. */
+
+bool
+init_symbolic_number (struct symbolic_number *n, tree src)
+{
+ int size;
+
+ if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
+ return false;
+
+ n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
+ n->src = src;
+
+ /* Set up the symbolic number N by setting each byte to a value between 1 and
+ the byte size of rhs1. The highest order byte is set to that byte size and
+ the lowest order byte to 1. */
+ n->type = TREE_TYPE (src);
+ size = TYPE_PRECISION (n->type);
+ if (size % BITS_PER_UNIT != 0)
+ return false;
+ size /= BITS_PER_UNIT;
+ if (size > 64 / BITS_PER_MARKER)
+ return false;
+ n->range = size;
+ n->n = CMPNOP;
+ n->n_ops = 1;
+
+ if (size < 64 / BITS_PER_MARKER)
+ n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+
+ return true;
+}
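+
+/* For a 16-bit SRC this leaves N = 0x0201 (CMPNOP masked down to two
+ markers), RANGE = 2 and N_OPS = 1. */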
+
+/* Check if STMT might be a byte swap or a nop from a memory source and return
+ the answer. If so, REF is that memory source, and the base of the memory
+ area accessed and the offset of the access from that base are recorded
+ in N. */
+
+bool
+find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
+{
+ /* Leaf node is an array or component ref. Memorize its base and
+ offset from base to compare to other such leaf nodes. */
+ HOST_WIDE_INT bitsize, bitpos;
+ machine_mode mode;
+ int unsignedp, reversep, volatilep;
+ tree offset, base_addr;
+
+ /* Not prepared to handle PDP endian. */
+ if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
+ return false;
+
+ if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
+ return false;
+
+ base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &reversep, &volatilep);
+
+ if (TREE_CODE (base_addr) == MEM_REF)
+ {
+ offset_int bit_offset = 0;
+ tree off = TREE_OPERAND (base_addr, 1);
+
+ if (!integer_zerop (off))
+ {
+ offset_int boff, coff = mem_ref_offset (base_addr);
+ boff = coff << LOG2_BITS_PER_UNIT;
+ bit_offset += boff;
+ }
+
+ base_addr = TREE_OPERAND (base_addr, 0);
+
+ /* Avoid returning a negative bitpos as this may wreak havoc later. */
+ if (wi::neg_p (bit_offset))
+ {
+ offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
+ offset_int tem = wi::bit_and_not (bit_offset, mask);
+ /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
+ Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
+ bit_offset -= tem;
+ tem >>= LOG2_BITS_PER_UNIT;
+ if (offset)
+ offset = size_binop (PLUS_EXPR, offset,
+ wide_int_to_tree (sizetype, tem));
+ else
+ offset = wide_int_to_tree (sizetype, tem);
+ }
+
+ bitpos += bit_offset.to_shwi ();
+ }
+
+ if (bitpos % BITS_PER_UNIT)
+ return false;
+ if (bitsize % BITS_PER_UNIT)
+ return false;
+ if (reversep)
+ return false;
+
+ if (!init_symbolic_number (n, ref))
+ return false;
+ n->base_addr = base_addr;
+ n->offset = offset;
+ n->bytepos = bitpos / BITS_PER_UNIT;
+ n->alias_set = reference_alias_ptr_type (ref);
+ n->vuse = gimple_vuse (stmt);
+ return true;
+}
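+
+/* For example, a byte load from MEM[p_1 + 4] is recorded as BASE_ADDR
+ p_1, OFFSET NULL, BYTEPOS 4, together with its alias pointer type and
+ virtual use. */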
+
+/* Compute the symbolic number N representing the result of a bitwise OR on
+ the two symbolic numbers N1 and N2 whose source statements are respectively
+ SOURCE_STMT1 and SOURCE_STMT2. */
+
+gimple *
+perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
+ gimple *source_stmt2, struct symbolic_number *n2,
+ struct symbolic_number *n)
+{
+ int i, size;
+ uint64_t mask;
+ gimple *source_stmt;
+ struct symbolic_number *n_start;
+
+ tree rhs1 = gimple_assign_rhs1 (source_stmt1);
+ if (TREE_CODE (rhs1) == BIT_FIELD_REF
+ && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
+ rhs1 = TREE_OPERAND (rhs1, 0);
+ tree rhs2 = gimple_assign_rhs1 (source_stmt2);
+ if (TREE_CODE (rhs2) == BIT_FIELD_REF
+ && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
+ rhs2 = TREE_OPERAND (rhs2, 0);
+
+ /* Sources are different, cancel bswap if they are not memory locations
+ with the same base (array, structure, ...). */
+ if (rhs1 != rhs2)
+ {
+ uint64_t inc;
+ HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
+ struct symbolic_number *toinc_n_ptr, *n_end;
+ basic_block bb1, bb2;
+
+ if (!n1->base_addr || !n2->base_addr
+ || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
+ return NULL;
+
+ if (!n1->offset != !n2->offset
+ || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
+ return NULL;
+
+ if (n1->bytepos < n2->bytepos)
+ {
+ n_start = n1;
+ start_sub = n2->bytepos - n1->bytepos;
+ }
+ else
+ {
+ n_start = n2;
+ start_sub = n1->bytepos - n2->bytepos;
+ }
+
+ bb1 = gimple_bb (source_stmt1);
+ bb2 = gimple_bb (source_stmt2);
+ if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
+ source_stmt = source_stmt1;
+ else
+ source_stmt = source_stmt2;
+
+ /* Find the highest address at which a load is performed and
+ compute related info. */
+ end1 = n1->bytepos + (n1->range - 1);
+ end2 = n2->bytepos + (n2->range - 1);
+ if (end1 < end2)
+ {
+ end = end2;
+ end_sub = end2 - end1;
+ }
+ else
+ {
+ end = end1;
+ end_sub = end1 - end2;
+ }
+ n_end = (end2 > end1) ? n2 : n1;
+
+ /* Find the symbolic number whose lsb is the most significant. */
+ if (BYTES_BIG_ENDIAN)
+ toinc_n_ptr = (n_end == n1) ? n2 : n1;
+ else
+ toinc_n_ptr = (n_start == n1) ? n2 : n1;
+
+ n->range = end - n_start->bytepos + 1;
+
+ /* Check that the range of memory covered can be represented by
+ a symbolic number. */
+ if (n->range > 64 / BITS_PER_MARKER)
+ return NULL;
+
+ /* Reinterpret the byte marks in the symbolic number holding the value
+ of bigger weight according to target endianness. */
+ inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
+ size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
+ for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
+ {
+ unsigned marker
+ = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
+ if (marker && marker != MARKER_BYTE_UNKNOWN)
+ toinc_n_ptr->n += inc;
+ }
+ }
+ else
+ {
+ n->range = n1->range;
+ n_start = n1;
+ source_stmt = source_stmt1;
+ }
+
+ if (!n1->alias_set
+ || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
+ n->alias_set = n1->alias_set;
+ else
+ n->alias_set = ptr_type_node;
+ n->vuse = n_start->vuse;
+ n->base_addr = n_start->base_addr;
+ n->offset = n_start->offset;
+ n->src = n_start->src;
+ n->bytepos = n_start->bytepos;
+ n->type = n_start->type;
+ size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+
+ for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
+ {
+ uint64_t masked1, masked2;
+
+ masked1 = n1->n & mask;
+ masked2 = n2->n & mask;
+ if (masked1 && masked2 && masked1 != masked2)
+ return NULL;
+ }
+ n->n = n1->n | n2->n;
+ n->n_ops = n1->n_ops + n2->n_ops;
+
+ return source_stmt;
+}
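+
+/* As a worked example, for (uint16_t) p[0] | ((uint16_t) p[1] << 8) on
+ a little-endian target, N1 (for p[0]) is 0x01 with bytepos 0 and N2
+ (for p[1] << 8) is 0x0100 with bytepos 1. N2's nonzero marker is
+ incremented by START_SUB = 1, giving 0x0200, so the merged number is
+ 0x0201 with a range of 2: a native-endian 16-bit load. */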
+
+/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
+ the operation given by the rhs of STMT on the result. If the operation
+ could successfully be executed the function returns a gimple stmt whose
+ rhs's first tree is the expression of the source operand, and NULL
+ otherwise. */
+
+gimple *
+find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
+{
+ enum tree_code code;
+ tree rhs1, rhs2 = NULL;
+ gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
+ enum gimple_rhs_class rhs_class;
+
+ if (!limit || !is_gimple_assign (stmt))
+ return NULL;
+
+ rhs1 = gimple_assign_rhs1 (stmt);
+
+ if (find_bswap_or_nop_load (stmt, rhs1, n))
+ return stmt;
+
+ /* Handle BIT_FIELD_REF. */
+ if (TREE_CODE (rhs1) == BIT_FIELD_REF
+ && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
+ {
+ unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
+ unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
+ if (bitpos % BITS_PER_UNIT == 0
+ && bitsize % BITS_PER_UNIT == 0
+ && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
+ {
+ /* Handle big-endian bit numbering in BIT_FIELD_REF. */
+ if (BYTES_BIG_ENDIAN)
+ bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
+
+ /* Shift. */
+ if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
+ return NULL;
+
+ /* Mask. */
+ uint64_t mask = 0;
+ uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
+ for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
+ i++, tmp <<= BITS_PER_UNIT)
+ mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
+ n->n &= mask;
+
+ /* Convert. */
+ n->type = TREE_TYPE (rhs1);
+ if (!n->base_addr)
+ n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+
+ return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
+ }
+
+ return NULL;
+ }
+
+ if (TREE_CODE (rhs1) != SSA_NAME)
+ return NULL;
+
+ code = gimple_assign_rhs_code (stmt);
+ rhs_class = gimple_assign_rhs_class (stmt);
+ rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
+
+ if (rhs_class == GIMPLE_BINARY_RHS)
+ rhs2 = gimple_assign_rhs2 (stmt);
+
+ /* Handle unary rhs and binary rhs with integer constants as second
+ operand. */
+
+ if (rhs_class == GIMPLE_UNARY_RHS
+ || (rhs_class == GIMPLE_BINARY_RHS
+ && TREE_CODE (rhs2) == INTEGER_CST))
+ {
+ if (code != BIT_AND_EXPR
+ && code != LSHIFT_EXPR
+ && code != RSHIFT_EXPR
+ && code != LROTATE_EXPR
+ && code != RROTATE_EXPR
+ && !CONVERT_EXPR_CODE_P (code))
+ return NULL;
+
+ source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
+
+ /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
+ we have to initialize the symbolic number. */
+ if (!source_stmt1)
+ {
+ if (gimple_assign_load_p (stmt)
+ || !init_symbolic_number (n, rhs1))
+ return NULL;
+ source_stmt1 = stmt;
+ }
+
+ switch (code)
+ {
+ case BIT_AND_EXPR:
+ {
+ int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ uint64_t val = int_cst_value (rhs2), mask = 0;
+ uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
+
+ /* Only constants masking full bytes are allowed. */
+ for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
+ if ((val & tmp) != 0 && (val & tmp) != tmp)
+ return NULL;
+ else if (val & tmp)
+ mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
+
+ n->n &= mask;
+ }
+ break;
+ case LSHIFT_EXPR:
+ case RSHIFT_EXPR:
+ case LROTATE_EXPR:
+ case RROTATE_EXPR:
+ if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
+ return NULL;
+ break;
+ CASE_CONVERT:
+ {
+ int i, type_size, old_type_size;
+ tree type;
+
+ type = gimple_expr_type (stmt);
+ type_size = TYPE_PRECISION (type);
+ if (type_size % BITS_PER_UNIT != 0)
+ return NULL;
+ type_size /= BITS_PER_UNIT;
+ if (type_size > 64 / BITS_PER_MARKER)
+ return NULL;
+
+ /* Sign extension: result is dependent on the value. */
+ old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
+ && HEAD_MARKER (n->n, old_type_size))
+ for (i = 0; i < type_size - old_type_size; i++)
+ n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
+ << ((type_size - 1 - i) * BITS_PER_MARKER);
+
+ if (type_size < 64 / BITS_PER_MARKER)
+ {
+ /* If STMT casts to a smaller type mask out the bits not
+ belonging to the target type. */
+ n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
+ }
+ n->type = type;
+ if (!n->base_addr)
+ n->range = type_size;
+ }
+ break;
+ default:
+ return NULL;
+ };
+ return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
+ }
+
+ /* Handle binary rhs. */
+
+ if (rhs_class == GIMPLE_BINARY_RHS)
+ {
+ struct symbolic_number n1, n2;
+ gimple *source_stmt, *source_stmt2;
+
+ if (code != BIT_IOR_EXPR)
+ return NULL;
+
+ if (TREE_CODE (rhs2) != SSA_NAME)
+ return NULL;
+
+ rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
+
+ switch (code)
+ {
+ case BIT_IOR_EXPR:
+ source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
+
+ if (!source_stmt1)
+ return NULL;
+
+ source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
+
+ if (!source_stmt2)
+ return NULL;
+
+ if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
+ return NULL;
+
+ if (!n1.vuse != !n2.vuse
+ || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
+ return NULL;
+
+ source_stmt
+ = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
+
+ if (!source_stmt)
+ return NULL;
+
+ if (!verify_symbolic_number_p (n, stmt))
+ return NULL;
+
+ break;
+ default:
+ return NULL;
+ }
+ return source_stmt;
+ }
+ return NULL;
+}
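+
+/* For instance, given an unsigned 32-bit X whose symbolic number starts
+ as 0x04030201, the expression
+ (X >> 24) | ((X >> 8) & 0xff00) | ((X << 8) & 0xff0000) | (X << 24)
+ is reduced to the merged number 0x01020304, which the caller below
+ recognizes as CMPXCHG, i.e. a full byte swap. */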
+
+/* Check if STMT completes a bswap implementation or a read in a given
+ endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
+ accordingly. It also sets N to represent the kind of operations
+ performed: size of the resulting expression and whether it works on
+ a memory source, and if so alias-set and vuse. Finally, the
+ function returns a stmt whose rhs's first tree is the source
+ expression. */
+
+gimple *
+find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
+{
+ unsigned rsize;
+ uint64_t tmpn, mask;
+/* The number which the find_bswap_or_nop_1 result should match in order
+ to have a full byte swap. The number is shifted to the right
+ according to the size of the symbolic number before using it. */
+ uint64_t cmpxchg = CMPXCHG;
+ uint64_t cmpnop = CMPNOP;
+
+ gimple *ins_stmt;
+ int limit;
+
+ /* The last parameter determines the search depth limit. It usually
+ correlates directly to the number n of bytes to be touched. We
+ increase that number by log2(n) + 1 here in order to also
+ cover signed -> unsigned conversions of the src operand as can be seen
+ in libgcc, and for initial shift/and operation of the src operand. */
+ limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
+ limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
+ ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
+
+ if (!ins_stmt)
+ return NULL;
+
+ /* Find real size of result (highest non-zero byte). */
+ if (n->base_addr)
+ for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
+ else
+ rsize = n->range;
+
+ /* Zero out the bits corresponding to untouched bytes in original gimple
+ expression. */
+ if (n->range < (int) sizeof (int64_t))
+ {
+ mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
+ cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
+ cmpnop &= mask;
+ }
+
+ /* Zero out the bits corresponding to unused bytes in the result of the
+ gimple expression. */
+ if (rsize < n->range)
+ {
+ if (BYTES_BIG_ENDIAN)
+ {
+ mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
+ cmpxchg &= mask;
+ cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
+ }
+ else
+ {
+ mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
+ cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
+ cmpnop &= mask;
+ }
+ n->range = rsize;
+ }
+
+ /* A complete byte swap should make the symbolic number start with
+ the largest digit in the highest order byte. An unchanged symbolic
+ number indicates a read with the same endianness as the target
+ architecture. */
+ if (n->n == cmpnop)
+ *bswap = false;
+ else if (n->n == cmpxchg)
+ *bswap = true;
+ else
+ return NULL;
+
+ /* Useless bit manipulation performed by code. */
+ if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
+ return NULL;
+
+ n->range *= BITS_PER_UNIT;
+ return ins_stmt;
+}
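+
+/* E.g. a detected 32-bit byte swap returns with *BSWAP true and
+ N->range set to 32 (bits); a native-endian read returns with *BSWAP
+ false, unless it has no memory source and consists of a single
+ operation, in which case the bit manipulation is useless and NULL is
+ returned. */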
+
+const pass_data pass_data_optimize_bswap =
+{
+ GIMPLE_PASS, /* type */
+ "bswap", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_optimize_bswap : public gimple_opt_pass
+{
+public:
+ pass_optimize_bswap (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
+ }
+
+ virtual unsigned int execute (function *);
+
+}; // class pass_optimize_bswap
+
+/* Perform the bswap optimization: replace the expression computed in the rhs
+ of CUR_STMT by an equivalent bswap, load or load + bswap expression.
+ Which of these alternatives replaces the rhs is given by N->base_addr (non
+ null if a load is needed) and BSWAP. The type, VUSE and alias set of the
+ load to perform are also given in N while the builtin bswap to invoke is
+ given by FNDECL. Finally, if a load is involved, INS_STMT refers to one of
+ the load statements involved to construct the rhs in CUR_STMT and N->range
+ gives the size of the rhs expression for maintaining some statistics.
+
+ Note that if the replacement involves a load, CUR_STMT is moved just before
+ INS_STMT to do the load with the same VUSE which can lead to CUR_STMT
+ changing basic block. */
+
+bool
+bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
+ tree bswap_type, tree load_type, struct symbolic_number *n,
+ bool bswap)
+{
+ gimple_stmt_iterator gsi;
+ tree src, tmp, tgt;
+ gimple *bswap_stmt;
+
+ gsi = gsi_for_stmt (cur_stmt);
+ src = n->src;
+ tgt = gimple_assign_lhs (cur_stmt);
+
+ /* Need to load the value from memory first. */
+ if (n->base_addr)
+ {
+ gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
+ tree addr_expr, addr_tmp, val_expr, val_tmp;
+ tree load_offset_ptr, aligned_load_type;
+ gimple *addr_stmt, *load_stmt;
+ unsigned align;
+ HOST_WIDE_INT load_offset = 0;
+ basic_block ins_bb, cur_bb;
+
+ ins_bb = gimple_bb (ins_stmt);
+ cur_bb = gimple_bb (cur_stmt);
+ if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
+ return false;
+
+ align = get_object_alignment (src);
+
+ /* Move cur_stmt just before one of the loads of the original
+ sequence to ensure it has the same VUSE. See PR61517 for what could
+ go wrong. */
+ if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
+ reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
+ gsi_move_before (&gsi, &gsi_ins);
+ gsi = gsi_for_stmt (cur_stmt);
+
+ /* Compute address to load from and cast according to the size
+ of the load. */
+ addr_expr = build_fold_addr_expr (unshare_expr (src));
+ if (is_gimple_mem_ref_addr (addr_expr))
+ addr_tmp = addr_expr;
+ else
+ {
+ addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
+ "load_src");
+ addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
+ gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
+ }
+
+ /* Perform the load. */
+ aligned_load_type = load_type;
+ if (align < TYPE_ALIGN (load_type))
+ aligned_load_type = build_aligned_type (load_type, align);
+ load_offset_ptr = build_int_cst (n->alias_set, load_offset);
+ val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
+ load_offset_ptr);
+
+ if (!bswap)
+ {
+ if (n->range == 16)
+ nop_stats.found_16bit++;
+ else if (n->range == 32)
+ nop_stats.found_32bit++;
+ else
+ {
+ gcc_assert (n->range == 64);
+ nop_stats.found_64bit++;
+ }
+
+ /* Convert the result of the load if necessary. */
+ if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
+ {
+ val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
+ "load_dst");
+ load_stmt = gimple_build_assign (val_tmp, val_expr);
+ gimple_set_vuse (load_stmt, n->vuse);
+ gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+ gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
+ }
+ else
+ {
+ gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
+ gimple_set_vuse (cur_stmt, n->vuse);
+ }
+ update_stmt (cur_stmt);
+
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "%d bit load in target endianness found at: ",
+ (int) n->range);
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ }
+ return true;
+ }
+ else
+ {
+ val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
+ load_stmt = gimple_build_assign (val_tmp, val_expr);
+ gimple_set_vuse (load_stmt, n->vuse);
+ gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+ }
+ src = val_tmp;
+ }
+ else if (!bswap)
+ {
+ gimple *g;
+ if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
+ {
+ if (!is_gimple_val (src))
+ return false;
+ g = gimple_build_assign (tgt, NOP_EXPR, src);
+ }
+ else
+ g = gimple_build_assign (tgt, src);
+ if (n->range == 16)
+ nop_stats.found_16bit++;
+ else if (n->range == 32)
+ nop_stats.found_32bit++;
+ else
+ {
+ gcc_assert (n->range == 64);
+ nop_stats.found_64bit++;
+ }
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "%d bit reshuffle in target endianness found at: ",
+ (int) n->range);
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ }
+ gsi_replace (&gsi, g, true);
+ return true;
+ }
+ else if (TREE_CODE (src) == BIT_FIELD_REF)
+ src = TREE_OPERAND (src, 0);
+
+ if (n->range == 16)
+ bswap_stats.found_16bit++;
+ else if (n->range == 32)
+ bswap_stats.found_32bit++;
+ else
+ {
+ gcc_assert (n->range == 64);
+ bswap_stats.found_64bit++;
+ }
+
+ tmp = src;
+
+ /* Convert the src expression if necessary. */
+ if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
+ {
+ gimple *convert_stmt;
+
+ tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
+ convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
+ gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
+ }
+
+ /* Canonical form for 16 bit bswap is a rotate expression. Only 16 bit values
+ are considered, as a rotation of 2N bit values by N bits is generally not
+ equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
+ gives 0x03040102 while a bswap for that value is 0x04030201. */
+ if (bswap && n->range == 16)
+ {
+ tree count = build_int_cst (NULL, BITS_PER_UNIT);
+ src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
+ bswap_stmt = gimple_build_assign (NULL, src);
+ }
+ else
+ bswap_stmt = gimple_build_call (fndecl, 1, tmp);
+
+ tmp = tgt;
+
+ /* Convert the result if necessary. */
+ if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
+ {
+ gimple *convert_stmt;
+
+ tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
+ convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
+ gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
+ }
+
+ gimple_set_lhs (bswap_stmt, tmp);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "%d bit bswap implementation found at: ",
+ (int) n->range);
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ }
+
+ gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
+ return true;
+}
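+
+/* Roughly, for a 32-bit load + bswap the replacement emitted above is
+ (temporary names follow the make_temp_ssa_name prefixes and are only
+ illustrative):
+ load_dst_1 = MEM[(uint32_t *) &src];
+ bswapdst_2 = __builtin_bswap32 (load_dst_1);
+ tgt = (lhs_type) bswapdst_2;
+ with the conversions present only when the types differ. */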
+
+/* Find manual byte swap implementations as well as loads in a given
+ endianness. Byte swaps are turned into a bswap builtin invocation
+ while endian loads are converted to a bswap builtin invocation or a
+ simple load according to the target endianness. */
+
+unsigned int
+pass_optimize_bswap::execute (function *fun)
+{
+ basic_block bb;
+ bool bswap32_p, bswap64_p;
+ bool changed = false;
+ tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
+
+ bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
+ bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
+ && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
+ || (bswap32_p && word_mode == SImode)));
+
+ /* Determine the argument type of the builtins. The code later on
+ assumes that the return and argument type are the same. */
+ if (bswap32_p)
+ {
+ tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+ bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+
+ if (bswap64_p)
+ {
+ tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+ bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+
+ memset (&nop_stats, 0, sizeof (nop_stats));
+ memset (&bswap_stats, 0, sizeof (bswap_stats));
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ gimple_stmt_iterator gsi;
+
+ /* We do a reverse scan for bswap patterns to make sure we get the
+ widest match. As bswap pattern matching doesn't handle previously
+ inserted smaller bswap replacements as sub-patterns, the wider
+ variant wouldn't be detected. */
+ for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
+ {
+ gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
+ tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+ enum tree_code code;
+ struct symbolic_number n;
+ bool bswap;
+
+ /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
+ might be moved to a different basic block by bswap_replace and gsi
+ must not point to it if that's the case. Doing the gsi_prev
+ here makes sure that gsi points to the statement previous to
+ cur_stmt while still making sure that all statements are
+ considered in this basic block. */
+ gsi_prev (&gsi);
+
+ if (!is_gimple_assign (cur_stmt))
+ continue;
+
+ code = gimple_assign_rhs_code (cur_stmt);
+ switch (code)
+ {
+ case LROTATE_EXPR:
+ case RROTATE_EXPR:
+ if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
+ || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
+ % BITS_PER_UNIT)
+ continue;
+ /* Fall through. */
+ case BIT_IOR_EXPR:
+ break;
+ default:
+ continue;
+ }
+
+ ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+
+ if (!ins_stmt)
+ continue;
+
+ switch (n.range)
+ {
+ case 16:
+ /* Already in canonical form, nothing to do. */
+ if (code == LROTATE_EXPR || code == RROTATE_EXPR)
+ continue;
+ load_type = bswap_type = uint16_type_node;
+ break;
+ case 32:
+ load_type = uint32_type_node;
+ if (bswap32_p)
+ {
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+ bswap_type = bswap32_type;
+ }
+ break;
+ case 64:
+ load_type = uint64_type_node;
+ if (bswap64_p)
+ {
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+ bswap_type = bswap64_type;
+ }
+ break;
+ default:
+ continue;
+ }
+
+ if (bswap && !fndecl && n.range != 16)
+ continue;
+
+ if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
+ &n, bswap))
+ changed = true;
+ }
+ }
+
+ statistics_counter_event (fun, "16-bit nop implementations found",
+ nop_stats.found_16bit);
+ statistics_counter_event (fun, "32-bit nop implementations found",
+ nop_stats.found_32bit);
+ statistics_counter_event (fun, "64-bit nop implementations found",
+ nop_stats.found_64bit);
+ statistics_counter_event (fun, "16-bit bswap implementations found",
+ bswap_stats.found_16bit);
+ statistics_counter_event (fun, "32-bit bswap implementations found",
+ bswap_stats.found_32bit);
+ statistics_counter_event (fun, "64-bit bswap implementations found",
+ bswap_stats.found_64bit);
+
+ return (changed ? TODO_update_ssa : 0);
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_optimize_bswap (gcc::context *ctxt)
+{
+ return new pass_optimize_bswap (ctxt);
+}
+
+namespace {
+
/* Struct recording one operand for the store, which is either a constant,
then VAL represents the constant and all the other fields are zero,
or a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
int inserted;
} sincos_stats;
-static struct
-{
- /* Number of hand-written 16-bit nop / bswaps found. */
- int found_16bit;
-
- /* Number of hand-written 32-bit nop / bswaps found. */
- int found_32bit;
-
- /* Number of hand-written 64-bit nop / bswaps found. */
- int found_64bit;
-} nop_stats, bswap_stats;
-
static struct
{
/* Number of widening multiplication ops inserted. */
return new pass_cse_sincos (ctxt);
}
-/* A symbolic number structure is used to detect byte permutation and selection
- patterns of a source. To achieve that, its field N contains an artificial
- number consisting of BITS_PER_MARKER sized markers tracking where does each
- byte come from in the source:
-
- 0 - target byte has the value 0
- FF - target byte has an unknown value (eg. due to sign extension)
- 1..size - marker value is the byte index in the source (0 for lsb).
-
- To detect permutations on memory sources (arrays and structures), a symbolic
- number is also associated:
- - a base address BASE_ADDR and an OFFSET giving the address of the source;
- - a range which gives the difference between the highest and lowest accessed
- memory location to make such a symbolic number;
- - the address SRC of the source element of lowest address as a convenience
- to easily get BASE_ADDR + offset + lowest bytepos;
- - number of expressions N_OPS bitwise ored together to represent
- approximate cost of the computation.
-
- Note 1: the range is different from size as size reflects the size of the
- type of the current expression. For instance, for an array char a[],
- (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
- (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
- time a range of 1.
-
- Note 2: for non-memory sources, range holds the same value as size.
-
- Note 3: SRC points to the SSA_NAME in case of non-memory source. */
-
-struct symbolic_number {
- uint64_t n;
- tree type;
- tree base_addr;
- tree offset;
- HOST_WIDE_INT bytepos;
- tree src;
- tree alias_set;
- tree vuse;
- unsigned HOST_WIDE_INT range;
- int n_ops;
-};
-
-#define BITS_PER_MARKER 8
-#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
-#define MARKER_BYTE_UNKNOWN MARKER_MASK
-#define HEAD_MARKER(n, size) \
- ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
-
-/* The number which the find_bswap_or_nop_1 result should match in
- order to have a nop. The number is masked according to the size of
- the symbolic number before using it. */
-#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
- (uint64_t)0x08070605 << 32 | 0x04030201)
-
-/* The number which the find_bswap_or_nop_1 result should match in
- order to have a byte swap. The number is masked according to the
- size of the symbolic number before using it. */
-#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
- (uint64_t)0x01020304 << 32 | 0x05060708)
-
-/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
- number N. Return false if the requested operation is not permitted
- on a symbolic number. */
-
-static inline bool
-do_shift_rotate (enum tree_code code,
- struct symbolic_number *n,
- int count)
-{
- int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
- unsigned head_marker;
-
- if (count % BITS_PER_UNIT != 0)
- return false;
- count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
-
- /* Zero out the extra bits of N in order to avoid them being shifted
- into the significant bits. */
- if (size < 64 / BITS_PER_MARKER)
- n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
-
- switch (code)
- {
- case LSHIFT_EXPR:
- n->n <<= count;
- break;
- case RSHIFT_EXPR:
- head_marker = HEAD_MARKER (n->n, size);
- n->n >>= count;
- /* Arithmetic shift of signed type: result is dependent on the value. */
- if (!TYPE_UNSIGNED (n->type) && head_marker)
- for (i = 0; i < count / BITS_PER_MARKER; i++)
- n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
- << ((size - 1 - i) * BITS_PER_MARKER);
- break;
- case LROTATE_EXPR:
- n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
- break;
- case RROTATE_EXPR:
- n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
- break;
- default:
- return false;
- }
- /* Zero unused bits for size. */
- if (size < 64 / BITS_PER_MARKER)
- n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
- return true;
-}
-
-/* Perform sanity checking for the symbolic number N and the gimple
- statement STMT. */
-
-static inline bool
-verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
-{
- tree lhs_type;
-
- lhs_type = gimple_expr_type (stmt);
-
- if (TREE_CODE (lhs_type) != INTEGER_TYPE)
- return false;
-
- if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
- return false;
-
- return true;
-}
-
-/* Initialize the symbolic number N for the bswap pass from the base element
- SRC manipulated by the bitwise OR expression. */
-
-static bool
-init_symbolic_number (struct symbolic_number *n, tree src)
-{
- int size;
-
- if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
- return false;
-
- n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
- n->src = src;
-
- /* Set up the symbolic number N by setting each byte to a value between 1 and
- the byte size of rhs1. The highest order byte is set to n->size and the
- lowest order byte to 1. */
- n->type = TREE_TYPE (src);
- size = TYPE_PRECISION (n->type);
- if (size % BITS_PER_UNIT != 0)
- return false;
- size /= BITS_PER_UNIT;
- if (size > 64 / BITS_PER_MARKER)
- return false;
- n->range = size;
- n->n = CMPNOP;
- n->n_ops = 1;
-
- if (size < 64 / BITS_PER_MARKER)
- n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
-
- return true;
-}
-
-/* Check if STMT might be a byte swap or a nop from a memory source and returns
- the answer. If so, REF is that memory source and the base of the memory area
- accessed and the offset of the access from that base are recorded in N. */
-
-bool
-find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
-{
- /* Leaf node is an array or component ref. Memorize its base and
- offset from base to compare to other such leaf node. */
- HOST_WIDE_INT bitsize, bitpos;
- machine_mode mode;
- int unsignedp, reversep, volatilep;
- tree offset, base_addr;
-
- /* Not prepared to handle PDP endian. */
- if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
- return false;
-
- if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
- return false;
-
- base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
- &unsignedp, &reversep, &volatilep);
-
- if (TREE_CODE (base_addr) == MEM_REF)
- {
- offset_int bit_offset = 0;
- tree off = TREE_OPERAND (base_addr, 1);
-
- if (!integer_zerop (off))
- {
- offset_int boff, coff = mem_ref_offset (base_addr);
- boff = coff << LOG2_BITS_PER_UNIT;
- bit_offset += boff;
- }
-
- base_addr = TREE_OPERAND (base_addr, 0);
-
- /* Avoid returning a negative bitpos as this may wreak havoc later. */
- if (wi::neg_p (bit_offset))
- {
- offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
- offset_int tem = wi::bit_and_not (bit_offset, mask);
- /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
- Subtract it to BIT_OFFSET and add it (scaled) to OFFSET. */
- bit_offset -= tem;
- tem >>= LOG2_BITS_PER_UNIT;
- if (offset)
- offset = size_binop (PLUS_EXPR, offset,
- wide_int_to_tree (sizetype, tem));
- else
- offset = wide_int_to_tree (sizetype, tem);
- }
-
- bitpos += bit_offset.to_shwi ();
- }
-
- if (bitpos % BITS_PER_UNIT)
- return false;
- if (bitsize % BITS_PER_UNIT)
- return false;
- if (reversep)
- return false;
-
- if (!init_symbolic_number (n, ref))
- return false;
- n->base_addr = base_addr;
- n->offset = offset;
- n->bytepos = bitpos / BITS_PER_UNIT;
- n->alias_set = reference_alias_ptr_type (ref);
- n->vuse = gimple_vuse (stmt);
- return true;
-}
-
-/* Compute the symbolic number N representing the result of a bitwise OR on 2
- symbolic number N1 and N2 whose source statements are respectively
- SOURCE_STMT1 and SOURCE_STMT2. */
-
-static gimple *
-perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
- gimple *source_stmt2, struct symbolic_number *n2,
- struct symbolic_number *n)
-{
- int i, size;
- uint64_t mask;
- gimple *source_stmt;
- struct symbolic_number *n_start;
-
- tree rhs1 = gimple_assign_rhs1 (source_stmt1);
- if (TREE_CODE (rhs1) == BIT_FIELD_REF
- && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
- rhs1 = TREE_OPERAND (rhs1, 0);
- tree rhs2 = gimple_assign_rhs1 (source_stmt2);
- if (TREE_CODE (rhs2) == BIT_FIELD_REF
- && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
- rhs2 = TREE_OPERAND (rhs2, 0);
-
- /* Sources are different, cancel bswap if they are not memory location with
- the same base (array, structure, ...). */
- if (rhs1 != rhs2)
- {
- uint64_t inc;
- HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
- struct symbolic_number *toinc_n_ptr, *n_end;
- basic_block bb1, bb2;
-
- if (!n1->base_addr || !n2->base_addr
- || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
- return NULL;
-
- if (!n1->offset != !n2->offset
- || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
- return NULL;
-
- if (n1->bytepos < n2->bytepos)
- {
- n_start = n1;
- start_sub = n2->bytepos - n1->bytepos;
- }
- else
- {
- n_start = n2;
- start_sub = n1->bytepos - n2->bytepos;
- }
-
- bb1 = gimple_bb (source_stmt1);
- bb2 = gimple_bb (source_stmt2);
- if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
- source_stmt = source_stmt1;
- else
- source_stmt = source_stmt2;
-
- /* Find the highest address at which a load is performed and
- compute related info. */
- end1 = n1->bytepos + (n1->range - 1);
- end2 = n2->bytepos + (n2->range - 1);
- if (end1 < end2)
- {
- end = end2;
- end_sub = end2 - end1;
- }
- else
- {
- end = end1;
- end_sub = end1 - end2;
- }
- n_end = (end2 > end1) ? n2 : n1;
-
- /* Find symbolic number whose lsb is the most significant. */
- if (BYTES_BIG_ENDIAN)
- toinc_n_ptr = (n_end == n1) ? n2 : n1;
- else
- toinc_n_ptr = (n_start == n1) ? n2 : n1;
-
- n->range = end - n_start->bytepos + 1;
-
- /* Check that the range of memory covered can be represented by
- a symbolic number. */
- if (n->range > 64 / BITS_PER_MARKER)
- return NULL;
-
- /* Reinterpret byte marks in symbolic number holding the value of
- bigger weight according to target endianness. */
- inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
- size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
- for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
- {
- unsigned marker
- = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
- if (marker && marker != MARKER_BYTE_UNKNOWN)
- toinc_n_ptr->n += inc;
- }
- }
- else
- {
- n->range = n1->range;
- n_start = n1;
- source_stmt = source_stmt1;
- }
-
- if (!n1->alias_set
- || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
- n->alias_set = n1->alias_set;
- else
- n->alias_set = ptr_type_node;
- n->vuse = n_start->vuse;
- n->base_addr = n_start->base_addr;
- n->offset = n_start->offset;
- n->src = n_start->src;
- n->bytepos = n_start->bytepos;
- n->type = n_start->type;
- size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
-
- for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
- {
- uint64_t masked1, masked2;
-
- masked1 = n1->n & mask;
- masked2 = n2->n & mask;
- if (masked1 && masked2 && masked1 != masked2)
- return NULL;
- }
- n->n = n1->n | n2->n;
- n->n_ops = n1->n_ops + n2->n_ops;
-
- return source_stmt;
-}
-
-/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
- the operation given by the rhs of STMT on the result. If the operation
- could successfully be executed the function returns a gimple stmt whose
- rhs's first tree is the expression of the source operand and NULL
- otherwise. */
-
-static gimple *
-find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
-{
- enum tree_code code;
- tree rhs1, rhs2 = NULL;
- gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
- enum gimple_rhs_class rhs_class;
-
- if (!limit || !is_gimple_assign (stmt))
- return NULL;
-
- rhs1 = gimple_assign_rhs1 (stmt);
-
- if (find_bswap_or_nop_load (stmt, rhs1, n))
- return stmt;
-
- /* Handle BIT_FIELD_REF. */
- if (TREE_CODE (rhs1) == BIT_FIELD_REF
- && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
- {
- unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
- unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
- if (bitpos % BITS_PER_UNIT == 0
- && bitsize % BITS_PER_UNIT == 0
- && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
- {
- /* Handle big-endian bit numbering in BIT_FIELD_REF. */
- if (BYTES_BIG_ENDIAN)
- bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
-
- /* Shift. */
- if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
- return NULL;
-
- /* Mask. */
- uint64_t mask = 0;
- uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
- for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
- i++, tmp <<= BITS_PER_UNIT)
- mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
- n->n &= mask;
-
- /* Convert. */
- n->type = TREE_TYPE (rhs1);
- if (!n->base_addr)
- n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
-
- return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
- }
-
- return NULL;
- }
-
- if (TREE_CODE (rhs1) != SSA_NAME)
- return NULL;
-
- code = gimple_assign_rhs_code (stmt);
- rhs_class = gimple_assign_rhs_class (stmt);
- rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
-
- if (rhs_class == GIMPLE_BINARY_RHS)
- rhs2 = gimple_assign_rhs2 (stmt);
-
- /* Handle unary rhs and binary rhs with integer constants as second
- operand. */
-
- if (rhs_class == GIMPLE_UNARY_RHS
- || (rhs_class == GIMPLE_BINARY_RHS
- && TREE_CODE (rhs2) == INTEGER_CST))
- {
- if (code != BIT_AND_EXPR
- && code != LSHIFT_EXPR
- && code != RSHIFT_EXPR
- && code != LROTATE_EXPR
- && code != RROTATE_EXPR
- && !CONVERT_EXPR_CODE_P (code))
- return NULL;
-
- source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
-
- /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
- we have to initialize the symbolic number. */
- if (!source_stmt1)
- {
- if (gimple_assign_load_p (stmt)
- || !init_symbolic_number (n, rhs1))
- return NULL;
- source_stmt1 = stmt;
- }
-
- switch (code)
- {
- case BIT_AND_EXPR:
- {
- int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
- uint64_t val = int_cst_value (rhs2), mask = 0;
- uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
-
- /* Only constants masking full bytes are allowed. */
- for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
- if ((val & tmp) != 0 && (val & tmp) != tmp)
- return NULL;
- else if (val & tmp)
- mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
-
- n->n &= mask;
- }
- break;
- case LSHIFT_EXPR:
- case RSHIFT_EXPR:
- case LROTATE_EXPR:
- case RROTATE_EXPR:
- if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
- return NULL;
- break;
- CASE_CONVERT:
- {
- int i, type_size, old_type_size;
- tree type;
-
- type = gimple_expr_type (stmt);
- type_size = TYPE_PRECISION (type);
- if (type_size % BITS_PER_UNIT != 0)
- return NULL;
- type_size /= BITS_PER_UNIT;
- if (type_size > 64 / BITS_PER_MARKER)
- return NULL;
-
- /* Sign extension: result is dependent on the value. */
- old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
- if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
- && HEAD_MARKER (n->n, old_type_size))
- for (i = 0; i < type_size - old_type_size; i++)
- n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
- << ((type_size - 1 - i) * BITS_PER_MARKER);
-
- if (type_size < 64 / BITS_PER_MARKER)
- {
- /* If STMT casts to a smaller type mask out the bits not
- belonging to the target type. */
- n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
- }
- n->type = type;
- if (!n->base_addr)
- n->range = type_size;
- }
- break;
- default:
- return NULL;
- };
- return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
- }
-
- /* Handle binary rhs. */
-
- if (rhs_class == GIMPLE_BINARY_RHS)
- {
- struct symbolic_number n1, n2;
- gimple *source_stmt, *source_stmt2;
-
- if (code != BIT_IOR_EXPR)
- return NULL;
-
- if (TREE_CODE (rhs2) != SSA_NAME)
- return NULL;
-
- rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
-
- switch (code)
- {
- case BIT_IOR_EXPR:
- source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
-
- if (!source_stmt1)
- return NULL;
-
- source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
-
- if (!source_stmt2)
- return NULL;
-
- if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
- return NULL;
-
- if (!n1.vuse != !n2.vuse
- || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
- return NULL;
-
- source_stmt
- = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
-
- if (!source_stmt)
- return NULL;
-
- if (!verify_symbolic_number_p (n, stmt))
- return NULL;
-
- break;
- default:
- return NULL;
- }
- return source_stmt;
- }
- return NULL;
-}
-
-/* Check if STMT completes a bswap implementation or a read in a given
- endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
- accordingly. It also sets N to represent the kind of operations
- performed: size of the resulting expression and whether it works on
- a memory source, and if so alias-set and vuse. At last, the
- function returns a stmt whose rhs's first tree is the source
- expression. */
-
-static gimple *
-find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
-{
- unsigned rsize;
- uint64_t tmpn, mask;
-/* The number which the find_bswap_or_nop_1 result should match in order
- to have a full byte swap. The number is shifted to the right
- according to the size of the symbolic number before using it. */
- uint64_t cmpxchg = CMPXCHG;
- uint64_t cmpnop = CMPNOP;
-
- gimple *ins_stmt;
- int limit;
-
- /* The last parameter determines the depth search limit. It usually
- correlates directly to the number n of bytes to be touched. We
- increase that number by log2(n) + 1 here in order to also
- cover signed -> unsigned conversions of the src operand as can be seen
- in libgcc, and for initial shift/and operation of the src operand. */
- limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
- limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
- ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
-
- if (!ins_stmt)
- return NULL;
-
- /* Find real size of result (highest non-zero byte). */
- if (n->base_addr)
- for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
- else
- rsize = n->range;
-
- /* Zero out the bits corresponding to untouched bytes in original gimple
- expression. */
- if (n->range < (int) sizeof (int64_t))
- {
- mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
- cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
- cmpnop &= mask;
- }
-
- /* Zero out the bits corresponding to unused bytes in the result of the
- gimple expression. */
- if (rsize < n->range)
- {
- if (BYTES_BIG_ENDIAN)
- {
- mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
- cmpxchg &= mask;
- cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
- }
- else
- {
- mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
- cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
- cmpnop &= mask;
- }
- n->range = rsize;
- }
-
- /* A complete byte swap should make the symbolic number to start with
- the largest digit in the highest order byte. Unchanged symbolic
- number indicates a read with same endianness as target architecture. */
- if (n->n == cmpnop)
- *bswap = false;
- else if (n->n == cmpxchg)
- *bswap = true;
- else
- return NULL;
-
- /* Useless bit manipulation performed by code. */
- if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
- return NULL;
-
- n->range *= BITS_PER_UNIT;
- return ins_stmt;
-}
-
-namespace {
-
-const pass_data pass_data_optimize_bswap =
-{
- GIMPLE_PASS, /* type */
- "bswap", /* name */
- OPTGROUP_NONE, /* optinfo_flags */
- TV_NONE, /* tv_id */
- PROP_ssa, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0, /* todo_flags_finish */
-};
-
-class pass_optimize_bswap : public gimple_opt_pass
-{
-public:
- pass_optimize_bswap (gcc::context *ctxt)
- : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
- {}
-
- /* opt_pass methods: */
- virtual bool gate (function *)
- {
- return flag_expensive_optimizations && optimize;
- }
-
- virtual unsigned int execute (function *);
-
-}; // class pass_optimize_bswap
-
-/* Perform the bswap optimization: replace the expression computed in the rhs
- of CUR_STMT by an equivalent bswap, load or load + bswap expression.
- Which of these alternatives replace the rhs is given by N->base_addr (non
- null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
- load to perform are also given in N while the builtin bswap invoke is given
- in FNDEL. Finally, if a load is involved, SRC_STMT refers to one of the
- load statements involved to construct the rhs in CUR_STMT and N->range gives
- the size of the rhs expression for maintaining some statistics.
-
- Note that if the replacement involve a load, CUR_STMT is moved just after
- SRC_STMT to do the load with the same VUSE which can lead to CUR_STMT
- changing of basic block. */
-
-static bool
-bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
- tree bswap_type, tree load_type, struct symbolic_number *n,
- bool bswap)
-{
- gimple_stmt_iterator gsi;
- tree src, tmp, tgt;
- gimple *bswap_stmt;
-
- gsi = gsi_for_stmt (cur_stmt);
- src = n->src;
- tgt = gimple_assign_lhs (cur_stmt);
-
- /* Need to load the value from memory first. */
- if (n->base_addr)
- {
- gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
- tree addr_expr, addr_tmp, val_expr, val_tmp;
- tree load_offset_ptr, aligned_load_type;
- gimple *addr_stmt, *load_stmt;
- unsigned align;
- HOST_WIDE_INT load_offset = 0;
- basic_block ins_bb, cur_bb;
-
- ins_bb = gimple_bb (ins_stmt);
- cur_bb = gimple_bb (cur_stmt);
- if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
- return false;
-
- align = get_object_alignment (src);
-
- /* Move cur_stmt just before one of the load of the original
- to ensure it has the same VUSE. See PR61517 for what could
- go wrong. */
- if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
- reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
- gsi_move_before (&gsi, &gsi_ins);
- gsi = gsi_for_stmt (cur_stmt);
-
- /* Compute address to load from and cast according to the size
- of the load. */
- addr_expr = build_fold_addr_expr (unshare_expr (src));
- if (is_gimple_mem_ref_addr (addr_expr))
- addr_tmp = addr_expr;
- else
- {
- addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
- "load_src");
- addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
- gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
- }
-
- /* Perform the load. */
- aligned_load_type = load_type;
- if (align < TYPE_ALIGN (load_type))
- aligned_load_type = build_aligned_type (load_type, align);
- load_offset_ptr = build_int_cst (n->alias_set, load_offset);
- val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
- load_offset_ptr);
-
- if (!bswap)
- {
- if (n->range == 16)
- nop_stats.found_16bit++;
- else if (n->range == 32)
- nop_stats.found_32bit++;
- else
- {
- gcc_assert (n->range == 64);
- nop_stats.found_64bit++;
- }
-
- /* Convert the result of load if necessary. */
- if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
- {
- val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
- "load_dst");
- load_stmt = gimple_build_assign (val_tmp, val_expr);
- gimple_set_vuse (load_stmt, n->vuse);
- gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
- gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
- }
- else
- {
- gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
- gimple_set_vuse (cur_stmt, n->vuse);
- }
- update_stmt (cur_stmt);
-
- if (dump_file)
- {
- fprintf (dump_file,
- "%d bit load in target endianness found at: ",
- (int) n->range);
- print_gimple_stmt (dump_file, cur_stmt, 0);
- }
- return true;
- }
- else
- {
- val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
- load_stmt = gimple_build_assign (val_tmp, val_expr);
- gimple_set_vuse (load_stmt, n->vuse);
- gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
- }
- src = val_tmp;
- }
- else if (!bswap)
- {
- gimple *g;
- if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
- {
- if (!is_gimple_val (src))
- return false;
- g = gimple_build_assign (tgt, NOP_EXPR, src);
- }
- else
- g = gimple_build_assign (tgt, src);
- if (n->range == 16)
- nop_stats.found_16bit++;
- else if (n->range == 32)
- nop_stats.found_32bit++;
- else
- {
- gcc_assert (n->range == 64);
- nop_stats.found_64bit++;
- }
- if (dump_file)
- {
- fprintf (dump_file,
- "%d bit reshuffle in target endianness found at: ",
- (int) n->range);
- print_gimple_stmt (dump_file, cur_stmt, 0);
- }
- gsi_replace (&gsi, g, true);
- return true;
- }
- else if (TREE_CODE (src) == BIT_FIELD_REF)
- src = TREE_OPERAND (src, 0);
-
- if (n->range == 16)
- bswap_stats.found_16bit++;
- else if (n->range == 32)
- bswap_stats.found_32bit++;
- else
- {
- gcc_assert (n->range == 64);
- bswap_stats.found_64bit++;
- }
-
- tmp = src;
-
- /* Convert the src expression if necessary. */
- if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
- {
- gimple *convert_stmt;
-
- tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
- convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
- gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
- }
-
- /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
- are considered as rotation of 2N bit values by N bits is generally not
- equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
- gives 0x03040102 while a bswap for that value is 0x04030201. */
- if (bswap && n->range == 16)
- {
- tree count = build_int_cst (NULL, BITS_PER_UNIT);
- src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
- bswap_stmt = gimple_build_assign (NULL, src);
- }
- else
- bswap_stmt = gimple_build_call (fndecl, 1, tmp);
-
- tmp = tgt;
-
- /* Convert the result if necessary. */
- if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
- {
- gimple *convert_stmt;
-
- tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
- convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
- gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
- }
-
- gimple_set_lhs (bswap_stmt, tmp);
-
- if (dump_file)
- {
- fprintf (dump_file, "%d bit bswap implementation found at: ",
- (int) n->range);
- print_gimple_stmt (dump_file, cur_stmt, 0);
- }
-
- gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
- gsi_remove (&gsi, true);
- return true;
-}
-
-/* Find manual byte swap implementations as well as load in a given
- endianness. Byte swaps are turned into a bswap builtin invokation
- while endian loads are converted to bswap builtin invokation or
- simple load according to the target endianness. */
-
-unsigned int
-pass_optimize_bswap::execute (function *fun)
-{
- basic_block bb;
- bool bswap32_p, bswap64_p;
- bool changed = false;
- tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
-
- if (BITS_PER_UNIT != 8)
- return 0;
-
- bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
- && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
- bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
- && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
- || (bswap32_p && word_mode == SImode)));
-
- /* Determine the argument type of the builtins. The code later on
- assumes that the return and argument type are the same. */
- if (bswap32_p)
- {
- tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
- bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
- }
-
- if (bswap64_p)
- {
- tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
- bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
- }
-
- memset (&nop_stats, 0, sizeof (nop_stats));
- memset (&bswap_stats, 0, sizeof (bswap_stats));
- calculate_dominance_info (CDI_DOMINATORS);
-
- FOR_EACH_BB_FN (bb, fun)
- {
- gimple_stmt_iterator gsi;
-
- /* We do a reverse scan for bswap patterns to make sure we get the
- widest match. As bswap pattern matching doesn't handle previously
- inserted smaller bswap replacements as sub-patterns, the wider
- variant wouldn't be detected. */
- for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
- {
- gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
- tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
- enum tree_code code;
- struct symbolic_number n;
- bool bswap;
-
- /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
- might be moved to a different basic block by bswap_replace and gsi
- must not points to it if that's the case. Moving the gsi_prev
- there make sure that gsi points to the statement previous to
- cur_stmt while still making sure that all statements are
- considered in this basic block. */
- gsi_prev (&gsi);
-
- if (!is_gimple_assign (cur_stmt))
- continue;
-
- code = gimple_assign_rhs_code (cur_stmt);
- switch (code)
- {
- case LROTATE_EXPR:
- case RROTATE_EXPR:
- if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
- || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
- % BITS_PER_UNIT)
- continue;
- /* Fall through. */
- case BIT_IOR_EXPR:
- break;
- default:
- continue;
- }
-
- ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
-
- if (!ins_stmt)
- continue;
-
- switch (n.range)
- {
- case 16:
- /* Already in canonical form, nothing to do. */
- if (code == LROTATE_EXPR || code == RROTATE_EXPR)
- continue;
- load_type = bswap_type = uint16_type_node;
- break;
- case 32:
- load_type = uint32_type_node;
- if (bswap32_p)
- {
- fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
- bswap_type = bswap32_type;
- }
- break;
- case 64:
- load_type = uint64_type_node;
- if (bswap64_p)
- {
- fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
- bswap_type = bswap64_type;
- }
- break;
- default:
- continue;
- }
-
- if (bswap && !fndecl && n.range != 16)
- continue;
-
- if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
- &n, bswap))
- changed = true;
- }
- }
-
- statistics_counter_event (fun, "16-bit nop implementations found",
- nop_stats.found_16bit);
- statistics_counter_event (fun, "32-bit nop implementations found",
- nop_stats.found_32bit);
- statistics_counter_event (fun, "64-bit nop implementations found",
- nop_stats.found_64bit);
- statistics_counter_event (fun, "16-bit bswap implementations found",
- bswap_stats.found_16bit);
- statistics_counter_event (fun, "32-bit bswap implementations found",
- bswap_stats.found_32bit);
- statistics_counter_event (fun, "64-bit bswap implementations found",
- bswap_stats.found_64bit);
-
- return (changed ? TODO_update_ssa : 0);
-}
-
-} // anon namespace
-
-gimple_opt_pass *
-make_pass_optimize_bswap (gcc::context *ctxt)
-{
- return new pass_optimize_bswap (ctxt);
-}
-
/* Return true if stmt is a type conversion operation that can be stripped
when used in a widening multiply operation. */
static bool