From dffec8ebdb449be77bf02fe0cf59237362be991a Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 20 Nov 2017 11:08:48 +0100 Subject: [PATCH] tree-ssa-math-opts.c (nop_stats, [...]): Moved to ... * tree-ssa-math-opts.c (nop_stats, bswap_stats, struct symbolic_number, BITS_PER_MARKER, MARKER_MASK, MARKER_BYTE_UNKNOWN, HEAD_MARKER, CMPNOP, CMPXCHG, do_shift_rotate, verify_symbolic_number_p, init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge, find_bswap_or_nop_1, find_bswap_or_nop, pass_data_optimize_bswap, class pass_optimize_bswap, bswap_replace, pass_optimize_bswap::execute): Moved to ... * gimple-ssa-store-merging.c: ... this file. Include optabs-tree.h. (nop_stats, bswap_stats, do_shift_rotate, verify_symbolic_number_p, init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge, find_bswap_or_nop_1, find_bswap_or_nop, bswap_replace): Put into anonymous namespace, remove static keywords. (pass_optimize_bswap::gate): Test BITS_PER_UNIT == 8 here... (pass_optimize_bswap::execute): ... rather than here. Formatting fix. From-SVN: r254947 --- gcc/ChangeLog | 18 + gcc/gimple-ssa-store-merging.c | 1082 +++++++++++++++++++++++++++++++- gcc/tree-ssa-math-opts.c | 1076 ------------------------------- 3 files changed, 1096 insertions(+), 1080 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3cdba2e321c..e6ffa2640f6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2017-11-20 Jakub Jelinek + + * tree-ssa-math-opts.c (nop_stats, bswap_stats, struct symbolic_number, + BITS_PER_MARKER, MARKER_MASK, MARKER_BYTE_UNKNOWN, HEAD_MARKER, CMPNOP, + CMPXCHG, do_shift_rotate, verify_symbolic_number_p, + init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge, + find_bswap_or_nop_1, find_bswap_or_nop, pass_data_optimize_bswap, + class pass_optimize_bswap, bswap_replace, + pass_optimize_bswap::execute): Moved to ... + * gimple-ssa-store-merging.c: ... this file. + Include optabs-tree.h. + (nop_stats, bswap_stats, do_shift_rotate, verify_symbolic_number_p, + init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge, + find_bswap_or_nop_1, find_bswap_or_nop, bswap_replace): Put into + anonymous namespace, remove static keywords. + (pass_optimize_bswap::gate): Test BITS_PER_UNIT == 8 here... + (pass_optimize_bswap::execute): ... rather than here. Formatting fix. + 2017-11-20 Jan Hubicka PR bootstrap/83062 diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index b8920d92b1d..b84f892e881 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -1,5 +1,5 @@ -/* GIMPLE store merging pass. - Copyright (C) 2016-2017 Free Software Foundation, Inc. +/* GIMPLE store merging and byte swapping passes. + Copyright (C) 2009-2017 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -18,8 +18,8 @@ along with GCC; see the file COPYING3. If not see . */ -/* The purpose of this pass is to combine multiple memory stores of - constant values, values loaded from memory or bitwise operations +/* The purpose of the store merging pass is to combine multiple memory + stores of constant values, values loaded from memory or bitwise operations on those to consecutive memory locations into fewer wider stores. For example, if we have a sequence peforming four byte stores to consecutive memory locations: @@ -157,6 +157,7 @@ #include "gimplify-me.h" #include "rtl.h" #include "expr.h" /* For get_bit_range. 
*/
+#include "optabs-tree.h"
 #include "selftest.h"
 
 /* The maximum size (in bits) of the stores this pass should generate. */
@@ -169,6 +170,1079 @@
 
 namespace {
 
+struct
+{
+  /* Number of hand-written 16-bit nop / bswaps found. */
+  int found_16bit;
+
+  /* Number of hand-written 32-bit nop / bswaps found. */
+  int found_32bit;
+
+  /* Number of hand-written 64-bit nop / bswaps found. */
+  int found_64bit;
+} nop_stats, bswap_stats;
+
+/* A symbolic number structure is used to detect byte permutation and selection
+   patterns of a source. To achieve that, its field N contains an artificial
+   number consisting of BITS_PER_MARKER sized markers tracking where each
+   byte comes from in the source:
+
+   0       - target byte has the value 0
+   FF      - target byte has an unknown value (e.g. due to sign extension)
+   1..size - marker value is the byte index in the source (0 for lsb).
+
+   To detect permutations on memory sources (arrays and structures), a symbolic
+   number is also associated with:
+   - a base address BASE_ADDR and an OFFSET giving the address of the source;
+   - a range which gives the difference between the highest and lowest accessed
+     memory location to make such a symbolic number;
+   - the address SRC of the source element of lowest address as a convenience
+     to easily get BASE_ADDR + offset + lowest bytepos;
+   - the number of expressions N_OPS bitwise ORed together to represent the
+     approximate cost of the computation.
+
+   Note 1: the range is different from size as size reflects the size of the
+   type of the current expression. For instance, for an array char a[],
+   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
+   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
+   time a range of 1.
+
+   Note 2: for non-memory sources, range holds the same value as size.
+
+   Note 3: SRC points to the SSA_NAME in case of non-memory source. */
+
+struct symbolic_number {
+  uint64_t n;
+  tree type;
+  tree base_addr;
+  tree offset;
+  HOST_WIDE_INT bytepos;
+  tree src;
+  tree alias_set;
+  tree vuse;
+  unsigned HOST_WIDE_INT range;
+  int n_ops;
+};
+
+#define BITS_PER_MARKER 8
+#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
+#define MARKER_BYTE_UNKNOWN MARKER_MASK
+#define HEAD_MARKER(n, size) \
+  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
+
+/* The number which the find_bswap_or_nop_1 result should match in
+   order to have a nop. The number is masked according to the size of
+   the symbolic number before using it. */
+#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
+  (uint64_t)0x08070605 << 32 | 0x04030201)
+
+/* The number which the find_bswap_or_nop_1 result should match in
+   order to have a byte swap. The number is masked according to the
+   size of the symbolic number before using it. */
+#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
+  (uint64_t)0x01020304 << 32 | 0x05060708)
+
+/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
+   number N. Return false if the requested operation is not permitted
+   on a symbolic number. */
+
+inline bool
+do_shift_rotate (enum tree_code code,
+		 struct symbolic_number *n,
+		 int count)
+{
+  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+  unsigned head_marker;
+
+  if (count % BITS_PER_UNIT != 0)
+    return false;
+  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
+
+  /* Zero out the extra bits of N in order to avoid them being shifted
+     into the significant bits.
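+     E.g. for a 4-byte type (size == 4) and BITS_PER_MARKER == 8, only the
+     low 32 bits of N carry markers, so N is masked with 0xffffffff here.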
*/ + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; + + switch (code) + { + case LSHIFT_EXPR: + n->n <<= count; + break; + case RSHIFT_EXPR: + head_marker = HEAD_MARKER (n->n, size); + n->n >>= count; + /* Arithmetic shift of signed type: result is dependent on the value. */ + if (!TYPE_UNSIGNED (n->type) && head_marker) + for (i = 0; i < count / BITS_PER_MARKER; i++) + n->n |= (uint64_t) MARKER_BYTE_UNKNOWN + << ((size - 1 - i) * BITS_PER_MARKER); + break; + case LROTATE_EXPR: + n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count)); + break; + case RROTATE_EXPR: + n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count)); + break; + default: + return false; + } + /* Zero unused bits for size. */ + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; + return true; +} + +/* Perform sanity checking for the symbolic number N and the gimple + statement STMT. */ + +inline bool +verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt) +{ + tree lhs_type; + + lhs_type = gimple_expr_type (stmt); + + if (TREE_CODE (lhs_type) != INTEGER_TYPE) + return false; + + if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type)) + return false; + + return true; +} + +/* Initialize the symbolic number N for the bswap pass from the base element + SRC manipulated by the bitwise OR expression. */ + +bool +init_symbolic_number (struct symbolic_number *n, tree src) +{ + int size; + + if (! INTEGRAL_TYPE_P (TREE_TYPE (src))) + return false; + + n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; + n->src = src; + + /* Set up the symbolic number N by setting each byte to a value between 1 and + the byte size of rhs1. The highest order byte is set to n->size and the + lowest order byte to 1. */ + n->type = TREE_TYPE (src); + size = TYPE_PRECISION (n->type); + if (size % BITS_PER_UNIT != 0) + return false; + size /= BITS_PER_UNIT; + if (size > 64 / BITS_PER_MARKER) + return false; + n->range = size; + n->n = CMPNOP; + n->n_ops = 1; + + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; + + return true; +} + +/* Check if STMT might be a byte swap or a nop from a memory source and returns + the answer. If so, REF is that memory source and the base of the memory area + accessed and the offset of the access from that base are recorded in N. */ + +bool +find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n) +{ + /* Leaf node is an array or component ref. Memorize its base and + offset from base to compare to other such leaf node. */ + HOST_WIDE_INT bitsize, bitpos; + machine_mode mode; + int unsignedp, reversep, volatilep; + tree offset, base_addr; + + /* Not prepared to handle PDP endian. */ + if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) + return false; + + if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt)) + return false; + + base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode, + &unsignedp, &reversep, &volatilep); + + if (TREE_CODE (base_addr) == MEM_REF) + { + offset_int bit_offset = 0; + tree off = TREE_OPERAND (base_addr, 1); + + if (!integer_zerop (off)) + { + offset_int boff, coff = mem_ref_offset (base_addr); + boff = coff << LOG2_BITS_PER_UNIT; + bit_offset += boff; + } + + base_addr = TREE_OPERAND (base_addr, 0); + + /* Avoid returning a negative bitpos as this may wreak havoc later. 
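+	 E.g. a BIT_OFFSET of -16 is rounded towards -Inf to -16 (already a
+	 multiple of BITS_PER_UNIT), folded as -2 bytes into OFFSET, and
+	 BIT_OFFSET becomes 0.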
*/
+      if (wi::neg_p (bit_offset))
+	{
+	  offset_int mask = wi::mask (LOG2_BITS_PER_UNIT, false);
+	  offset_int tem = wi::bit_and_not (bit_offset, mask);
+	  /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
+	     Subtract it to BIT_OFFSET and add it (scaled) to OFFSET. */
+	  bit_offset -= tem;
+	  tem >>= LOG2_BITS_PER_UNIT;
+	  if (offset)
+	    offset = size_binop (PLUS_EXPR, offset,
+				 wide_int_to_tree (sizetype, tem));
+	  else
+	    offset = wide_int_to_tree (sizetype, tem);
+	}
+
+      bitpos += bit_offset.to_shwi ();
+    }
+
+  if (bitpos % BITS_PER_UNIT)
+    return false;
+  if (bitsize % BITS_PER_UNIT)
+    return false;
+  if (reversep)
+    return false;
+
+  if (!init_symbolic_number (n, ref))
+    return false;
+  n->base_addr = base_addr;
+  n->offset = offset;
+  n->bytepos = bitpos / BITS_PER_UNIT;
+  n->alias_set = reference_alias_ptr_type (ref);
+  n->vuse = gimple_vuse (stmt);
+  return true;
+}
+
+/* Compute the symbolic number N representing the result of a bitwise OR on
+   the two symbolic numbers N1 and N2 whose source statements are respectively
+   SOURCE_STMT1 and SOURCE_STMT2. */
+
+gimple *
+perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
+			gimple *source_stmt2, struct symbolic_number *n2,
+			struct symbolic_number *n)
+{
+  int i, size;
+  uint64_t mask;
+  gimple *source_stmt;
+  struct symbolic_number *n_start;
+
+  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
+  if (TREE_CODE (rhs1) == BIT_FIELD_REF
+      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
+    rhs1 = TREE_OPERAND (rhs1, 0);
+  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
+  if (TREE_CODE (rhs2) == BIT_FIELD_REF
+      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
+    rhs2 = TREE_OPERAND (rhs2, 0);
+
+  /* Sources are different, cancel bswap if they are not memory locations with
+     the same base (array, structure, ...). */
+  if (rhs1 != rhs2)
+    {
+      uint64_t inc;
+      HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
+      struct symbolic_number *toinc_n_ptr, *n_end;
+      basic_block bb1, bb2;
+
+      if (!n1->base_addr || !n2->base_addr
+	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
+	return NULL;
+
+      if (!n1->offset != !n2->offset
+	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
+	return NULL;
+
+      if (n1->bytepos < n2->bytepos)
+	{
+	  n_start = n1;
+	  start_sub = n2->bytepos - n1->bytepos;
+	}
+      else
+	{
+	  n_start = n2;
+	  start_sub = n1->bytepos - n2->bytepos;
+	}
+
+      bb1 = gimple_bb (source_stmt1);
+      bb2 = gimple_bb (source_stmt2);
+      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
+	source_stmt = source_stmt1;
+      else
+	source_stmt = source_stmt2;
+
+      /* Find the highest address at which a load is performed and
+	 compute related info. */
+      end1 = n1->bytepos + (n1->range - 1);
+      end2 = n2->bytepos + (n2->range - 1);
+      if (end1 < end2)
+	{
+	  end = end2;
+	  end_sub = end2 - end1;
+	}
+      else
+	{
+	  end = end1;
+	  end_sub = end1 - end2;
+	}
+      n_end = (end2 > end1) ? n2 : n1;
+
+      /* Find symbolic number whose lsb is the most significant. */
+      if (BYTES_BIG_ENDIAN)
+	toinc_n_ptr = (n_end == n1) ? n2 : n1;
+      else
+	toinc_n_ptr = (n_start == n1) ? n2 : n1;
+
+      n->range = end - n_start->bytepos + 1;
+
+      /* Check that the range of memory covered can be represented by
+	 a symbolic number. */
+      if (n->range > 64 / BITS_PER_MARKER)
+	return NULL;
+
+      /* Reinterpret the byte marks in the symbolic number holding the value
+	 of bigger weight according to target endianness. */
+      inc = BYTES_BIG_ENDIAN ?
end_sub : start_sub; + size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT; + for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER) + { + unsigned marker + = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; + if (marker && marker != MARKER_BYTE_UNKNOWN) + toinc_n_ptr->n += inc; + } + } + else + { + n->range = n1->range; + n_start = n1; + source_stmt = source_stmt1; + } + + if (!n1->alias_set + || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set)) + n->alias_set = n1->alias_set; + else + n->alias_set = ptr_type_node; + n->vuse = n_start->vuse; + n->base_addr = n_start->base_addr; + n->offset = n_start->offset; + n->src = n_start->src; + n->bytepos = n_start->bytepos; + n->type = n_start->type; + size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + + for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER) + { + uint64_t masked1, masked2; + + masked1 = n1->n & mask; + masked2 = n2->n & mask; + if (masked1 && masked2 && masked1 != masked2) + return NULL; + } + n->n = n1->n | n2->n; + n->n_ops = n1->n_ops + n2->n_ops; + + return source_stmt; +} + +/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform + the operation given by the rhs of STMT on the result. If the operation + could successfully be executed the function returns a gimple stmt whose + rhs's first tree is the expression of the source operand and NULL + otherwise. */ + +gimple * +find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) +{ + enum tree_code code; + tree rhs1, rhs2 = NULL; + gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1; + enum gimple_rhs_class rhs_class; + + if (!limit || !is_gimple_assign (stmt)) + return NULL; + + rhs1 = gimple_assign_rhs1 (stmt); + + if (find_bswap_or_nop_load (stmt, rhs1, n)) + return stmt; + + /* Handle BIT_FIELD_REF. */ + if (TREE_CODE (rhs1) == BIT_FIELD_REF + && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) + { + unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1)); + unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2)); + if (bitpos % BITS_PER_UNIT == 0 + && bitsize % BITS_PER_UNIT == 0 + && init_symbolic_number (n, TREE_OPERAND (rhs1, 0))) + { + /* Handle big-endian bit numbering in BIT_FIELD_REF. */ + if (BYTES_BIG_ENDIAN) + bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize; + + /* Shift. */ + if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos)) + return NULL; + + /* Mask. */ + uint64_t mask = 0; + uint64_t tmp = (1 << BITS_PER_UNIT) - 1; + for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; + i++, tmp <<= BITS_PER_UNIT) + mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); + n->n &= mask; + + /* Convert. */ + n->type = TREE_TYPE (rhs1); + if (!n->base_addr) + n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + + return verify_symbolic_number_p (n, stmt) ? stmt : NULL; + } + + return NULL; + } + + if (TREE_CODE (rhs1) != SSA_NAME) + return NULL; + + code = gimple_assign_rhs_code (stmt); + rhs_class = gimple_assign_rhs_class (stmt); + rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); + + if (rhs_class == GIMPLE_BINARY_RHS) + rhs2 = gimple_assign_rhs2 (stmt); + + /* Handle unary rhs and binary rhs with integer constants as second + operand. 
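+   E.g. casts such as (int) x, shifts like x << 8 and masks like x & 0xff00
+   are all handled here; a full binary rhs such as x | y is handled below.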
*/ + + if (rhs_class == GIMPLE_UNARY_RHS + || (rhs_class == GIMPLE_BINARY_RHS + && TREE_CODE (rhs2) == INTEGER_CST)) + { + if (code != BIT_AND_EXPR + && code != LSHIFT_EXPR + && code != RSHIFT_EXPR + && code != LROTATE_EXPR + && code != RROTATE_EXPR + && !CONVERT_EXPR_CODE_P (code)) + return NULL; + + source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1); + + /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and + we have to initialize the symbolic number. */ + if (!source_stmt1) + { + if (gimple_assign_load_p (stmt) + || !init_symbolic_number (n, rhs1)) + return NULL; + source_stmt1 = stmt; + } + + switch (code) + { + case BIT_AND_EXPR: + { + int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + uint64_t val = int_cst_value (rhs2), mask = 0; + uint64_t tmp = (1 << BITS_PER_UNIT) - 1; + + /* Only constants masking full bytes are allowed. */ + for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT) + if ((val & tmp) != 0 && (val & tmp) != tmp) + return NULL; + else if (val & tmp) + mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); + + n->n &= mask; + } + break; + case LSHIFT_EXPR: + case RSHIFT_EXPR: + case LROTATE_EXPR: + case RROTATE_EXPR: + if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2))) + return NULL; + break; + CASE_CONVERT: + { + int i, type_size, old_type_size; + tree type; + + type = gimple_expr_type (stmt); + type_size = TYPE_PRECISION (type); + if (type_size % BITS_PER_UNIT != 0) + return NULL; + type_size /= BITS_PER_UNIT; + if (type_size > 64 / BITS_PER_MARKER) + return NULL; + + /* Sign extension: result is dependent on the value. */ + old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size + && HEAD_MARKER (n->n, old_type_size)) + for (i = 0; i < type_size - old_type_size; i++) + n->n |= (uint64_t) MARKER_BYTE_UNKNOWN + << ((type_size - 1 - i) * BITS_PER_MARKER); + + if (type_size < 64 / BITS_PER_MARKER) + { + /* If STMT casts to a smaller type mask out the bits not + belonging to the target type. */ + n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1; + } + n->type = type; + if (!n->base_addr) + n->range = type_size; + } + break; + default: + return NULL; + }; + return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL; + } + + /* Handle binary rhs. */ + + if (rhs_class == GIMPLE_BINARY_RHS) + { + struct symbolic_number n1, n2; + gimple *source_stmt, *source_stmt2; + + if (code != BIT_IOR_EXPR) + return NULL; + + if (TREE_CODE (rhs2) != SSA_NAME) + return NULL; + + rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); + + switch (code) + { + case BIT_IOR_EXPR: + source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1); + + if (!source_stmt1) + return NULL; + + source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1); + + if (!source_stmt2) + return NULL; + + if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type)) + return NULL; + + if (!n1.vuse != !n2.vuse + || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0))) + return NULL; + + source_stmt + = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n); + + if (!source_stmt) + return NULL; + + if (!verify_symbolic_number_p (n, stmt)) + return NULL; + + break; + default: + return NULL; + } + return source_stmt; + } + return NULL; +} + +/* Check if STMT completes a bswap implementation or a read in a given + endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP + accordingly. 
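+   For instance, with a 32-bit unsigned SRC, the sequence
+
+     tmp = (src >> 24)
+	   | ((src >> 8) & 0xff00)
+	   | ((src << 8) & 0xff0000)
+	   | (src << 24);
+
+   is detected as a 32-bit byte swap and sets *BSWAP to true, while an
+   equivalent sequence that reassembles SRC in its original byte order
+   sets *BSWAP to false.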
It also sets N to represent the kind of operations
+   performed: size of the resulting expression and whether it works on
+   a memory source, and if so alias-set and vuse. Finally, the
+   function returns a stmt whose rhs's first tree is the source
+   expression. */
+
+gimple *
+find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
+{
+  unsigned rsize;
+  uint64_t tmpn, mask;
+/* The number which the find_bswap_or_nop_1 result should match in order
+   to have a full byte swap. The number is shifted to the right
+   according to the size of the symbolic number before using it. */
+  uint64_t cmpxchg = CMPXCHG;
+  uint64_t cmpnop = CMPNOP;
+
+  gimple *ins_stmt;
+  int limit;
+
+  /* The last parameter determines the depth search limit. It usually
+     correlates directly to the number n of bytes to be touched. We
+     increase that number by log2(n) + 1 here in order to also
+     cover signed -> unsigned conversions of the src operand as can be seen
+     in libgcc, and for initial shift/and operation of the src operand. */
+  limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
+  limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
+  ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
+
+  if (!ins_stmt)
+    return NULL;
+
+  /* Find real size of result (highest non-zero byte). */
+  if (n->base_addr)
+    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
+  else
+    rsize = n->range;
+
+  /* Zero out the bits corresponding to untouched bytes in original gimple
+     expression. */
+  if (n->range < (int) sizeof (int64_t))
+    {
+      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
+      cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
+      cmpnop &= mask;
+    }
+
+  /* Zero out the bits corresponding to unused bytes in the result of the
+     gimple expression. */
+  if (rsize < n->range)
+    {
+      if (BYTES_BIG_ENDIAN)
+	{
+	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
+	  cmpxchg &= mask;
+	  cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
+	}
+      else
+	{
+	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
+	  cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
+	  cmpnop &= mask;
+	}
+      n->range = rsize;
+    }
+
+  /* A complete byte swap should make the symbolic number start with
+     the largest digit in the highest order byte. An unchanged symbolic
+     number indicates a read with the same endianness as the target
+     architecture. */
+  if (n->n == cmpnop)
+    *bswap = false;
+  else if (n->n == cmpxchg)
+    *bswap = true;
+  else
+    return NULL;
+
+  /* Useless bit manipulation performed by code. */
+  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
+    return NULL;
+
+  n->range *= BITS_PER_UNIT;
+  return ins_stmt;
+}
+
+const pass_data pass_data_optimize_bswap =
+{
+  GIMPLE_PASS, /* type */
+  "bswap", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_ssa, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_optimize_bswap : public gimple_opt_pass
+{
+public:
+  pass_optimize_bswap (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
+    }
+
+  virtual unsigned int execute (function *);
+
+}; // class pass_optimize_bswap
+
+/* Perform the bswap optimization: replace the expression computed in the rhs
+   of CUR_STMT by an equivalent bswap, load or load + bswap expression.
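+   For instance, on a little-endian target a matched 32-bit sequence reading
+   a big-endian value from memory is rewritten as a single load followed by
+   a call to __builtin_bswap32.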
+   Which of these alternatives replaces the rhs is given by N->base_addr (non
+   null if a load is needed) and BSWAP. The type, VUSE and alias set of the
+   load to perform are also given in N while the builtin bswap to invoke is
+   given in FNDECL. Finally, if a load is involved, INS_STMT refers to one
+   of the load statements involved to construct the rhs in CUR_STMT and
+   N->range gives the size of the rhs expression for maintaining some
+   statistics.
+
+   Note that if the replacement involves a load, CUR_STMT is moved just after
+   INS_STMT to do the load with the same VUSE which can lead to CUR_STMT
+   changing basic block. */
+
+bool
+bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
+	       tree bswap_type, tree load_type, struct symbolic_number *n,
+	       bool bswap)
+{
+  gimple_stmt_iterator gsi;
+  tree src, tmp, tgt;
+  gimple *bswap_stmt;
+
+  gsi = gsi_for_stmt (cur_stmt);
+  src = n->src;
+  tgt = gimple_assign_lhs (cur_stmt);
+
+  /* Need to load the value from memory first. */
+  if (n->base_addr)
+    {
+      gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
+      tree addr_expr, addr_tmp, val_expr, val_tmp;
+      tree load_offset_ptr, aligned_load_type;
+      gimple *addr_stmt, *load_stmt;
+      unsigned align;
+      HOST_WIDE_INT load_offset = 0;
+      basic_block ins_bb, cur_bb;
+
+      ins_bb = gimple_bb (ins_stmt);
+      cur_bb = gimple_bb (cur_stmt);
+      if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
+	return false;
+
+      align = get_object_alignment (src);
+
+      /* Move cur_stmt just before one of the loads of the original sequence
+	 to ensure it has the same VUSE. See PR61517 for what could
+	 go wrong. */
+      if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
+	reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
+      gsi_move_before (&gsi, &gsi_ins);
+      gsi = gsi_for_stmt (cur_stmt);
+
+      /* Compute address to load from and cast according to the size
+	 of the load. */
+      addr_expr = build_fold_addr_expr (unshare_expr (src));
+      if (is_gimple_mem_ref_addr (addr_expr))
+	addr_tmp = addr_expr;
+      else
+	{
+	  addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
+					 "load_src");
+	  addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
+	  gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
+	}
+
+      /* Perform the load. */
+      aligned_load_type = load_type;
+      if (align < TYPE_ALIGN (load_type))
+	aligned_load_type = build_aligned_type (load_type, align);
+      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
+      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
+			      load_offset_ptr);
+
+      if (!bswap)
+	{
+	  if (n->range == 16)
+	    nop_stats.found_16bit++;
+	  else if (n->range == 32)
+	    nop_stats.found_32bit++;
+	  else
+	    {
+	      gcc_assert (n->range == 64);
+	      nop_stats.found_64bit++;
+	    }
+
+	  /* Convert the result of the load if necessary.
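+	     The load was emitted in LOAD_TYPE; if TGT has an incompatible
+	     type, a separate NOP_EXPR assignment from a fresh SSA name is
+	     emitted below.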
*/
+	  if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
+	    {
+	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
+					    "load_dst");
+	      load_stmt = gimple_build_assign (val_tmp, val_expr);
+	      gimple_set_vuse (load_stmt, n->vuse);
+	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
+	    }
+	  else
+	    {
+	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
+	      gimple_set_vuse (cur_stmt, n->vuse);
+	    }
+	  update_stmt (cur_stmt);
+
+	  if (dump_file)
+	    {
+	      fprintf (dump_file,
+		       "%d bit load in target endianness found at: ",
+		       (int) n->range);
+	      print_gimple_stmt (dump_file, cur_stmt, 0);
+	    }
+	  return true;
+	}
+      else
+	{
+	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
+	  load_stmt = gimple_build_assign (val_tmp, val_expr);
+	  gimple_set_vuse (load_stmt, n->vuse);
+	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+	}
+      src = val_tmp;
+    }
+  else if (!bswap)
+    {
+      gimple *g;
+      if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
+	{
+	  if (!is_gimple_val (src))
+	    return false;
+	  g = gimple_build_assign (tgt, NOP_EXPR, src);
+	}
+      else
+	g = gimple_build_assign (tgt, src);
+      if (n->range == 16)
+	nop_stats.found_16bit++;
+      else if (n->range == 32)
+	nop_stats.found_32bit++;
+      else
+	{
+	  gcc_assert (n->range == 64);
+	  nop_stats.found_64bit++;
+	}
+      if (dump_file)
+	{
+	  fprintf (dump_file,
+		   "%d bit reshuffle in target endianness found at: ",
+		   (int) n->range);
+	  print_gimple_stmt (dump_file, cur_stmt, 0);
+	}
+      gsi_replace (&gsi, g, true);
+      return true;
+    }
+  else if (TREE_CODE (src) == BIT_FIELD_REF)
+    src = TREE_OPERAND (src, 0);
+
+  if (n->range == 16)
+    bswap_stats.found_16bit++;
+  else if (n->range == 32)
+    bswap_stats.found_32bit++;
+  else
+    {
+      gcc_assert (n->range == 64);
+      bswap_stats.found_64bit++;
+    }
+
+  tmp = src;
+
+  /* Convert the src expression if necessary. */
+  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
+    {
+      gimple *convert_stmt;
+
+      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
+      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
+      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
+    }
+
+  /* The canonical form for a 16-bit bswap is a rotate expression. Only
+     16-bit values are considered, as a rotation of 2N-bit values by N bits
+     is generally not equivalent to a bswap. Consider for instance
+     0x01020304 r>> 16 which gives 0x03040102 while a bswap for that value
+     is 0x04030201. */
+  if (bswap && n->range == 16)
+    {
+      tree count = build_int_cst (NULL, BITS_PER_UNIT);
+      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
+      bswap_stmt = gimple_build_assign (NULL, src);
+    }
+  else
+    bswap_stmt = gimple_build_call (fndecl, 1, tmp);
+
+  tmp = tgt;
+
+  /* Convert the result if necessary. */
+  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
+    {
+      gimple *convert_stmt;
+
+      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
+      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
+      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
+    }
+
+  gimple_set_lhs (bswap_stmt, tmp);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "%d bit bswap implementation found at: ",
+	       (int) n->range);
+      print_gimple_stmt (dump_file, cur_stmt, 0);
+    }
+
+  gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
+  gsi_remove (&gsi, true);
+  return true;
+}
+
+/* Find manual byte swap implementations as well as loads in a given
+   endianness.
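+   For instance, on a little-endian target
+
+     x = p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
+
+   for an unsigned char *p is such a load of a big-endian 32-bit value.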
Byte swaps are turned into a bswap builtin invocation
+   while endian loads are converted to a bswap builtin invocation or a
+   simple load according to the target endianness. */
+
+unsigned int
+pass_optimize_bswap::execute (function *fun)
+{
+  basic_block bb;
+  bool bswap32_p, bswap64_p;
+  bool changed = false;
+  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
+
+  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
+  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
+	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
+		   || (bswap32_p && word_mode == SImode)));
+
+  /* Determine the argument type of the builtins. The code later on
+     assumes that the return and argument type are the same. */
+  if (bswap32_p)
+    {
+      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+    }
+
+  if (bswap64_p)
+    {
+      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+    }
+
+  memset (&nop_stats, 0, sizeof (nop_stats));
+  memset (&bswap_stats, 0, sizeof (bswap_stats));
+  calculate_dominance_info (CDI_DOMINATORS);
+
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple_stmt_iterator gsi;
+
+      /* We do a reverse scan for bswap patterns to make sure we get the
+	 widest match. As bswap pattern matching doesn't handle previously
+	 inserted smaller bswap replacements as sub-patterns, the wider
+	 variant wouldn't be detected. */
+      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
+	{
+	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
+	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+	  enum tree_code code;
+	  struct symbolic_number n;
+	  bool bswap;
+
+	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
+	     might be moved to a different basic block by bswap_replace and gsi
+	     must not point to it if that's the case. Moving the gsi_prev
+	     there makes sure that gsi points to the statement previous to
+	     cur_stmt while still making sure that all statements are
+	     considered in this basic block. */
+	  gsi_prev (&gsi);
+
+	  if (!is_gimple_assign (cur_stmt))
+	    continue;
+
+	  code = gimple_assign_rhs_code (cur_stmt);
+	  switch (code)
+	    {
+	    case LROTATE_EXPR:
+	    case RROTATE_EXPR:
+	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
+		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
+		     % BITS_PER_UNIT)
+		continue;
+	      /* Fall through. */
+	    case BIT_IOR_EXPR:
+	      break;
+	    default:
+	      continue;
+	    }
+
+	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+
+	  if (!ins_stmt)
+	    continue;
+
+	  switch (n.range)
+	    {
+	    case 16:
+	      /* Already in canonical form, nothing to do.
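+		 (a 16-bit bswap is canonically represented as a rotate by
+		 8 bits, which is the very pattern matched here)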
*/ + if (code == LROTATE_EXPR || code == RROTATE_EXPR) + continue; + load_type = bswap_type = uint16_type_node; + break; + case 32: + load_type = uint32_type_node; + if (bswap32_p) + { + fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); + bswap_type = bswap32_type; + } + break; + case 64: + load_type = uint64_type_node; + if (bswap64_p) + { + fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); + bswap_type = bswap64_type; + } + break; + default: + continue; + } + + if (bswap && !fndecl && n.range != 16) + continue; + + if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type, + &n, bswap)) + changed = true; + } + } + + statistics_counter_event (fun, "16-bit nop implementations found", + nop_stats.found_16bit); + statistics_counter_event (fun, "32-bit nop implementations found", + nop_stats.found_32bit); + statistics_counter_event (fun, "64-bit nop implementations found", + nop_stats.found_64bit); + statistics_counter_event (fun, "16-bit bswap implementations found", + bswap_stats.found_16bit); + statistics_counter_event (fun, "32-bit bswap implementations found", + bswap_stats.found_32bit); + statistics_counter_event (fun, "64-bit bswap implementations found", + bswap_stats.found_64bit); + + return (changed ? TODO_update_ssa : 0); +} + +} // anon namespace + +gimple_opt_pass * +make_pass_optimize_bswap (gcc::context *ctxt) +{ + return new pass_optimize_bswap (ctxt); +} + +namespace { + /* Struct recording one operand for the store, which is either a constant, then VAL represents the constant and all the other fields are zero, or a memory load, then VAL represents the reference, BASE_ADDR is non-NULL diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 5986ac1da38..07030439c7a 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -165,18 +165,6 @@ static struct int inserted; } sincos_stats; -static struct -{ - /* Number of hand-written 16-bit nop / bswaps found. */ - int found_16bit; - - /* Number of hand-written 32-bit nop / bswaps found. */ - int found_32bit; - - /* Number of hand-written 64-bit nop / bswaps found. */ - int found_64bit; -} nop_stats, bswap_stats; - static struct { /* Number of widening multiplication ops inserted. */ @@ -1934,1070 +1922,6 @@ make_pass_cse_sincos (gcc::context *ctxt) return new pass_cse_sincos (ctxt); } -/* A symbolic number structure is used to detect byte permutation and selection - patterns of a source. To achieve that, its field N contains an artificial - number consisting of BITS_PER_MARKER sized markers tracking where does each - byte come from in the source: - - 0 - target byte has the value 0 - FF - target byte has an unknown value (eg. due to sign extension) - 1..size - marker value is the byte index in the source (0 for lsb). - - To detect permutations on memory sources (arrays and structures), a symbolic - number is also associated: - - a base address BASE_ADDR and an OFFSET giving the address of the source; - - a range which gives the difference between the highest and lowest accessed - memory location to make such a symbolic number; - - the address SRC of the source element of lowest address as a convenience - to easily get BASE_ADDR + offset + lowest bytepos; - - number of expressions N_OPS bitwise ored together to represent - approximate cost of the computation. - - Note 1: the range is different from size as size reflects the size of the - type of the current expression. 
For instance, for an array char a[], - (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while - (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this - time a range of 1. - - Note 2: for non-memory sources, range holds the same value as size. - - Note 3: SRC points to the SSA_NAME in case of non-memory source. */ - -struct symbolic_number { - uint64_t n; - tree type; - tree base_addr; - tree offset; - HOST_WIDE_INT bytepos; - tree src; - tree alias_set; - tree vuse; - unsigned HOST_WIDE_INT range; - int n_ops; -}; - -#define BITS_PER_MARKER 8 -#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1) -#define MARKER_BYTE_UNKNOWN MARKER_MASK -#define HEAD_MARKER(n, size) \ - ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER))) - -/* The number which the find_bswap_or_nop_1 result should match in - order to have a nop. The number is masked according to the size of - the symbolic number before using it. */ -#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \ - (uint64_t)0x08070605 << 32 | 0x04030201) - -/* The number which the find_bswap_or_nop_1 result should match in - order to have a byte swap. The number is masked according to the - size of the symbolic number before using it. */ -#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \ - (uint64_t)0x01020304 << 32 | 0x05060708) - -/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic - number N. Return false if the requested operation is not permitted - on a symbolic number. */ - -static inline bool -do_shift_rotate (enum tree_code code, - struct symbolic_number *n, - int count) -{ - int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; - unsigned head_marker; - - if (count % BITS_PER_UNIT != 0) - return false; - count = (count / BITS_PER_UNIT) * BITS_PER_MARKER; - - /* Zero out the extra bits of N in order to avoid them being shifted - into the significant bits. */ - if (size < 64 / BITS_PER_MARKER) - n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; - - switch (code) - { - case LSHIFT_EXPR: - n->n <<= count; - break; - case RSHIFT_EXPR: - head_marker = HEAD_MARKER (n->n, size); - n->n >>= count; - /* Arithmetic shift of signed type: result is dependent on the value. */ - if (!TYPE_UNSIGNED (n->type) && head_marker) - for (i = 0; i < count / BITS_PER_MARKER; i++) - n->n |= (uint64_t) MARKER_BYTE_UNKNOWN - << ((size - 1 - i) * BITS_PER_MARKER); - break; - case LROTATE_EXPR: - n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count)); - break; - case RROTATE_EXPR: - n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count)); - break; - default: - return false; - } - /* Zero unused bits for size. */ - if (size < 64 / BITS_PER_MARKER) - n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; - return true; -} - -/* Perform sanity checking for the symbolic number N and the gimple - statement STMT. */ - -static inline bool -verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt) -{ - tree lhs_type; - - lhs_type = gimple_expr_type (stmt); - - if (TREE_CODE (lhs_type) != INTEGER_TYPE) - return false; - - if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type)) - return false; - - return true; -} - -/* Initialize the symbolic number N for the bswap pass from the base element - SRC manipulated by the bitwise OR expression. */ - -static bool -init_symbolic_number (struct symbolic_number *n, tree src) -{ - int size; - - if (! 
INTEGRAL_TYPE_P (TREE_TYPE (src))) - return false; - - n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; - n->src = src; - - /* Set up the symbolic number N by setting each byte to a value between 1 and - the byte size of rhs1. The highest order byte is set to n->size and the - lowest order byte to 1. */ - n->type = TREE_TYPE (src); - size = TYPE_PRECISION (n->type); - if (size % BITS_PER_UNIT != 0) - return false; - size /= BITS_PER_UNIT; - if (size > 64 / BITS_PER_MARKER) - return false; - n->range = size; - n->n = CMPNOP; - n->n_ops = 1; - - if (size < 64 / BITS_PER_MARKER) - n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; - - return true; -} - -/* Check if STMT might be a byte swap or a nop from a memory source and returns - the answer. If so, REF is that memory source and the base of the memory area - accessed and the offset of the access from that base are recorded in N. */ - -bool -find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n) -{ - /* Leaf node is an array or component ref. Memorize its base and - offset from base to compare to other such leaf node. */ - HOST_WIDE_INT bitsize, bitpos; - machine_mode mode; - int unsignedp, reversep, volatilep; - tree offset, base_addr; - - /* Not prepared to handle PDP endian. */ - if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) - return false; - - if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt)) - return false; - - base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode, - &unsignedp, &reversep, &volatilep); - - if (TREE_CODE (base_addr) == MEM_REF) - { - offset_int bit_offset = 0; - tree off = TREE_OPERAND (base_addr, 1); - - if (!integer_zerop (off)) - { - offset_int boff, coff = mem_ref_offset (base_addr); - boff = coff << LOG2_BITS_PER_UNIT; - bit_offset += boff; - } - - base_addr = TREE_OPERAND (base_addr, 0); - - /* Avoid returning a negative bitpos as this may wreak havoc later. */ - if (wi::neg_p (bit_offset)) - { - offset_int mask = wi::mask (LOG2_BITS_PER_UNIT, false); - offset_int tem = wi::bit_and_not (bit_offset, mask); - /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf. - Subtract it to BIT_OFFSET and add it (scaled) to OFFSET. */ - bit_offset -= tem; - tem >>= LOG2_BITS_PER_UNIT; - if (offset) - offset = size_binop (PLUS_EXPR, offset, - wide_int_to_tree (sizetype, tem)); - else - offset = wide_int_to_tree (sizetype, tem); - } - - bitpos += bit_offset.to_shwi (); - } - - if (bitpos % BITS_PER_UNIT) - return false; - if (bitsize % BITS_PER_UNIT) - return false; - if (reversep) - return false; - - if (!init_symbolic_number (n, ref)) - return false; - n->base_addr = base_addr; - n->offset = offset; - n->bytepos = bitpos / BITS_PER_UNIT; - n->alias_set = reference_alias_ptr_type (ref); - n->vuse = gimple_vuse (stmt); - return true; -} - -/* Compute the symbolic number N representing the result of a bitwise OR on 2 - symbolic number N1 and N2 whose source statements are respectively - SOURCE_STMT1 and SOURCE_STMT2. 
*/ - -static gimple * -perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1, - gimple *source_stmt2, struct symbolic_number *n2, - struct symbolic_number *n) -{ - int i, size; - uint64_t mask; - gimple *source_stmt; - struct symbolic_number *n_start; - - tree rhs1 = gimple_assign_rhs1 (source_stmt1); - if (TREE_CODE (rhs1) == BIT_FIELD_REF - && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) - rhs1 = TREE_OPERAND (rhs1, 0); - tree rhs2 = gimple_assign_rhs1 (source_stmt2); - if (TREE_CODE (rhs2) == BIT_FIELD_REF - && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME) - rhs2 = TREE_OPERAND (rhs2, 0); - - /* Sources are different, cancel bswap if they are not memory location with - the same base (array, structure, ...). */ - if (rhs1 != rhs2) - { - uint64_t inc; - HOST_WIDE_INT start_sub, end_sub, end1, end2, end; - struct symbolic_number *toinc_n_ptr, *n_end; - basic_block bb1, bb2; - - if (!n1->base_addr || !n2->base_addr - || !operand_equal_p (n1->base_addr, n2->base_addr, 0)) - return NULL; - - if (!n1->offset != !n2->offset - || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0))) - return NULL; - - if (n1->bytepos < n2->bytepos) - { - n_start = n1; - start_sub = n2->bytepos - n1->bytepos; - } - else - { - n_start = n2; - start_sub = n1->bytepos - n2->bytepos; - } - - bb1 = gimple_bb (source_stmt1); - bb2 = gimple_bb (source_stmt2); - if (dominated_by_p (CDI_DOMINATORS, bb1, bb2)) - source_stmt = source_stmt1; - else - source_stmt = source_stmt2; - - /* Find the highest address at which a load is performed and - compute related info. */ - end1 = n1->bytepos + (n1->range - 1); - end2 = n2->bytepos + (n2->range - 1); - if (end1 < end2) - { - end = end2; - end_sub = end2 - end1; - } - else - { - end = end1; - end_sub = end1 - end2; - } - n_end = (end2 > end1) ? n2 : n1; - - /* Find symbolic number whose lsb is the most significant. */ - if (BYTES_BIG_ENDIAN) - toinc_n_ptr = (n_end == n1) ? n2 : n1; - else - toinc_n_ptr = (n_start == n1) ? n2 : n1; - - n->range = end - n_start->bytepos + 1; - - /* Check that the range of memory covered can be represented by - a symbolic number. */ - if (n->range > 64 / BITS_PER_MARKER) - return NULL; - - /* Reinterpret byte marks in symbolic number holding the value of - bigger weight according to target endianness. */ - inc = BYTES_BIG_ENDIAN ? 
end_sub : start_sub; - size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT; - for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER) - { - unsigned marker - = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; - if (marker && marker != MARKER_BYTE_UNKNOWN) - toinc_n_ptr->n += inc; - } - } - else - { - n->range = n1->range; - n_start = n1; - source_stmt = source_stmt1; - } - - if (!n1->alias_set - || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set)) - n->alias_set = n1->alias_set; - else - n->alias_set = ptr_type_node; - n->vuse = n_start->vuse; - n->base_addr = n_start->base_addr; - n->offset = n_start->offset; - n->src = n_start->src; - n->bytepos = n_start->bytepos; - n->type = n_start->type; - size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; - - for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER) - { - uint64_t masked1, masked2; - - masked1 = n1->n & mask; - masked2 = n2->n & mask; - if (masked1 && masked2 && masked1 != masked2) - return NULL; - } - n->n = n1->n | n2->n; - n->n_ops = n1->n_ops + n2->n_ops; - - return source_stmt; -} - -/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform - the operation given by the rhs of STMT on the result. If the operation - could successfully be executed the function returns a gimple stmt whose - rhs's first tree is the expression of the source operand and NULL - otherwise. */ - -static gimple * -find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) -{ - enum tree_code code; - tree rhs1, rhs2 = NULL; - gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1; - enum gimple_rhs_class rhs_class; - - if (!limit || !is_gimple_assign (stmt)) - return NULL; - - rhs1 = gimple_assign_rhs1 (stmt); - - if (find_bswap_or_nop_load (stmt, rhs1, n)) - return stmt; - - /* Handle BIT_FIELD_REF. */ - if (TREE_CODE (rhs1) == BIT_FIELD_REF - && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) - { - unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1)); - unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2)); - if (bitpos % BITS_PER_UNIT == 0 - && bitsize % BITS_PER_UNIT == 0 - && init_symbolic_number (n, TREE_OPERAND (rhs1, 0))) - { - /* Handle big-endian bit numbering in BIT_FIELD_REF. */ - if (BYTES_BIG_ENDIAN) - bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize; - - /* Shift. */ - if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos)) - return NULL; - - /* Mask. */ - uint64_t mask = 0; - uint64_t tmp = (1 << BITS_PER_UNIT) - 1; - for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; - i++, tmp <<= BITS_PER_UNIT) - mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); - n->n &= mask; - - /* Convert. */ - n->type = TREE_TYPE (rhs1); - if (!n->base_addr) - n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT; - - return verify_symbolic_number_p (n, stmt) ? stmt : NULL; - } - - return NULL; - } - - if (TREE_CODE (rhs1) != SSA_NAME) - return NULL; - - code = gimple_assign_rhs_code (stmt); - rhs_class = gimple_assign_rhs_class (stmt); - rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); - - if (rhs_class == GIMPLE_BINARY_RHS) - rhs2 = gimple_assign_rhs2 (stmt); - - /* Handle unary rhs and binary rhs with integer constants as second - operand. 
*/ - - if (rhs_class == GIMPLE_UNARY_RHS - || (rhs_class == GIMPLE_BINARY_RHS - && TREE_CODE (rhs2) == INTEGER_CST)) - { - if (code != BIT_AND_EXPR - && code != LSHIFT_EXPR - && code != RSHIFT_EXPR - && code != LROTATE_EXPR - && code != RROTATE_EXPR - && !CONVERT_EXPR_CODE_P (code)) - return NULL; - - source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1); - - /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and - we have to initialize the symbolic number. */ - if (!source_stmt1) - { - if (gimple_assign_load_p (stmt) - || !init_symbolic_number (n, rhs1)) - return NULL; - source_stmt1 = stmt; - } - - switch (code) - { - case BIT_AND_EXPR: - { - int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; - uint64_t val = int_cst_value (rhs2), mask = 0; - uint64_t tmp = (1 << BITS_PER_UNIT) - 1; - - /* Only constants masking full bytes are allowed. */ - for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT) - if ((val & tmp) != 0 && (val & tmp) != tmp) - return NULL; - else if (val & tmp) - mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); - - n->n &= mask; - } - break; - case LSHIFT_EXPR: - case RSHIFT_EXPR: - case LROTATE_EXPR: - case RROTATE_EXPR: - if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2))) - return NULL; - break; - CASE_CONVERT: - { - int i, type_size, old_type_size; - tree type; - - type = gimple_expr_type (stmt); - type_size = TYPE_PRECISION (type); - if (type_size % BITS_PER_UNIT != 0) - return NULL; - type_size /= BITS_PER_UNIT; - if (type_size > 64 / BITS_PER_MARKER) - return NULL; - - /* Sign extension: result is dependent on the value. */ - old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; - if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size - && HEAD_MARKER (n->n, old_type_size)) - for (i = 0; i < type_size - old_type_size; i++) - n->n |= (uint64_t) MARKER_BYTE_UNKNOWN - << ((type_size - 1 - i) * BITS_PER_MARKER); - - if (type_size < 64 / BITS_PER_MARKER) - { - /* If STMT casts to a smaller type mask out the bits not - belonging to the target type. */ - n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1; - } - n->type = type; - if (!n->base_addr) - n->range = type_size; - } - break; - default: - return NULL; - }; - return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL; - } - - /* Handle binary rhs. */ - - if (rhs_class == GIMPLE_BINARY_RHS) - { - struct symbolic_number n1, n2; - gimple *source_stmt, *source_stmt2; - - if (code != BIT_IOR_EXPR) - return NULL; - - if (TREE_CODE (rhs2) != SSA_NAME) - return NULL; - - rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); - - switch (code) - { - case BIT_IOR_EXPR: - source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1); - - if (!source_stmt1) - return NULL; - - source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1); - - if (!source_stmt2) - return NULL; - - if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type)) - return NULL; - - if (!n1.vuse != !n2.vuse - || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0))) - return NULL; - - source_stmt - = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n); - - if (!source_stmt) - return NULL; - - if (!verify_symbolic_number_p (n, stmt)) - return NULL; - - break; - default: - return NULL; - } - return source_stmt; - } - return NULL; -} - -/* Check if STMT completes a bswap implementation or a read in a given - endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP - accordingly. 
It also sets N to represent the kind of operations - performed: size of the resulting expression and whether it works on - a memory source, and if so alias-set and vuse. At last, the - function returns a stmt whose rhs's first tree is the source - expression. */ - -static gimple * -find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) -{ - unsigned rsize; - uint64_t tmpn, mask; -/* The number which the find_bswap_or_nop_1 result should match in order - to have a full byte swap. The number is shifted to the right - according to the size of the symbolic number before using it. */ - uint64_t cmpxchg = CMPXCHG; - uint64_t cmpnop = CMPNOP; - - gimple *ins_stmt; - int limit; - - /* The last parameter determines the depth search limit. It usually - correlates directly to the number n of bytes to be touched. We - increase that number by log2(n) + 1 here in order to also - cover signed -> unsigned conversions of the src operand as can be seen - in libgcc, and for initial shift/and operation of the src operand. */ - limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); - limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); - ins_stmt = find_bswap_or_nop_1 (stmt, n, limit); - - if (!ins_stmt) - return NULL; - - /* Find real size of result (highest non-zero byte). */ - if (n->base_addr) - for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++); - else - rsize = n->range; - - /* Zero out the bits corresponding to untouched bytes in original gimple - expression. */ - if (n->range < (int) sizeof (int64_t)) - { - mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1; - cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; - cmpnop &= mask; - } - - /* Zero out the bits corresponding to unused bytes in the result of the - gimple expression. */ - if (rsize < n->range) - { - if (BYTES_BIG_ENDIAN) - { - mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; - cmpxchg &= mask; - cmpnop >>= (n->range - rsize) * BITS_PER_MARKER; - } - else - { - mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; - cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER; - cmpnop &= mask; - } - n->range = rsize; - } - - /* A complete byte swap should make the symbolic number to start with - the largest digit in the highest order byte. Unchanged symbolic - number indicates a read with same endianness as target architecture. */ - if (n->n == cmpnop) - *bswap = false; - else if (n->n == cmpxchg) - *bswap = true; - else - return NULL; - - /* Useless bit manipulation performed by code. */ - if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) - return NULL; - - n->range *= BITS_PER_UNIT; - return ins_stmt; -} - -namespace { - -const pass_data pass_data_optimize_bswap = -{ - GIMPLE_PASS, /* type */ - "bswap", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_ssa, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; - -class pass_optimize_bswap : public gimple_opt_pass -{ -public: - pass_optimize_bswap (gcc::context *ctxt) - : gimple_opt_pass (pass_data_optimize_bswap, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *) - { - return flag_expensive_optimizations && optimize; - } - - virtual unsigned int execute (function *); - -}; // class pass_optimize_bswap - -/* Perform the bswap optimization: replace the expression computed in the rhs - of CUR_STMT by an equivalent bswap, load or load + bswap expression. 
- Which of these alternatives replace the rhs is given by N->base_addr (non - null if a load is needed) and BSWAP. The type, VUSE and set-alias of the - load to perform are also given in N while the builtin bswap invoke is given - in FNDEL. Finally, if a load is involved, SRC_STMT refers to one of the - load statements involved to construct the rhs in CUR_STMT and N->range gives - the size of the rhs expression for maintaining some statistics. - - Note that if the replacement involve a load, CUR_STMT is moved just after - SRC_STMT to do the load with the same VUSE which can lead to CUR_STMT - changing of basic block. */ - -static bool -bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl, - tree bswap_type, tree load_type, struct symbolic_number *n, - bool bswap) -{ - gimple_stmt_iterator gsi; - tree src, tmp, tgt; - gimple *bswap_stmt; - - gsi = gsi_for_stmt (cur_stmt); - src = n->src; - tgt = gimple_assign_lhs (cur_stmt); - - /* Need to load the value from memory first. */ - if (n->base_addr) - { - gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt); - tree addr_expr, addr_tmp, val_expr, val_tmp; - tree load_offset_ptr, aligned_load_type; - gimple *addr_stmt, *load_stmt; - unsigned align; - HOST_WIDE_INT load_offset = 0; - basic_block ins_bb, cur_bb; - - ins_bb = gimple_bb (ins_stmt); - cur_bb = gimple_bb (cur_stmt); - if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb)) - return false; - - align = get_object_alignment (src); - - /* Move cur_stmt just before one of the load of the original - to ensure it has the same VUSE. See PR61517 for what could - go wrong. */ - if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt)) - reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt)); - gsi_move_before (&gsi, &gsi_ins); - gsi = gsi_for_stmt (cur_stmt); - - /* Compute address to load from and cast according to the size - of the load. */ - addr_expr = build_fold_addr_expr (unshare_expr (src)); - if (is_gimple_mem_ref_addr (addr_expr)) - addr_tmp = addr_expr; - else - { - addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL, - "load_src"); - addr_stmt = gimple_build_assign (addr_tmp, addr_expr); - gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT); - } - - /* Perform the load. */ - aligned_load_type = load_type; - if (align < TYPE_ALIGN (load_type)) - aligned_load_type = build_aligned_type (load_type, align); - load_offset_ptr = build_int_cst (n->alias_set, load_offset); - val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp, - load_offset_ptr); - - if (!bswap) - { - if (n->range == 16) - nop_stats.found_16bit++; - else if (n->range == 32) - nop_stats.found_32bit++; - else - { - gcc_assert (n->range == 64); - nop_stats.found_64bit++; - } - - /* Convert the result of load if necessary. 
-/* Find manual byte swap implementations as well as loads performed in a
-   given endianness.  Byte swaps are turned into an invocation of the bswap
-   builtin, while endian loads are converted either to a bswap builtin
-   invocation or to a simple load, according to the target endianness.  */
-
-unsigned int
-pass_optimize_bswap::execute (function *fun)
-{
-  basic_block bb;
-  bool bswap32_p, bswap64_p;
-  bool changed = false;
-  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
-
-  if (BITS_PER_UNIT != 8)
-    return 0;
-
-  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
-	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
-  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
-	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
-		   || (bswap32_p && word_mode == SImode)));
-
-  /* Determine the argument type of the builtins.  The code later on
-     assumes that the return and argument type are the same.  */
-  if (bswap32_p)
-    {
-      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
-      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
-    }
-
-  if (bswap64_p)
-    {
-      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
-      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
-    }
-
-  memset (&nop_stats, 0, sizeof (nop_stats));
-  memset (&bswap_stats, 0, sizeof (bswap_stats));
-  calculate_dominance_info (CDI_DOMINATORS);
-
-  FOR_EACH_BB_FN (bb, fun)
-    {
-      gimple_stmt_iterator gsi;
-
-      /* We do a reverse scan for bswap patterns to make sure we get the
-	 widest match.  As bswap pattern matching doesn't handle previously
-	 inserted smaller bswap replacements as sub-patterns, the wider
-	 variant wouldn't be detected.  */
-      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
-	{
-	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
-	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
-	  enum tree_code code;
-	  struct symbolic_number n;
-	  bool bswap;
-
-	  /* This gsi_prev (&gsi) is not part of the for loop because
-	     cur_stmt might be moved to a different basic block by
-	     bswap_replace, and gsi must not point to it in that case.
-	     Doing the gsi_prev here ensures that gsi points to the
-	     statement previous to cur_stmt while still guaranteeing that
-	     all statements in this basic block are considered.  */
-	  gsi_prev (&gsi);
-
-	  if (!is_gimple_assign (cur_stmt))
-	    continue;
-
-	  code = gimple_assign_rhs_code (cur_stmt);
-	  switch (code)
-	    {
-	    case LROTATE_EXPR:
-	    case RROTATE_EXPR:
-	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
-		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
-		     % BITS_PER_UNIT)
-		continue;
-	      /* Fall through.  */
-	    case BIT_IOR_EXPR:
-	      break;
-	    default:
-	      continue;
-	    }
-
-	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
-
-	  if (!ins_stmt)
-	    continue;
-
-	  switch (n.range)
-	    {
-	    case 16:
-	      /* Already in canonical form, nothing to do.  */
-	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
-		continue;
-	      load_type = bswap_type = uint16_type_node;
-	      break;
-	    case 32:
-	      load_type = uint32_type_node;
-	      if (bswap32_p)
-		{
-		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
-		  bswap_type = bswap32_type;
-		}
-	      break;
-	    case 64:
-	      load_type = uint64_type_node;
-	      if (bswap64_p)
-		{
-		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
-		  bswap_type = bswap64_type;
-		}
-	      break;
-	    default:
-	      continue;
-	    }
-
-	  if (bswap && !fndecl && n.range != 16)
-	    continue;
-
-	  if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
-			     &n, bswap))
-	    changed = true;
-	}
-    }
-
-  statistics_counter_event (fun, "16-bit nop implementations found",
-			    nop_stats.found_16bit);
-  statistics_counter_event (fun, "32-bit nop implementations found",
-			    nop_stats.found_32bit);
-  statistics_counter_event (fun, "64-bit nop implementations found",
-			    nop_stats.found_64bit);
-  statistics_counter_event (fun, "16-bit bswap implementations found",
-			    bswap_stats.found_16bit);
-  statistics_counter_event (fun, "32-bit bswap implementations found",
-			    bswap_stats.found_32bit);
-  statistics_counter_event (fun, "64-bit bswap implementations found",
-			    bswap_stats.found_64bit);
-
-  return (changed ? TODO_update_ssa : 0);
-}
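As a usage illustration of the nop path handled above, consider a
hand-written endian load (the function name is hypothetical; this assumes
BITS_PER_UNIT == 8 and a 32-bit unsigned int):

  /* A hand-written little-endian 32-bit load.  On a little-endian target
     the symbolic number matches CMPNOP and the pass rewrites the whole
     expression as one plain 32-bit load (reported as "32 bit load in
     target endianness found" in the -fdump-tree-bswap dump); on a
     big-endian target it matches CMPXCHG and, provided the target has a
     bswap optab, becomes a load followed by __builtin_bswap32.  */
  unsigned int
  load_le32 (const unsigned char *p)
  {
    return (unsigned int) p[0]
	   | ((unsigned int) p[1] << 8)
	   | ((unsigned int) p[2] << 16)
	   | ((unsigned int) p[3] << 24);
  }
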
-
-} // anon namespace
-
-gimple_opt_pass *
-make_pass_optimize_bswap (gcc::context *ctxt)
-{
-  return new pass_optimize_bswap (ctxt);
-}
-
 /* Return true if stmt is a type conversion operation that can be stripped
    when used in a widening multiply operation.  */
 static bool
-- 
2.30.2