base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
&unsignedp, &reversep, &volatilep);
- if (TREE_CODE (base_addr) == MEM_REF)
+ if (TREE_CODE (base_addr) == TARGET_MEM_REF)
+ /* Do not rewrite TARGET_MEM_REF. */
+ return false;
+ else if (TREE_CODE (base_addr) == MEM_REF)
{
offset_int bit_offset = 0;
tree off = TREE_OPERAND (base_addr, 1);
bitpos += bit_offset.to_shwi ();
}
+ else
+ base_addr = build_fold_addr_expr (base_addr);
if (bitpos % BITS_PER_UNIT)
return false;
if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
return NULL;
- if (!n1.vuse != !n2.vuse
- || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
+ if (n1.vuse != n2.vuse)
return NULL;
source_stmt
return NULL;
}
-/* Check if STMT completes a bswap implementation or a read in a given
- endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
- accordingly. It also sets N to represent the kind of operations
- performed: size of the resulting expression and whether it works on
- a memory source, and if so alias-set and vuse. At last, the
- function returns a stmt whose rhs's first tree is the source
- expression. */
+/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
+ *CMPXCHG, *CMPNOP and adjust *N. */
-gimple *
-find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
+void
+find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
+ uint64_t *cmpnop)
{
unsigned rsize;
uint64_t tmpn, mask;
-/* The number which the find_bswap_or_nop_1 result should match in order
- to have a full byte swap. The number is shifted to the right
- according to the size of the symbolic number before using it. */
- uint64_t cmpxchg = CMPXCHG;
- uint64_t cmpnop = CMPNOP;
- gimple *ins_stmt;
- int limit;
-
- /* The last parameter determines the depth search limit. It usually
- correlates directly to the number n of bytes to be touched. We
- increase that number by log2(n) + 1 here in order to also
- cover signed -> unsigned conversions of the src operand as can be seen
- in libgcc, and for initial shift/and operation of the src operand. */
- limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
- limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
- ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
-
- if (!ins_stmt)
- return NULL;
+ /* The number which the find_bswap_or_nop_1 result should match in order
+ to have a full byte swap. The number is shifted to the right
+ according to the size of the symbolic number before using it. */
+ *cmpxchg = CMPXCHG;
+ *cmpnop = CMPNOP;
/* Find real size of result (highest non-zero byte). */
if (n->base_addr)
if (n->range < (int) sizeof (int64_t))
{
mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
- cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
- cmpnop &= mask;
+ *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
+ *cmpnop &= mask;
}
/* Zero out the bits corresponding to unused bytes in the result of the
if (BYTES_BIG_ENDIAN)
{
mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
- cmpxchg &= mask;
- cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
+ *cmpxchg &= mask;
+ *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
}
else
{
mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
- cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
- cmpnop &= mask;
+ *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
+ *cmpnop &= mask;
}
n->range = rsize;
}
+ n->range *= BITS_PER_UNIT;
+}
+
+/* Check if STMT completes a bswap implementation or a read in a given
+ endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
+ accordingly. It also sets N to represent the kind of operations
+ performed: size of the resulting expression and whether it works on
+ a memory source, and if so alias-set and vuse. At last, the
+ function returns a stmt whose rhs's first tree is the source
+ expression. */
+
+gimple *
+find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
+{
+ /* The last parameter determines the depth search limit. It usually
+ correlates directly to the number n of bytes to be touched. We
+ increase that number by log2(n) + 1 here in order to also
+ cover signed -> unsigned conversions of the src operand as can be seen
+ in libgcc, and for initial shift/and operation of the src operand. */
+ int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
+ limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
+ gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
+
+ if (!ins_stmt)
+ return NULL;
+
+ uint64_t cmpxchg, cmpnop;
+ find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
+
/* A complete byte swap should make the symbolic number to start with
the largest digit in the highest order byte. Unchanged symbolic
number indicates a read with same endianness as target architecture. */
if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
return NULL;
- n->range *= BITS_PER_UNIT;
return ins_stmt;
}
}; // class pass_optimize_bswap
/* Perform the bswap optimization: replace the expression computed in the rhs
- of CUR_STMT by an equivalent bswap, load or load + bswap expression.
+ of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
+ bswap, load or load + bswap expression.
Which of these alternatives replace the rhs is given by N->base_addr (non
null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
load to perform are also given in N while the builtin bswap invoke is given
- in FNDEL. Finally, if a load is involved, SRC_STMT refers to one of the
- load statements involved to construct the rhs in CUR_STMT and N->range gives
- the size of the rhs expression for maintaining some statistics.
+ in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the
+ load statements involved to construct the rhs in gsi_stmt (GSI) and
+ N->range gives the size of the rhs expression for maintaining some
+ statistics.
- Note that if the replacement involve a load, CUR_STMT is moved just after
- SRC_STMT to do the load with the same VUSE which can lead to CUR_STMT
- changing of basic block. */
+ Note that if the replacement involve a load and if gsi_stmt (GSI) is
+ non-NULL, that stmt is moved just after INS_STMT to do the load with the
+ same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
-bool
-bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
+tree
+bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
tree bswap_type, tree load_type, struct symbolic_number *n,
bool bswap)
{
- gimple_stmt_iterator gsi;
- tree src, tmp, tgt;
+ tree src, tmp, tgt = NULL_TREE;
gimple *bswap_stmt;
- gsi = gsi_for_stmt (cur_stmt);
+ gimple *cur_stmt = gsi_stmt (gsi);
src = n->src;
- tgt = gimple_assign_lhs (cur_stmt);
+ if (cur_stmt)
+ tgt = gimple_assign_lhs (cur_stmt);
/* Need to load the value from memory first. */
if (n->base_addr)
{
- gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
+ gimple_stmt_iterator gsi_ins = gsi;
+ if (ins_stmt)
+ gsi_ins = gsi_for_stmt (ins_stmt);
tree addr_expr, addr_tmp, val_expr, val_tmp;
tree load_offset_ptr, aligned_load_type;
- gimple *addr_stmt, *load_stmt;
- unsigned align;
+ gimple *load_stmt;
+ unsigned align = get_object_alignment (src);
HOST_WIDE_INT load_offset = 0;
- basic_block ins_bb, cur_bb;
-
- ins_bb = gimple_bb (ins_stmt);
- cur_bb = gimple_bb (cur_stmt);
- if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
- return false;
- align = get_object_alignment (src);
-
- /* Move cur_stmt just before one of the load of the original
- to ensure it has the same VUSE. See PR61517 for what could
- go wrong. */
- if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
- reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
- gsi_move_before (&gsi, &gsi_ins);
- gsi = gsi_for_stmt (cur_stmt);
+ if (cur_stmt)
+ {
+ basic_block ins_bb = gimple_bb (ins_stmt);
+ basic_block cur_bb = gimple_bb (cur_stmt);
+ if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
+ return NULL_TREE;
+
+ /* Move cur_stmt just before one of the load of the original
+ to ensure it has the same VUSE. See PR61517 for what could
+ go wrong. */
+ if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
+ reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
+ gsi_move_before (&gsi, &gsi_ins);
+ gsi = gsi_for_stmt (cur_stmt);
+ }
+ else
+ gsi = gsi_ins;
/* Compute address to load from and cast according to the size
of the load. */
- addr_expr = build_fold_addr_expr (unshare_expr (src));
+ addr_expr = build_fold_addr_expr (src);
if (is_gimple_mem_ref_addr (addr_expr))
- addr_tmp = addr_expr;
+ addr_tmp = unshare_expr (addr_expr);
else
{
- addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
- "load_src");
- addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
- gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
+ addr_tmp = unshare_expr (n->base_addr);
+ if (!is_gimple_mem_ref_addr (addr_tmp))
+ addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
+ is_gimple_mem_ref_addr,
+ NULL_TREE, true,
+ GSI_SAME_STMT);
+ load_offset = n->bytepos;
+ if (n->offset)
+ {
+ tree off
+ = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
+ true, NULL_TREE, true,
+ GSI_SAME_STMT);
+ gimple *stmt
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
+ POINTER_PLUS_EXPR, addr_tmp, off);
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+ addr_tmp = gimple_assign_lhs (stmt);
+ }
}
/* Perform the load. */
}
/* Convert the result of load if necessary. */
- if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
+ if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
{
val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
"load_dst");
gimple_set_vuse (load_stmt, n->vuse);
gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
+ update_stmt (cur_stmt);
}
- else
+ else if (cur_stmt)
{
gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
gimple_set_vuse (cur_stmt, n->vuse);
+ update_stmt (cur_stmt);
+ }
+ else
+ {
+ tgt = make_ssa_name (load_type);
+ cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
+ gimple_set_vuse (cur_stmt, n->vuse);
+ gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
}
- update_stmt (cur_stmt);
if (dump_file)
{
(int) n->range);
print_gimple_stmt (dump_file, cur_stmt, 0);
}
- return true;
+ return tgt;
}
else
{
}
else if (!bswap)
{
- gimple *g;
- if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
+ gimple *g = NULL;
+ if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
{
if (!is_gimple_val (src))
- return false;
+ return NULL_TREE;
g = gimple_build_assign (tgt, NOP_EXPR, src);
}
- else
+ else if (cur_stmt)
g = gimple_build_assign (tgt, src);
+ else
+ tgt = src;
if (n->range == 16)
nop_stats.found_16bit++;
else if (n->range == 32)
fprintf (dump_file,
"%d bit reshuffle in target endianness found at: ",
(int) n->range);
- print_gimple_stmt (dump_file, cur_stmt, 0);
+ if (cur_stmt)
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ else
+ {
+ print_generic_expr (dump_file, tgt, 0);
+ fprintf (dump_file, "\n");
+ }
}
- gsi_replace (&gsi, g, true);
- return true;
+ if (cur_stmt)
+ gsi_replace (&gsi, g, true);
+ return tgt;
}
else if (TREE_CODE (src) == BIT_FIELD_REF)
src = TREE_OPERAND (src, 0);
else
bswap_stmt = gimple_build_call (fndecl, 1, tmp);
+ if (tgt == NULL_TREE)
+ tgt = make_ssa_name (bswap_type);
tmp = tgt;
/* Convert the result if necessary. */
{
fprintf (dump_file, "%d bit bswap implementation found at: ",
(int) n->range);
- print_gimple_stmt (dump_file, cur_stmt, 0);
+ if (cur_stmt)
+ print_gimple_stmt (dump_file, cur_stmt, 0);
+ else
+ {
+ print_generic_expr (dump_file, tgt, 0);
+ fprintf (dump_file, "\n");
+ }
}
- gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
- gsi_remove (&gsi, true);
- return true;
+ if (cur_stmt)
+ {
+ gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
+ }
+ else
+ gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
+ return tgt;
}
/* Find manual byte swap implementations as well as load in a given
if (bswap && !fndecl && n.range != 16)
continue;
- if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
- &n, bswap))
+ if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
+ bswap_type, load_type, &n, bswap))
changed = true;
}
}
gimple *stmt;
unsigned int order;
/* INTEGER_CST for constant stores, MEM_REF for memory copy or
- BIT_*_EXPR for logical bitwise operation. */
+ BIT_*_EXPR for logical bitwise operation.
+ LROTATE_EXPR if it can be only bswap optimized and
+ ops are not really meaningful.
+ NOP_EXPR if bswap optimization detected identity, ops
+ are not meaningful. */
enum tree_code rhs_code;
+ /* Two fields for bswap optimization purposes. */
+ struct symbolic_number n;
+ gimple *ins_stmt;
/* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
bool bit_not_p;
/* True if ops have been swapped and thus ops[1] represents
store_operand_info ops[2];
store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
- gimple *, unsigned int, enum tree_code, bool,
+ gimple *, unsigned int, enum tree_code,
+ struct symbolic_number &, gimple *, bool,
const store_operand_info &,
const store_operand_info &);
};
gimple *st,
unsigned int ord,
enum tree_code rhscode,
+ struct symbolic_number &nr,
+ gimple *ins_stmtp,
bool bitnotp,
const store_operand_info &op0r,
const store_operand_info &op1r)
: bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
- stmt (st), order (ord), rhs_code (rhscode), bit_not_p (bitnotp),
- ops_swapped_p (false)
+ stmt (st), order (ord), rhs_code (rhscode), n (nr),
+ ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false)
#if __cplusplus >= 201103L
, ops { op0r, op1r }
{
fprintf (dump_file, "After writing ");
print_generic_expr (dump_file, cst, 0);
fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
- " at position %d the merged region contains:\n",
- info->bitsize, pos_in_buffer);
+ " at position %d the merged region contains:\n",
+ info->bitsize, pos_in_buffer);
dump_char_array (dump_file, val, buf_size);
}
else
fprintf (dump_file, "Failed to merge stores\n");
- }
+ }
if (!ret)
return false;
unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
else
clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
}
+ stores.qsort (sort_by_bitpos);
return true;
}
}
}
bool terminate_and_process_chain ();
+ bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
bool coalesce_immediate_stores ();
bool output_merged_store (merged_store_group *);
bool output_merged_stores ();
/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
may clobber REF. FIRST and LAST must be in the same basic block and
- have non-NULL vdef. */
+ have non-NULL vdef. We want to be able to sink load of REF across
+ stores between FIRST and LAST, up to right before LAST. */
bool
stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
stmt = SSA_NAME_DEF_STMT (vop);
if (stmt_may_clobber_ref_p_1 (stmt, &r))
return true;
+ if (gimple_store_p (stmt)
+ && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
+ return true;
/* Avoid quadratic compile time by bounding the number of checks
we perform. */
if (++count > MAX_STORE_ALIAS_CHECKS)
return true;
}
+/* Add all refs loaded to compute VAL to REFS vector. */
+
+void
+gather_bswap_load_refs (vec<tree> *refs, tree val)
+{
+ if (TREE_CODE (val) != SSA_NAME)
+ return;
+
+ gimple *stmt = SSA_NAME_DEF_STMT (val);
+ if (!is_gimple_assign (stmt))
+ return;
+
+ if (gimple_assign_load_p (stmt))
+ {
+ refs->safe_push (gimple_assign_rhs1 (stmt));
+ return;
+ }
+
+ switch (gimple_assign_rhs_class (stmt))
+ {
+ case GIMPLE_BINARY_RHS:
+ gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
+ /* FALLTHRU */
+ case GIMPLE_UNARY_RHS:
+ gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return true if m_store_info[first] and at least one following store
+ form a group which store try_size bitsize value which is byte swapped
+ from a memory load or some value, or identity from some value.
+ This uses the bswap pass APIs. */
+
+bool
+imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
+ unsigned int first,
+ unsigned int try_size)
+{
+ unsigned int len = m_store_info.length (), last = first;
+ unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
+ if (width >= try_size)
+ return false;
+ for (unsigned int i = first + 1; i < len; ++i)
+ {
+ if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
+ || m_store_info[i]->ins_stmt == NULL)
+ return false;
+ width += m_store_info[i]->bitsize;
+ if (width >= try_size)
+ {
+ last = i;
+ break;
+ }
+ }
+ if (width != try_size)
+ return false;
+
+ bool allow_unaligned
+ = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
+ /* Punt if the combined store would not be aligned and we need alignment. */
+ if (!allow_unaligned)
+ {
+ unsigned int align = merged_store->align;
+ unsigned HOST_WIDE_INT align_base = merged_store->align_base;
+ for (unsigned int i = first + 1; i <= last; ++i)
+ {
+ unsigned int this_align;
+ unsigned HOST_WIDE_INT align_bitpos = 0;
+ get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
+ &this_align, &align_bitpos);
+ if (this_align > align)
+ {
+ align = this_align;
+ align_base = m_store_info[i]->bitpos - align_bitpos;
+ }
+ }
+ unsigned HOST_WIDE_INT align_bitpos
+ = (m_store_info[first]->bitpos - align_base) & (align - 1);
+ if (align_bitpos)
+ align = least_bit_hwi (align_bitpos);
+ if (align < try_size)
+ return false;
+ }
+
+ tree type;
+ switch (try_size)
+ {
+ case 16: type = uint16_type_node; break;
+ case 32: type = uint32_type_node; break;
+ case 64: type = uint64_type_node; break;
+ default: gcc_unreachable ();
+ }
+ struct symbolic_number n;
+ gimple *ins_stmt = NULL;
+ int vuse_store = -1;
+ unsigned int first_order = merged_store->first_order;
+ unsigned int last_order = merged_store->last_order;
+ gimple *first_stmt = merged_store->first_stmt;
+ gimple *last_stmt = merged_store->last_stmt;
+ store_immediate_info *infof = m_store_info[first];
+
+ for (unsigned int i = first; i <= last; ++i)
+ {
+ store_immediate_info *info = m_store_info[i];
+ struct symbolic_number this_n = info->n;
+ this_n.type = type;
+ if (!this_n.base_addr)
+ this_n.range = try_size / BITS_PER_UNIT;
+ unsigned int bitpos = info->bitpos - infof->bitpos;
+ if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
+ BYTES_BIG_ENDIAN
+ ? try_size - info->bitsize - bitpos
+ : bitpos))
+ return false;
+ if (n.base_addr && vuse_store)
+ {
+ unsigned int j;
+ for (j = first; j <= last; ++j)
+ if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
+ break;
+ if (j > last)
+ {
+ if (vuse_store == 1)
+ return false;
+ vuse_store = 0;
+ }
+ }
+ if (i == first)
+ {
+ n = this_n;
+ ins_stmt = info->ins_stmt;
+ }
+ else
+ {
+ if (n.base_addr)
+ {
+ if (n.vuse != this_n.vuse)
+ {
+ if (vuse_store == 0)
+ return false;
+ vuse_store = 1;
+ }
+ if (info->order > last_order)
+ {
+ last_order = info->order;
+ last_stmt = info->stmt;
+ }
+ else if (info->order < first_order)
+ {
+ first_order = info->order;
+ first_stmt = info->stmt;
+ }
+ }
+
+ ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
+ &this_n, &n);
+ if (ins_stmt == NULL)
+ return false;
+ }
+ }
+
+ uint64_t cmpxchg, cmpnop;
+ find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
+
+ /* A complete byte swap should make the symbolic number to start with
+ the largest digit in the highest order byte. Unchanged symbolic
+ number indicates a read with same endianness as target architecture. */
+ if (n.n != cmpnop && n.n != cmpxchg)
+ return false;
+
+ if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
+ return false;
+
+ /* Don't handle memory copy this way if normal non-bswap processing
+ would handle it too. */
+ if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
+ {
+ unsigned int i;
+ for (i = first; i <= last; ++i)
+ if (m_store_info[i]->rhs_code != MEM_REF)
+ break;
+ if (i == last + 1)
+ return false;
+ }
+
+ if (n.n == cmpxchg)
+ switch (try_size)
+ {
+ case 16:
+ /* Will emit LROTATE_EXPR. */
+ break;
+ case 32:
+ if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
+ break;
+ return false;
+ case 64:
+ if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
+ && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
+ break;
+ return false;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!allow_unaligned && n.base_addr)
+ {
+ unsigned int align = get_object_alignment (n.src);
+ if (align < try_size)
+ return false;
+ }
+
+ /* If each load has vuse of the corresponding store, need to verify
+ the loads can be sunk right before the last store. */
+ if (vuse_store == 1)
+ {
+ auto_vec<tree, 64> refs;
+ for (unsigned int i = first; i <= last; ++i)
+ gather_bswap_load_refs (&refs,
+ gimple_assign_rhs1 (m_store_info[i]->stmt));
+
+ unsigned int i;
+ tree ref;
+ FOR_EACH_VEC_ELT (refs, i, ref)
+ if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
+ return false;
+ n.vuse = NULL_TREE;
+ }
+
+ infof->n = n;
+ infof->ins_stmt = ins_stmt;
+ for (unsigned int i = first; i <= last; ++i)
+ {
+ m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
+ m_store_info[i]->ops[0].base_addr = NULL_TREE;
+ m_store_info[i]->ops[1].base_addr = NULL_TREE;
+ if (i != first)
+ merged_store->merge_into (m_store_info[i]);
+ }
+
+ return true;
+}
+
/* Go through the candidate stores recorded in m_store_info and merge them
into merged_store_group objects recorded into m_merged_store_groups
representing the widened stores. Return true if coalescing was successful
m_store_info.length ());
store_immediate_info *info;
- unsigned int i;
+ unsigned int i, ignore = 0;
/* Order the stores by the bitposition they write to. */
m_store_info.qsort (sort_by_bitpos);
fprintf (dump_file, "\n------------\n");
}
- if (i == 0)
+ if (i <= ignore)
continue;
+ /* First try to handle group of stores like:
+ p[0] = data >> 24;
+ p[1] = data >> 16;
+ p[2] = data >> 8;
+ p[3] = data;
+ using the bswap framework. */
+ if (info->bitpos == merged_store->start + merged_store->width
+ && merged_store->stores.length () == 1
+ && merged_store->stores[0]->ins_stmt != NULL
+ && info->ins_stmt != NULL)
+ {
+ unsigned int try_size;
+ for (try_size = 64; try_size >= 16; try_size >>= 1)
+ if (try_coalesce_bswap (merged_store, i - 1, try_size))
+ break;
+
+ if (try_size >= 16)
+ {
+ ignore = i + merged_store->stores.length () - 1;
+ m_merged_store_groups.safe_push (merged_store);
+ if (ignore < m_store_info.length ())
+ merged_store = new merged_store_group (m_store_info[ignore]);
+ else
+ merged_store = NULL;
+ continue;
+ }
+ }
+
/* |---store 1---|
|---store 2---|
- Overlapping stores. */
- unsigned HOST_WIDE_INT start = info->bitpos;
- if (IN_RANGE (start, merged_store->start,
+ Overlapping stores. */
+ if (IN_RANGE (info->bitpos, merged_store->start,
merged_store->start + merged_store->width - 1))
{
/* Only allow overlapping stores of constants. */
This store is consecutive to the previous one.
Merge it into the current store group. There can be gaps in between
the stores, but there can't be gaps in between bitregions. */
- else if (info->bitregion_start <= merged_store->bitregion_end
+ else if (info->rhs_code != LROTATE_EXPR
+ && info->bitregion_start <= merged_store->bitregion_end
&& info->rhs_code == merged_store->stores[0]->rhs_code)
{
store_immediate_info *infof = merged_store->stores[0];
}
/* Record or discard the last store group. */
- if (!merged_store->apply_stores ())
- delete merged_store;
- else
- m_merged_store_groups.safe_push (merged_store);
+ if (merged_store)
+ {
+ if (!merged_store->apply_stores ())
+ delete merged_store;
+ else
+ m_merged_store_groups.safe_push (merged_store);
+ }
gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
bool success
gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
+ if (group->stores[0]->rhs_code == LROTATE_EXPR
+ || group->stores[0]->rhs_code == NOP_EXPR)
+ {
+ /* For bswap framework using sets of stores, all the checking
+ has been done earlier in try_coalesce_bswap and needs to be
+ emitted as a single store. */
+ if (total_orig)
+ {
+ /* Avoid the old/new stmt count heuristics. It should be
+ always beneficial. */
+ total_new[0] = 1;
+ total_orig[0] = 2;
+ }
+
+ if (split_stores)
+ {
+ unsigned HOST_WIDE_INT align_bitpos
+ = (group->start - align_base) & (group_align - 1);
+ unsigned HOST_WIDE_INT align = group_align;
+ if (align_bitpos)
+ align = least_bit_hwi (align_bitpos);
+ bytepos = group->start / BITS_PER_UNIT;
+ struct split_store *store
+ = new split_store (bytepos, group->width, align);
+ unsigned int first = 0;
+ find_constituent_stores (group, &store->orig_stores,
+ &first, group->start, group->width);
+ split_stores->safe_push (store);
+ }
+
+ return 1;
+ }
+
unsigned int ret = 0, first = 0;
unsigned HOST_WIDE_INT try_pos = bytepos;
- group->stores.qsort (sort_by_bitpos);
if (total_orig)
{
" not larger than estimated number of new"
" stmts (%u).\n",
total_orig, total_new);
+ return false;
}
gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
tree last_vdef, new_vuse;
last_vdef = gimple_vdef (group->last_stmt);
new_vuse = gimple_vuse (group->last_stmt);
+ tree bswap_res = NULL_TREE;
+
+ if (group->stores[0]->rhs_code == LROTATE_EXPR
+ || group->stores[0]->rhs_code == NOP_EXPR)
+ {
+ tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+ gimple *ins_stmt = group->stores[0]->ins_stmt;
+ struct symbolic_number *n = &group->stores[0]->n;
+ bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
+
+ switch (n->range)
+ {
+ case 16:
+ load_type = bswap_type = uint16_type_node;
+ break;
+ case 32:
+ load_type = uint32_type_node;
+ if (bswap)
+ {
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+ bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+ break;
+ case 64:
+ load_type = uint64_type_node;
+ if (bswap)
+ {
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+ bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* If the loads have each vuse of the corresponding store,
+ we've checked the aliasing already in try_coalesce_bswap and
+ we want to sink the need load into seq. So need to use new_vuse
+ on the load. */
+ if (n->base_addr && n->vuse == NULL)
+ {
+ n->vuse = new_vuse;
+ ins_stmt = NULL;
+ }
+ bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
+ bswap_type, load_type, n, bswap);
+ gcc_assert (bswap_res);
+ }
gimple *stmt = NULL;
split_store *split_store;
unsigned int i;
auto_vec<gimple *, 32> orig_stmts;
- tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &seq,
+ gimple_seq this_seq;
+ tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
is_gimple_mem_ref_addr, NULL_TREE);
+ gimple_seq_add_seq_without_update (&seq, this_seq);
tree load_addr[2] = { NULL_TREE, NULL_TREE };
gimple_seq load_seq[2] = { NULL, NULL };
load_addr[j] = addr;
else
{
- gimple_seq this_seq;
load_addr[j]
= force_gimple_operand_1 (unshare_expr (op.base_addr),
&this_seq, is_gimple_mem_ref_addr,
MR_DEPENDENCE_BASE (dest) = base;
}
- tree mask
- = native_interpret_expr (int_type,
- group->mask + try_pos - start_byte_pos,
- group->buf_size);
+ tree mask = integer_zero_node;
+ if (!bswap_res)
+ mask = native_interpret_expr (int_type,
+ group->mask + try_pos
+ - start_byte_pos,
+ group->buf_size);
tree ops[2];
for (int j = 0;
++j)
{
store_operand_info &op = split_store->orig_stores[0]->ops[j];
- if (op.base_addr)
+ if (bswap_res)
+ ops[j] = bswap_res;
+ else if (op.base_addr)
{
FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
orig_stmts.safe_push (info->ops[j].stmt);
src = gimple_assign_lhs (stmt);
}
break;
+ case LROTATE_EXPR:
+ case NOP_EXPR:
+ src = ops[0];
+ if (!is_gimple_val (src))
+ {
+ stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
+ src);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+ }
+ if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
+ {
+ stmt = gimple_build_assign (make_ssa_name (int_type),
+ NOP_EXPR, src);
+ gimple_seq_add_stmt_without_update (&seq, stmt);
+ src = gimple_assign_lhs (stmt);
+ }
+ break;
default:
src = ops[0];
break;
&& (TREE_CODE (rhs) != INTEGER_CST)));
enum tree_code rhs_code = ERROR_MARK;
bool bit_not_p = false;
+ struct symbolic_number n;
+ gimple *ins_stmt = NULL;
store_operand_info ops[2];
if (invalid)
;
invalid = true;
break;
}
+ if ((bitsize % BITS_PER_UNIT) == 0
+ && (bitpos % BITS_PER_UNIT) == 0
+ && bitsize <= 64
+ && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
+ {
+ ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
+ if (ins_stmt)
+ {
+ uint64_t nn = n.n;
+ for (unsigned HOST_WIDE_INT i = 0;
+ i < bitsize; i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
+ if ((nn & MARKER_MASK) == 0
+ || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
+ {
+ ins_stmt = NULL;
+ break;
+ }
+ if (ins_stmt)
+ {
+ if (invalid)
+ {
+ rhs_code = LROTATE_EXPR;
+ ops[0].base_addr = NULL_TREE;
+ ops[1].base_addr = NULL_TREE;
+ }
+ invalid = false;
+ }
+ }
+ }
}
if (invalid)
return;
}
+ if (!ins_stmt)
+ memset (&n, 0, sizeof (n));
+
struct imm_store_chain_info **chain_info = NULL;
if (base_addr)
chain_info = m_stores.get (base_addr);
unsigned int ord = (*chain_info)->m_store_info.length ();
info = new store_immediate_info (bitsize, bitpos, bitregion_start,
bitregion_end, stmt, ord, rhs_code,
+ n, ins_stmt,
bit_not_p, ops[0], ops[1]);
if (dump_file && (dump_flags & TDF_DETAILS))
{
= new imm_store_chain_info (m_stores_head, base_addr);
info = new store_immediate_info (bitsize, bitpos, bitregion_start,
bitregion_end, stmt, 0, rhs_code,
+ n, ins_stmt,
bit_not_p, ops[0], ops[1]);
new_chain->m_store_info.safe_push (info);
m_stores.put (base_addr, new_chain);
basic_block bb;
hash_set<gimple *> orig_stmts;
+ calculate_dominance_info (CDI_DOMINATORS);
+
FOR_EACH_BB_FN (bb, fun)
{
gimple_stmt_iterator gsi;