return tgt;
}
+/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
+ using bswap optimizations. CDI_DOMINATORS need to be
+ computed on entry. Return true if it has been optimized and
+ TODO_update_ssa is needed. */
+
+static bool
+maybe_optimize_vector_constructor (gimple *cur_stmt)
+{
+ tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+ struct symbolic_number n;
+ bool bswap;
+
+ /* The caller guarantees an assignment whose rhs is a CONSTRUCTOR.  */
+ gcc_assert (is_gimple_assign (cur_stmt)
+ && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
+
+ /* Only CONSTRUCTORs of vectors of integral elements that actually have
+ an lhs are candidates for rewriting as a (possibly byte-swapped)
+ scalar load.  */
+ tree rhs = gimple_assign_rhs1 (cur_stmt);
+ if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
+ || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
+ || gimple_assign_lhs (cur_stmt) == NULL_TREE)
+ return false;
+
+ /* Total size of the vector in bits; only 16-, 32- and 64-bit vectors
+ can be handled below.  */
+ HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
+ switch (sz)
+ {
+ case 16:
+ /* No builtin is required for the 16-bit case; FNDECL stays
+ NULL_TREE and N.RANGE == 16 is special-cased further down.  */
+ load_type = bswap_type = uint16_type_node;
+ break;
+ case 32:
+ /* Require both the __builtin_bswap32 decl and a bswap optab
+ handler for SImode.  */
+ if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
+ {
+ load_type = uint32_type_node;
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
+ bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+ else
+ return false;
+ break;
+ case 64:
+ /* As above for DImode, but additionally accept targets where
+ word_mode is SImode and a 32-bit bswap is available, so the
+ 64-bit swap can be expanded as two word-sized swaps.  */
+ if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
+ && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
+ || (word_mode == SImode
+ && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
+ && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
+ {
+ load_type = uint64_type_node;
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
+ bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+ else
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ /* Check whether the CONSTRUCTOR describes a bswap or an identity
+ (nop) of a single scalar source covering exactly SZ bits.  */
+ gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
+ if (!ins_stmt || n.range != (unsigned HOST_WIDE_INT) sz)
+ return false;
+
+ /* A genuine byte swap needs the builtin, except for the 16-bit case
+ which is handled without one (FNDECL is NULL_TREE then).  */
+ if (bswap && !fndecl && n.range != 16)
+ return false;
+
+ /* Start from clean statistics counters before the replacement.  */
+ memset (&nop_stats, 0, sizeof (nop_stats));
+ memset (&bswap_stats, 0, sizeof (bswap_stats));
+ return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
+ bswap_type, load_type, &n, bswap) != NULL_TREE;
+}
+
/* Find manual byte swap implementations as well as load in a given
endianness. Byte swaps are turned into a bswap builtin invokation
while endian loads are converted to bswap builtin invokation or
get_status_for_store_merging (basic_block bb)
{
unsigned int num_statements = 0;
+ unsigned int num_constructors = 0;
gimple_stmt_iterator gsi;
edge e;
if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
break;
+
+ if (is_gimple_assign (stmt)
+ && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
+ {
+ tree rhs = gimple_assign_rhs1 (stmt);
+ if (VECTOR_TYPE_P (TREE_TYPE (rhs))
+ && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
+ && gimple_assign_lhs (stmt) != NULL_TREE)
+ {
+ HOST_WIDE_INT sz
+ = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
+ if (sz == 16 || sz == 32 || sz == 64)
+ {
+ num_constructors = 1;
+ break;
+ }
+ }
+ }
}
- if (num_statements == 0)
+ if (num_statements == 0 && num_constructors == 0)
return BB_INVALID;
if (cfun->can_throw_non_call_exceptions && cfun->eh
&& e->dest == bb->next_bb)
return BB_EXTENDED_VALID;
- return num_statements >= 2 ? BB_VALID : BB_INVALID;
+ return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
}
/* Entry point for the pass. Go over each basic block recording chains of
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
- for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
{
gimple *stmt = gsi_stmt (gsi);
+ gsi_next (&gsi);
if (is_gimple_debug (stmt))
continue;
continue;
}
+ if (is_gimple_assign (stmt)
+ && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
+ && maybe_optimize_vector_constructor (stmt))
+ continue;
+
if (store_valid_for_store_merging_p (stmt))
changed |= process_store (stmt);
else