marshalling to implement data sharing and copying clauses.
Contributed by Diego Novillo <dnovillo@redhat.com>
- Copyright (C) 2005-2013 Free Software Foundation, Inc.
+ Copyright (C) 2005-2014 Free Software Foundation, Inc.
This file is part of GCC.
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
+#include "stringpool.h"
+#include "stor-layout.h"
#include "rtl.h"
+#include "pointer-set.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-fold.h"
+#include "gimple-expr.h"
+#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-into-ssa.h"
+#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "flags.h"
#include "function.h"
#include "expr.h"
#include "tree-pass.h"
-#include "ggc.h"
#include "except.h"
#include "splay-tree.h"
#include "optabs.h"
#include "omp-low.h"
#include "gimple-low.h"
#include "tree-cfgcleanup.h"
+#include "pretty-print.h"
+#include "ipa-prop.h"
#include "tree-nested.h"
+#include "tree-eh.h"
/* Lowering of OpenMP parallel and workshare constructs proceeds in two
}
-/* Return the parallel region associated with STMT. */
-
/* Debugging dumps for parallel regions. */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
/* Create a new name for omp child function. Returns an identifier. */
-static GTY(()) unsigned int tmp_ompfn_id_num;
-
static tree
create_omp_child_function_name (bool task_copy)
{
{
if (!optimize
|| optimize_debug
+ || !flag_tree_loop_optimize
|| (!flag_tree_loop_vectorize
&& (global_options_set.x_flag_tree_loop_vectorize
|| global_options_set.x_flag_tree_vectorize)))
}
else if (TREE_CONSTANT (x))
{
- const char *name = NULL;
- if (DECL_NAME (var))
- name = IDENTIFIER_POINTER (DECL_NAME (new_var));
-
- x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
- name);
- gimple_add_tmp_var (x);
- TREE_ADDRESSABLE (x) = 1;
- x = build_fold_addr_expr_loc (clause_loc, x);
+ /* For reduction with placeholder in SIMD loop,
+ defer adding the initialization of the reference,
+ because if we decide to use SIMD array for it,
+     the initialization could cause expansion ICE.  */
+ if (c_kind == OMP_CLAUSE_REDUCTION
+ && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
+ && is_simd)
+ x = NULL_TREE;
+ else
+ {
+ const char *name = NULL;
+ if (DECL_NAME (var))
+ name = IDENTIFIER_POINTER (DECL_NAME (new_var));
+
+ x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
+ name);
+ gimple_add_tmp_var (x);
+ TREE_ADDRESSABLE (x) = 1;
+ x = build_fold_addr_expr_loc (clause_loc, x);
+ }
}
else
{
x = build_call_expr_loc (clause_loc, atmp, 1, x);
}
- x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
- gimplify_assign (new_var, x, ilist);
+ if (x)
+ {
+ x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
+ gimplify_assign (new_var, x, ilist);
+ }
new_var = build_simple_mem_ref_loc (clause_loc, new_var);
}
}
break;
}
+ /* If this is a reference to constant size reduction var
+ with placeholder, we haven't emitted the initializer
+ for it because it is undesirable if SIMD arrays are used.
+ But if they aren't used, we need to emit the deferred
+ initialization now. */
+ else if (is_reference (var) && is_simd)
+ {
+ tree z
+ = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_vard)));
+ if (TREE_CONSTANT (z))
+ {
+ const char *name = NULL;
+ if (DECL_NAME (var))
+ name = IDENTIFIER_POINTER (DECL_NAME (new_vard));
+
+ z = create_tmp_var_raw
+ (TREE_TYPE (TREE_TYPE (new_vard)), name);
+ gimple_add_tmp_var (z);
+ TREE_ADDRESSABLE (z) = 1;
+ z = build_fold_addr_expr_loc (clause_loc, z);
+ gimplify_assign (new_vard, z, ilist);
+ }
+ }
x = lang_hooks.decls.omp_clause_default_ctor
(c, new_var, unshare_expr (x));
if (x)
{
x = omp_reduction_init (c, TREE_TYPE (new_var));
gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
+ enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
+
+ /* reduction(-:var) sums up the partial results, so it
+ acts identically to reduction(+:var). */
+ if (code == MINUS_EXPR)
+ code = PLUS_EXPR;
+
if (is_simd
&& lower_rec_simd_input_clauses (new_var, ctx, max_vf,
idx, lane, ivar, lvar))
{
- enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
tree ref = build_outer_var_ref (var, ctx);
gimplify_assign (unshare_expr (ivar), x, &llist[0]);
- /* reduction(-:var) sums up the partial results, so it
- acts identically to reduction(+:var). */
- if (code == MINUS_EXPR)
- code = PLUS_EXPR;
-
x = build2 (code, TREE_TYPE (ref), ref, ivar);
ref = build_outer_var_ref (var, ctx);
gimplify_assign (ref, x, &llist[1]);
{
gimplify_assign (new_var, x, ilist);
if (is_simd)
- gimplify_assign (build_outer_var_ref (var, ctx),
- new_var, dlist);
+ {
+ tree ref = build_outer_var_ref (var, ctx);
+
+ x = build2 (code, TREE_TYPE (ref), ref, new_var);
+ ref = build_outer_var_ref (var, ctx);
+ gimplify_assign (ref, x, dlist);
+ }
}
}
break;
/* Don't add any barrier for #pragma omp simd or
#pragma omp distribute. */
if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
- || gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_KIND_FOR)
+ || gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_FOR)
gimple_seq_add_stmt (ilist, build_omp_barrier (NULL_TREE));
}
&& find_omp_clause (gimple_omp_task_clauses (entry_stmt),
OMP_CLAUSE_UNTIED) != NULL);
- FOR_EACH_BB (bb)
+ FOR_EACH_BB_FN (bb, cfun)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple call = gsi_stmt (gsi);
basic_block bb;
bool changed = false;
- FOR_EACH_BB (bb)
+ FOR_EACH_BB_FN (bb, cfun)
changed |= gimple_purge_dead_eh_edges (bb);
if (changed)
cleanup_tree_cfg ();
{
struct loop *loop = alloc_loop ();
loop->header = body_bb;
- loop->latch = cont_bb;
+ if (collapse_bb == NULL)
+ loop->latch = cont_bb;
add_loop (loop, trip_loop);
}
}
{
struct loop *loop = alloc_loop ();
loop->header = l1_bb;
- loop->latch = e->dest;
+ loop->latch = cont_bb;
add_loop (loop, l1_bb->loop_father);
if (safelen == NULL_TREE)
loop->safelen = INT_MAX;
{
safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
if (!tree_fits_uhwi_p (safelen)
- || (unsigned HOST_WIDE_INT) tree_to_uhwi (safelen)
- > INT_MAX)
+ || tree_to_uhwi (safelen) > INT_MAX)
loop->safelen = INT_MAX;
else
loop->safelen = tree_to_uhwi (safelen);
if ((flag_tree_loop_vectorize
|| (!global_options_set.x_flag_tree_loop_vectorize
&& !global_options_set.x_flag_tree_vectorize))
+ && flag_tree_loop_optimize
&& loop->safelen > 1)
{
loop->force_vect = true;
expand_omp_sections (struct omp_region *region)
{
tree t, u, vin = NULL, vmain, vnext, l2;
- vec<tree> label_vec;
unsigned len;
basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
gimple_stmt_iterator si, switch_si;
/* Use vec::quick_push on label_vec throughout, since we know the size
in advance. */
- label_vec.create (len);
+ auto_vec<tree> label_vec (len);
/* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
GIMPLE_OMP_SECTIONS statement. */
stmt = gimple_build_switch (vmain, u, label_vec);
gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
gsi_remove (&switch_si, true);
- label_vec.release ();
si = gsi_start_bb (default_bb);
stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
loadedi = loaded_val;
}
+ fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
+ tree loaddecl = builtin_decl_explicit (fncode);
+ if (loaddecl)
+ initial
+ = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
+ build_call_expr (loaddecl, 2, iaddr,
+ build_int_cst (NULL_TREE,
+ MEMMODEL_RELAXED)));
+ else
+ initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
+ build_int_cst (TREE_TYPE (iaddr), 0));
+
initial
- = force_gimple_operand_gsi (&si,
- build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)),
- iaddr,
- build_int_cst (TREE_TYPE (iaddr), 0)),
- true, NULL_TREE, true, GSI_SAME_STMT);
+ = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
+ GSI_SAME_STMT);
/* Move the value to the LOADEDI temporary. */
if (gimple_in_ssa_p (cfun))
basic_block bb;
bool changed = false;
- FOR_EACH_BB (bb)
+ FOR_EACH_BB_FN (bb, cfun)
changed |= gimple_purge_dead_eh_edges (bb);
if (changed)
cleanup_tree_cfg ();
{
gcc_assert (root_omp_region == NULL);
calculate_dominance_info (CDI_DOMINATORS);
- build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false);
+ build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}
/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
gate_expand_omp (void)
{
return ((flag_openmp != 0 || flag_openmp_simd != 0
- || flag_enable_cilkplus != 0) && !seen_error ());
+ || flag_cilkplus != 0) && !seen_error ());
}
namespace {
gimple_stmt_iterator tgsi;
gimple stmt, new_stmt, bind, t;
gimple_seq ilist, dlist, olist, new_body;
- struct gimplify_ctx gctx;
stmt = gsi_stmt (*gsi_p);
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
dlist = NULL;
ilist = NULL;
tree block;
gimple t, bind, single_stmt = gsi_stmt (*gsi_p);
gimple_seq bind_body, bind_body_tail = NULL, dlist;
- struct gimplify_ctx gctx;
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
block = make_node (BLOCK);
bind = gimple_build_bind (NULL, NULL, block);
gimple stmt = gsi_stmt (*gsi_p), bind;
location_t loc = gimple_location (stmt);
gimple_seq tseq;
- struct gimplify_ctx gctx;
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
block = make_node (BLOCK);
bind = gimple_build_bind (NULL, NULL, block);
{
tree block;
gimple stmt = gsi_stmt (*gsi_p), bind, x;
- struct gimplify_ctx gctx;
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
block = make_node (BLOCK);
bind = gimple_build_bind (NULL, NULL, block);
gimple stmt = gsi_stmt (*gsi_p), bind;
location_t loc = gimple_location (stmt);
gimple_seq tbody;
- struct gimplify_ctx gctx;
name = gimple_omp_critical_name (stmt);
if (name)
unlock = build_call_expr_loc (loc, unlock, 0);
}
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
block = make_node (BLOCK);
bind = gimple_build_bind (NULL, NULL, block);
type optimizations deduce the value and remove a copy. */
if (tree_fits_shwi_p (fd->loop.step))
{
- HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->loop.step);
+ HOST_WIDE_INT step = tree_to_shwi (fd->loop.step);
if (step == 1 || step == -1)
cond_code = EQ_EXPR;
}
gimple stmt = gsi_stmt (*gsi_p), new_stmt;
gimple_seq omp_for_body, body, dlist;
size_t i;
- struct gimplify_ctx gctx;
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
if (!gimple_seq_empty_p (omp_for_body)
&& gimple_code (gimple_seq_first_stmt (omp_for_body)) == GIMPLE_BIND)
{
- tree vars = gimple_bind_vars (gimple_seq_first_stmt (omp_for_body));
+ gimple inner_bind = gimple_seq_first_stmt (omp_for_body);
+ tree vars = gimple_bind_vars (inner_bind);
gimple_bind_append_vars (new_stmt, vars);
+ /* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't
+ keep them on the inner_bind and it's block. */
+ gimple_bind_set_vars (inner_bind, NULL_TREE);
+ if (gimple_bind_block (inner_bind))
+ BLOCK_VARS (gimple_bind_block (inner_bind)) = NULL_TREE;
}
if (gimple_omp_for_combined_into_p (stmt))
}
/* Callback for walk_stmts. Check if the current statement only contains
- GIMPLE_OMP_FOR or GIMPLE_OMP_PARALLEL. */
+ GIMPLE_OMP_FOR or GIMPLE_OMP_SECTIONS. */
static tree
check_combined_parallel (gimple_stmt_iterator *gsi_p,
bool record_needs_remap = false, srecord_needs_remap = false;
splay_tree_node n;
struct omp_taskcopy_context tcctx;
- struct gimplify_ctx gctx;
location_t loc = gimple_location (task_stmt);
child_fn = gimple_omp_task_copy_fn (task_stmt);
DECL_CONTEXT (t) = child_fn;
/* Populate the function. */
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
push_cfun (child_cfun);
bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
gimple stmt = gsi_stmt (*gsi_p);
gimple par_bind, bind, dep_bind = NULL;
gimple_seq par_body, olist, ilist, par_olist, par_rlist, par_ilist, new_body;
- struct gimplify_ctx gctx, dep_gctx;
location_t loc = gimple_location (stmt);
clauses = gimple_omp_taskreg_clauses (stmt);
if (gimple_code (stmt) == GIMPLE_OMP_TASK
&& find_omp_clause (clauses, OMP_CLAUSE_DEPEND))
{
- push_gimplify_context (&dep_gctx);
+ push_gimplify_context ();
dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
lower_depend_clauses (stmt, &dep_ilist, &dep_olist);
}
if (ctx->srecord_type)
create_task_copyfn (stmt, ctx);
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
par_olist = NULL;
par_ilist = NULL;
gimple stmt = gsi_stmt (*gsi_p);
gimple tgt_bind = NULL, bind;
gimple_seq tgt_body = NULL, olist, ilist, new_body;
- struct gimplify_ctx gctx;
location_t loc = gimple_location (stmt);
int kind = gimple_omp_target_kind (stmt);
unsigned int map_cnt = 0;
tgt_body = gimple_omp_body (stmt);
child_fn = ctx->cb.dst_fn;
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
switch (OMP_CLAUSE_CODE (c))
TREE_VEC_ELT (t, 1)),
&initlist, true, NULL_TREE);
gimple_seq_add_seq (&ilist, initlist);
+
+ tree clobber = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 1)),
+ NULL);
+ TREE_THIS_VOLATILE (clobber) = 1;
+ gimple_seq_add_stmt (&olist,
+ gimple_build_assign (TREE_VEC_ELT (t, 1),
+ clobber));
}
tree clobber = build_constructor (ctx->record_type, NULL);
lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
{
gimple teams_stmt = gsi_stmt (*gsi_p);
- struct gimplify_ctx gctx;
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
tree block = make_node (BLOCK);
gimple bind = gimple_build_bind (NULL, NULL, block);
if ((ctx || task_shared_vars)
&& walk_gimple_op (stmt, lower_omp_regimplify_p,
ctx ? NULL : &wi))
- gimple_regimplify_operands (stmt, gsi_p);
+ {
+ /* Just remove clobbers, this should happen only if we have
+ "privatized" local addressable variables in SIMD regions,
+ the clobber isn't needed in that case and gimplifying address
+ of the ARRAY_REF into a pointer and creating MEM_REF based
+ clobber would create worse code than we get with the clobber
+ dropped. */
+ if (gimple_clobber_p (stmt))
+ {
+ gsi_replace (gsi_p, gimple_build_nop (), true);
+ break;
+ }
+ gimple_regimplify_operands (stmt, gsi_p);
+ }
break;
}
}
gimple_stmt_iterator gsi;
for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
lower_omp_1 (&gsi, ctx);
- /* Inside target region we haven't called fold_stmt during gimplification,
- because it can break code by adding decl references that weren't in the
- source. Call fold_stmt now. */
+ /* During gimplification, we have not always invoked fold_stmt
+ (gimplify.c:maybe_fold_stmt); call it now. */
if (target_nesting_level)
for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
fold_stmt (&gsi);
/* This pass always runs, to provide PROP_gimple_lomp.
But there is nothing to do unless -fopenmp is given. */
- if (flag_openmp == 0 && flag_openmp_simd == 0 && flag_enable_cilkplus == 0)
+ if (flag_openmp == 0 && flag_openmp_simd == 0 && flag_cilkplus == 0)
return 0;
all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
if (all_contexts->root)
{
- struct gimplify_ctx gctx;
-
if (task_shared_vars)
- push_gimplify_context (&gctx);
+ push_gimplify_context ();
lower_omp (&body, NULL);
if (task_shared_vars)
pop_gimplify_context (NULL);
#endif
bool cilkplus_block = false;
- if (flag_enable_cilkplus)
+ if (flag_cilkplus)
{
if ((branch_ctx
&& gimple_code (branch_ctx) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (branch_ctx) == GF_OMP_FOR_KIND_CILKSIMD)
- || (gimple_code (label_ctx) == GIMPLE_OMP_FOR
+ || (label_ctx
+ && gimple_code (label_ctx) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (label_ctx) == GF_OMP_FOR_KIND_CILKSIMD))
cilkplus_block = true;
}
/* Called from tree-cfg.c::make_edges to create cfg edges for all GIMPLE_OMP
codes. */
bool
-make_gimple_omp_edges (basic_block bb, struct omp_region **region)
+make_gimple_omp_edges (basic_block bb, struct omp_region **region,
+ int *region_idx)
{
gimple last = last_stmt (bb);
enum gimple_code code = gimple_code (last);
}
if (*region != cur_region)
- *region = cur_region;
+ {
+ *region = cur_region;
+ if (cur_region)
+ *region_idx = cur_region->entry->index;
+ else
+ *region_idx = 0;
+ }
return fallthru;
}
static bool
gate_diagnose_omp_blocks (void)
{
- return flag_openmp || flag_enable_cilkplus;
+ return flag_openmp || flag_cilkplus;
}
namespace {
{
return new pass_diagnose_omp_blocks (ctxt);
}
+\f
+/* SIMD clone supporting code. */
+
+/* Allocate a fresh `simd_clone' and return it.  NARGS is the number
+   of arguments to reserve space for.  */
+
+static struct cgraph_simd_clone *
+simd_clone_struct_alloc (int nargs)
+{
+  struct cgraph_simd_clone *clone_info;
+  /* The args[] array is a trailing flexible payload; reserve room for
+     NARGS cgraph_simd_clone_arg entries in the same allocation.  */
+  size_t len = (sizeof (struct cgraph_simd_clone)
+		+ nargs * sizeof (struct cgraph_simd_clone_arg));
+  /* GC allocation, zero-initialized so all fields start cleared.  */
+  clone_info = (struct cgraph_simd_clone *)
+    ggc_internal_cleared_alloc (len);
+  return clone_info;
+}
+
+/* Make a copy of the `struct cgraph_simd_clone' in FROM to TO.
+   NOTE(review): the element count (nargs - inbranch) reflects that for
+   in-branch clones nargs counts the implicit mask argument (see
+   simd_clone_adjust_argument_types, where nargs is incremented); assumes
+   TO was allocated with room for at least that many args.  */
+
+static inline void
+simd_clone_struct_copy (struct cgraph_simd_clone *to,
+			struct cgraph_simd_clone *from)
+{
+  memcpy (to, from, (sizeof (struct cgraph_simd_clone)
+		     + ((from->nargs - from->inbranch)
+			* sizeof (struct cgraph_simd_clone_arg))));
+}
+
+/* Return vector of parameter types of function FNDECL.  This uses
+   TYPE_ARG_TYPES if available, otherwise falls back to types of
+   DECL_ARGUMENTS types.  The caller is responsible for releasing
+   the returned vector.  */
+
+vec<tree>
+simd_clone_vector_of_formal_parm_types (tree fndecl)
+{
+  if (TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
+    return ipa_get_vector_of_formal_parm_types (TREE_TYPE (fndecl));
+  vec<tree> args = ipa_get_vector_of_formal_parms (fndecl);
+  unsigned int i;
+  tree arg;
+  /* Replace each PARM_DECL in the vector by its type, in place.  */
+  FOR_EACH_VEC_ELT (args, i, arg)
+    args[i] = TREE_TYPE (args[i]);
+  return args;
+}
+
+/* Given a simd function in NODE, extract the simd specific
+   information from the OMP clauses passed in CLAUSES, and return
+   the struct cgraph_simd_clone * if it should be cloned.  *INBRANCH_SPECIFIED
+   is set to TRUE if the `inbranch' or `notinbranch' clause specified,
+   otherwise set to FALSE.  Returns NULL (after warning) on invalid
+   linear steps.  */
+
+static struct cgraph_simd_clone *
+simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
+			    bool *inbranch_specified)
+{
+  vec<tree> args = simd_clone_vector_of_formal_parm_types (node->decl);
+  tree t;
+  int n;
+  *inbranch_specified = false;
+
+  n = args.length ();
+  if (n > 0 && args.last () == void_type_node)
+    n--;
+
+  /* To distinguish from an OpenMP simd clone, Cilk Plus functions to
+     be cloned have a distinctive artificial label in addition to "omp
+     declare simd".  */
+  bool cilk_clone
+    = (flag_cilkplus
+       && lookup_attribute ("cilk simd function",
+			    DECL_ATTRIBUTES (node->decl)));
+
+  /* Allocate one more than needed just in case this is an in-branch
+     clone which will require a mask argument.  */
+  struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
+  clone_info->nargs = n;
+  clone_info->cilk_elemental = cilk_clone;
+
+  if (!clauses)
+    {
+      args.release ();
+      return clone_info;
+    }
+  clauses = TREE_VALUE (clauses);
+  if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
+    {
+      /* Release the formal parameter type vector on this early-return
+	 path too; previously it was leaked here.  */
+      args.release ();
+      return clone_info;
+    }
+
+  for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
+    {
+      switch (OMP_CLAUSE_CODE (t))
+	{
+	case OMP_CLAUSE_INBRANCH:
+	  clone_info->inbranch = 1;
+	  *inbranch_specified = true;
+	  break;
+	case OMP_CLAUSE_NOTINBRANCH:
+	  clone_info->inbranch = 0;
+	  *inbranch_specified = true;
+	  break;
+	case OMP_CLAUSE_SIMDLEN:
+	  clone_info->simdlen
+	    = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t));
+	  break;
+	case OMP_CLAUSE_LINEAR:
+	  {
+	    /* OMP_CLAUSE_DECL here is the argument number, not a decl.  */
+	    tree decl = OMP_CLAUSE_DECL (t);
+	    tree step = OMP_CLAUSE_LINEAR_STEP (t);
+	    int argno = TREE_INT_CST_LOW (decl);
+	    if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t))
+	      {
+		/* Variable stride: STEP encodes the index of the
+		   argument holding the step, so it must name one of
+		   the N arguments.  */
+		clone_info->args[argno].arg_type
+		  = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP;
+		clone_info->args[argno].linear_step = tree_to_shwi (step);
+		gcc_assert (clone_info->args[argno].linear_step >= 0
+			    && clone_info->args[argno].linear_step < n);
+	      }
+	    else
+	      {
+		if (POINTER_TYPE_P (args[argno]))
+		  step = fold_convert (ssizetype, step);
+		if (!tree_fits_shwi_p (step))
+		  {
+		    warning_at (OMP_CLAUSE_LOCATION (t), 0,
+				"ignoring large linear step");
+		    args.release ();
+		    return NULL;
+		  }
+		else if (integer_zerop (step))
+		  {
+		    warning_at (OMP_CLAUSE_LOCATION (t), 0,
+				"ignoring zero linear step");
+		    args.release ();
+		    return NULL;
+		  }
+		else
+		  {
+		    clone_info->args[argno].arg_type
+		      = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP;
+		    clone_info->args[argno].linear_step = tree_to_shwi (step);
+		  }
+	      }
+	    break;
+	  }
+	case OMP_CLAUSE_UNIFORM:
+	  {
+	    tree decl = OMP_CLAUSE_DECL (t);
+	    int argno = tree_to_uhwi (decl);
+	    clone_info->args[argno].arg_type
+	      = SIMD_CLONE_ARG_TYPE_UNIFORM;
+	    break;
+	  }
+	case OMP_CLAUSE_ALIGNED:
+	  {
+	    tree decl = OMP_CLAUSE_DECL (t);
+	    int argno = tree_to_uhwi (decl);
+	    clone_info->args[argno].alignment
+	      = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t));
+	    break;
+	  }
+	default:
+	  break;
+	}
+    }
+  args.release ();
+  return clone_info;
+}
+
+/* Given a SIMD clone in NODE, calculate the characteristic data
+   type and return the corresponding type.  The characteristic data
+   type is computed as described in the Intel Vector ABI.  */
+
+static tree
+simd_clone_compute_base_data_type (struct cgraph_node *node,
+				   struct cgraph_simd_clone *clone_info)
+{
+  tree type = integer_type_node;
+  tree fndecl = node->decl;
+
+  /* a) For non-void function, the characteristic data type is the
+        return type.  */
+  if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE)
+    type = TREE_TYPE (TREE_TYPE (fndecl));
+
+  /* b) If the function has any non-uniform, non-linear parameters,
+        then the characteristic data type is the type of the first
+        such parameter.  */
+  else
+    {
+      vec<tree> map = simd_clone_vector_of_formal_parm_types (fndecl);
+      for (unsigned int i = 0; i < clone_info->nargs; ++i)
+	if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
+	  {
+	    type = map[i];
+	    break;
+	  }
+      map.release ();
+    }
+
+  /* c) If the characteristic data type determined by a) or b) above
+        is struct, union, or class type which is pass-by-value (except
+        for the type that maps to the built-in complex data type), the
+        characteristic data type is int.  */
+  /* NOTE(review): the COMPLEX_TYPE test below can never fail once
+     RECORD_OR_UNION_TYPE_P has matched, so it is effectively dead;
+     kept to mirror the ABI wording.  */
+  if (RECORD_OR_UNION_TYPE_P (type)
+      && !aggregate_value_p (type, NULL)
+      && TREE_CODE (type) != COMPLEX_TYPE)
+    return integer_type_node;
+
+  /* d) If none of the above three classes is applicable, the
+        characteristic data type is int.  */
+
+  return type;
+
+  /* e) For Intel Xeon Phi native and offload compilation, if the
+        resulting characteristic data type is 8-bit or 16-bit integer
+        data type, the characteristic data type is int.  */
+  /* Well, we don't handle Xeon Phi yet.  */
+}
+
+/* Compute the Intel Vector Function ABI mangled name for the SIMD
+   clone of NODE described by CLONE_INFO (e.g. "_ZGVbN4vv_foo") and
+   return it as an identifier, or NULL_TREE if a clone with the same
+   mangled name already exists on NODE's clone list.  */
+
+static tree
+simd_clone_mangle (struct cgraph_node *node,
+		   struct cgraph_simd_clone *clone_info)
+{
+  char vecsize_mangle = clone_info->vecsize_mangle;
+  char mask = clone_info->inbranch ? 'M' : 'N';
+  unsigned int simdlen = clone_info->simdlen;
+  unsigned int n;
+  pretty_printer pp;
+
+  gcc_assert (vecsize_mangle && simdlen);
+
+  /* Prefix: _ZGV, ISA letter, mask flag, simd length.  */
+  pp_string (&pp, "_ZGV");
+  pp_character (&pp, vecsize_mangle);
+  pp_character (&pp, mask);
+  pp_decimal_int (&pp, simdlen);
+
+  /* One letter (plus optional step/alignment digits) per argument.  */
+  for (n = 0; n < clone_info->nargs; ++n)
+    {
+      struct cgraph_simd_clone_arg arg = clone_info->args[n];
+
+      if (arg.arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
+	pp_character (&pp, 'u');
+      else if (arg.arg_type == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
+	{
+	  gcc_assert (arg.linear_step != 0);
+	  pp_character (&pp, 'l');
+	  if (arg.linear_step > 1)
+	    pp_unsigned_wide_integer (&pp, arg.linear_step);
+	  else if (arg.linear_step < 0)
+	    {
+	      /* Negative steps are mangled as 'n' plus the magnitude;
+		 negate in unsigned arithmetic to avoid overflow.  */
+	      pp_character (&pp, 'n');
+	      pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT)
+					      arg.linear_step));
+	    }
+	}
+      else if (arg.arg_type == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP)
+	{
+	  /* Variable step: linear_step holds the argument number of
+	     the step (see simd_clone_clauses_extract).  */
+	  pp_character (&pp, 's');
+	  pp_unsigned_wide_integer (&pp, arg.linear_step);
+	}
+      else
+	pp_character (&pp, 'v');
+      if (arg.alignment)
+	{
+	  pp_character (&pp, 'a');
+	  pp_decimal_int (&pp, arg.alignment);
+	}
+    }
+
+  pp_underscore (&pp);
+  pp_string (&pp,
+	     IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
+  const char *str = pp_formatted_text (&pp);
+
+  /* If there already is a SIMD clone with the same mangled name, don't
+     add another one.  This can happen e.g. for
+     #pragma omp declare simd
+     #pragma omp declare simd simdlen(8)
+     int foo (int, int);
+     if the simdlen is assumed to be 8 for the first one, etc.  */
+  for (struct cgraph_node *clone = node->simd_clones; clone;
+       clone = clone->simdclone->next_clone)
+    if (strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (clone->decl)),
+		str) == 0)
+      return NULL_TREE;
+
+  return get_identifier (str);
+}
+
+/* Create a simd clone of OLD_NODE and return it.  Returns NULL if the
+   node cannot be cloned (no gimple body available, or versioning
+   failed).  */
+
+static struct cgraph_node *
+simd_clone_create (struct cgraph_node *old_node)
+{
+  struct cgraph_node *new_node;
+  if (old_node->definition)
+    {
+      /* A body is available: make a real function version.  */
+      if (!cgraph_function_with_gimple_body_p (old_node))
+	return NULL;
+      cgraph_get_body (old_node);
+      new_node = cgraph_function_versioning (old_node, vNULL, NULL, NULL,
+					     false, NULL, NULL, "simdclone");
+    }
+  else
+    {
+      /* Declaration only: copy the decl, rename it, and register the
+	 new symbol with the callgraph.  */
+      tree old_decl = old_node->decl;
+      tree new_decl = copy_node (old_node->decl);
+      DECL_NAME (new_decl) = clone_function_name (old_decl, "simdclone");
+      SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
+      SET_DECL_RTL (new_decl, NULL);
+      DECL_STATIC_CONSTRUCTOR (new_decl) = 0;
+      DECL_STATIC_DESTRUCTOR (new_decl) = 0;
+      new_node
+	= cgraph_copy_node_for_versioning (old_node, new_decl, vNULL, NULL);
+      cgraph_call_function_insertion_hooks (new_node);
+    }
+  if (new_node == NULL)
+    return new_node;
+
+  TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
+
+  /* The function cgraph_function_versioning () will force the new
+     symbol local.  Undo this, and inherit external visibility from
+     the old node.  */
+  new_node->local.local = old_node->local.local;
+  new_node->externally_visible = old_node->externally_visible;
+
+  return new_node;
+}
+
+/* Adjust the return type of the given function to its appropriate
+   vector counterpart.  Returns a simd array to be used throughout the
+   function as a return value (NULL_TREE for void returns or
+   declaration-only nodes).  */
+
+static tree
+simd_clone_adjust_return_type (struct cgraph_node *node)
+{
+  tree fndecl = node->decl;
+  tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl));
+  unsigned int veclen;
+  tree t;
+
+  /* Adjust the function return type.  */
+  if (orig_rettype == void_type_node)
+    return NULL_TREE;
+  TREE_TYPE (fndecl) = build_distinct_type_copy (TREE_TYPE (fndecl));
+  if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (fndecl)))
+      || POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (fndecl))))
+    veclen = node->simdclone->vecsize_int;
+  else
+    veclen = node->simdclone->vecsize_float;
+  /* vecsize_* is a size in bits; convert to a lane count for this
+     element type, capped at simdlen.  */
+  veclen /= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))));
+  if (veclen > node->simdclone->simdlen)
+    veclen = node->simdclone->simdlen;
+  if (veclen == node->simdclone->simdlen)
+    TREE_TYPE (TREE_TYPE (fndecl))
+      = build_vector_type (TREE_TYPE (TREE_TYPE (fndecl)),
+			   node->simdclone->simdlen);
+  else
+    {
+      /* All simdlen lanes do not fit in one vector: return an array
+	 of simdlen / veclen shorter vectors instead.  */
+      t = build_vector_type (TREE_TYPE (TREE_TYPE (fndecl)), veclen);
+      t = build_array_type_nelts (t, node->simdclone->simdlen / veclen);
+      TREE_TYPE (TREE_TYPE (fndecl)) = t;
+    }
+  if (!node->definition)
+    return NULL_TREE;
+
+  t = DECL_RESULT (fndecl);
+  /* Adjust the DECL_RESULT.  */
+  gcc_assert (TREE_TYPE (t) != void_type_node);
+  TREE_TYPE (t) = TREE_TYPE (TREE_TYPE (fndecl));
+  relayout_decl (t);
+
+  tree atype = build_array_type_nelts (orig_rettype,
+				       node->simdclone->simdlen);
+  if (veclen != node->simdclone->simdlen)
+    /* Multi-vector case: view the adjusted DECL_RESULT itself as the
+       simd array.  */
+    return build1 (VIEW_CONVERT_EXPR, atype, t);
+
+  /* Set up a SIMD array to use as the return value.  */
+  tree retval = create_tmp_var_raw (atype, "retval");
+  gimple_add_tmp_var (retval);
+  return retval;
+}
+
+/* Each vector argument has a corresponding array to be used locally
+   as part of the eventual loop.  Create such temporary array and
+   return it.
+
+   PREFIX is the prefix to be used for the temporary.
+
+   TYPE is the inner element type.
+
+   SIMDLEN is the number of elements.  */
+
+static tree
+create_tmp_simd_array (const char *prefix, tree type, int simdlen)
+{
+  tree atype = build_array_type_nelts (type, simdlen);
+  tree avar = create_tmp_var_raw (atype, prefix);
+  /* Register the temporary with the current function.  */
+  gimple_add_tmp_var (avar);
+  return avar;
+}
+
+/* Modify the function argument types to their corresponding vector
+   counterparts if appropriate.  Also, create one array for each simd
+   argument to be used locally when using the function arguments as
+   part of the loop.
+
+   NODE is the function whose arguments are to be adjusted.
+
+   Returns an adjustment vector that will be filled describing how the
+   argument types will be adjusted.  Note that on the declaration-only
+   path the vector is released before returning (the return value is
+   then an empty vector).  */
+
+static ipa_parm_adjustment_vec
+simd_clone_adjust_argument_types (struct cgraph_node *node)
+{
+  vec<tree> args;
+  ipa_parm_adjustment_vec adjustments;
+
+  /* With a definition we have PARM_DECLs; otherwise only types.  */
+  if (node->definition)
+    args = ipa_get_vector_of_formal_parms (node->decl);
+  else
+    args = simd_clone_vector_of_formal_parm_types (node->decl);
+  adjustments.create (args.length ());
+  unsigned i, j, veclen;
+  struct ipa_parm_adjustment adj;
+  for (i = 0; i < node->simdclone->nargs; ++i)
+    {
+      memset (&adj, 0, sizeof (adj));
+      tree parm = args[i];
+      tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
+      adj.base_index = i;
+      adj.base = parm;
+
+      node->simdclone->args[i].orig_arg = node->definition ? parm : NULL_TREE;
+      node->simdclone->args[i].orig_type = parm_type;
+
+      if (node->simdclone->args[i].arg_type != SIMD_CLONE_ARG_TYPE_VECTOR)
+	{
+	  /* No adjustment necessary for scalar arguments.  */
+	  adj.op = IPA_PARM_OP_COPY;
+	}
+      else
+	{
+	  if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
+	    veclen = node->simdclone->vecsize_int;
+	  else
+	    veclen = node->simdclone->vecsize_float;
+	  /* vecsize_* is in bits; convert to a lane count, capped at
+	     simdlen.  */
+	  veclen /= GET_MODE_BITSIZE (TYPE_MODE (parm_type));
+	  if (veclen > node->simdclone->simdlen)
+	    veclen = node->simdclone->simdlen;
+	  adj.arg_prefix = "simd";
+	  adj.type = build_vector_type (parm_type, veclen);
+	  node->simdclone->args[i].vector_type = adj.type;
+	  /* If one vector cannot hold all simdlen lanes, emit extra
+	     IPA_PARM_OP_NEW adjustments so the argument is passed as
+	     simdlen / veclen separate vectors.  */
+	  for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+	    {
+	      adjustments.safe_push (adj);
+	      if (j == veclen)
+		{
+		  memset (&adj, 0, sizeof (adj));
+		  adj.op = IPA_PARM_OP_NEW;
+		  adj.arg_prefix = "simd";
+		  adj.base_index = i;
+		  adj.type = node->simdclone->args[i].vector_type;
+		}
+	    }
+
+	  if (node->definition)
+	    node->simdclone->args[i].simd_array
+	      = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
+				       parm_type, node->simdclone->simdlen);
+	}
+      adjustments.safe_push (adj);
+    }
+
+  if (node->simdclone->inbranch)
+    {
+      /* In-branch clones take one extra trailing mask argument, whose
+	 element type is the clone's characteristic data type.  */
+      tree base_type
+	= simd_clone_compute_base_data_type (node->simdclone->origin,
+					     node->simdclone);
+
+      memset (&adj, 0, sizeof (adj));
+      adj.op = IPA_PARM_OP_NEW;
+      adj.arg_prefix = "mask";
+
+      adj.base_index = i;
+      if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
+	veclen = node->simdclone->vecsize_int;
+      else
+	veclen = node->simdclone->vecsize_float;
+      veclen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
+      if (veclen > node->simdclone->simdlen)
+	veclen = node->simdclone->simdlen;
+      adj.type = build_vector_type (base_type, veclen);
+      adjustments.safe_push (adj);
+
+      for (j = veclen; j < node->simdclone->simdlen; j += veclen)
+	adjustments.safe_push (adj);
+
+      /* We have previously allocated one extra entry for the mask.  Use
+	 it and fill it.  */
+      struct cgraph_simd_clone *sc = node->simdclone;
+      sc->nargs++;
+      if (node->definition)
+	{
+	  sc->args[i].orig_arg
+	    = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
+	  sc->args[i].simd_array
+	    = create_tmp_simd_array ("mask", base_type, sc->simdlen);
+	}
+      sc->args[i].orig_type = base_type;
+      sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
+    }
+
+  if (node->definition)
+    ipa_modify_formal_parameters (node->decl, adjustments);
+  else
+    {
+      /* No body to rewrite: just rebuild TYPE_ARG_TYPES from the
+	 adjustment descriptions.  */
+      tree new_arg_types = NULL_TREE, new_reversed;
+      bool last_parm_void = false;
+      if (args.length () > 0 && args.last () == void_type_node)
+	last_parm_void = true;
+
+      gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl)));
+      j = adjustments.length ();
+      for (i = 0; i < j; i++)
+	{
+	  struct ipa_parm_adjustment *adj = &adjustments[i];
+	  tree ptype;
+	  if (adj->op == IPA_PARM_OP_COPY)
+	    ptype = args[adj->base_index];
+	  else
+	    ptype = adj->type;
+	  new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types);
+	}
+      new_reversed = nreverse (new_arg_types);
+      if (last_parm_void)
+	{
+	  if (new_reversed)
+	    TREE_CHAIN (new_arg_types) = void_list_node;
+	  else
+	    new_reversed = void_list_node;
+	}
+
+      tree new_type = build_distinct_type_copy (TREE_TYPE (node->decl));
+      TYPE_ARG_TYPES (new_type) = new_reversed;
+      TREE_TYPE (node->decl) = new_type;
+
+      adjustments.release ();
+    }
+  args.release ();
+  return adjustments;
+}
+
+/* Initialize and copy the function arguments in NODE to their
+ corresponding local simd arrays. Returns a fresh gimple_seq with
+ the instruction sequence generated. */
+
+static gimple_seq
+simd_clone_init_simd_arrays (struct cgraph_node *node,
+ ipa_parm_adjustment_vec adjustments)
+{
+ gimple_seq seq = NULL;
+ /* I indexes simdclone->args, J indexes ADJUSTMENTS; they can diverge
+ below when one argument was split into several vector PARM_DECLs. */
+ unsigned i = 0, j = 0, k;
+
+ for (tree arg = DECL_ARGUMENTS (node->decl);
+ arg;
+ arg = DECL_CHAIN (arg), i++, j++)
+ {
+ /* Arguments passed through unchanged (IPA_PARM_OP_COPY) have no
+ simd array to initialize. */
+ if (adjustments[j].op == IPA_PARM_OP_COPY)
+ continue;
+
+ node->simdclone->args[i].vector_arg = arg;
+
+ tree array = node->simdclone->args[i].simd_array;
+ if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)) == node->simdclone->simdlen)
+ {
+ /* A single vector argument covers the whole simd array:
+ store it through a MEM_REF at offset 0. */
+ tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
+ tree ptr = build_fold_addr_expr (array);
+ tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
+ build_int_cst (ptype, 0));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
+ gimplify_and_add (t, &seq);
+ }
+ else
+ {
+ /* The argument was split into several vector PARM_DECLs;
+ store each chunk at its byte offset into the simd array,
+ advancing ARG and J past the extra parameters. */
+ unsigned int simdlen = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg));
+ tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
+ for (k = 0; k < node->simdclone->simdlen; k += simdlen)
+ {
+ tree ptr = build_fold_addr_expr (array);
+ int elemsize;
+ if (k)
+ {
+ arg = DECL_CHAIN (arg);
+ j++;
+ }
+ elemsize
+ = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (arg))));
+ tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
+ build_int_cst (ptype, k * elemsize));
+ t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
+ gimplify_and_add (t, &seq);
+ }
+ }
+ }
+ return seq;
+}
+
+/* Callback info for ipa_simd_modify_stmt_ops below. */
+
+struct modify_stmt_info {
+ /* Parameter adjustments; new_decl of each entry maps an original
+ argument to its simd-array element reference. */
+ ipa_parm_adjustment_vec adjustments;
+ /* Statement currently being rewritten; used as the insertion point
+ for the load/store of the replacement SSA name. */
+ gimple stmt;
+ /* True if the parent statement was modified by
+ ipa_simd_modify_stmt_ops. */
+ bool modified;
+};
+
+/* Callback for walk_gimple_op.
+
+ Adjust operands from a given statement as specified in the
+ adjustments vector in the callback data. */
+
+static tree
+ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data)
+{
+ struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
+ if (!SSA_VAR_P (*tp))
+ {
+ /* Make sure we treat subtrees as a RHS. This makes sure that
+ when examining the `*foo' in *foo=x, the `foo' get treated as
+ a use properly. */
+ wi->is_lhs = false;
+ wi->val_only = true;
+ if (TYPE_P (*tp))
+ *walk_subtrees = 0;
+ return NULL_TREE;
+ }
+ struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info;
+ struct ipa_parm_adjustment *cand
+ = ipa_get_adjustment_candidate (&tp, NULL, info->adjustments, true);
+ /* Operands with no recorded adjustment are left untouched. */
+ if (!cand)
+ return NULL_TREE;
+
+ /* Replace the operand with a fresh SSA name copied to/from the
+ simd-array element recorded in CAND->new_decl. */
+ tree t = *tp;
+ tree repl = make_ssa_name (TREE_TYPE (t), NULL);
+
+ gimple stmt;
+ gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
+ if (wi->is_lhs)
+ {
+ /* A definition: store the new SSA name into the array element
+ right after the statement that computes it. */
+ stmt = gimple_build_assign (unshare_expr (cand->new_decl), repl);
+ gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
+ SSA_NAME_DEF_STMT (repl) = info->stmt;
+ }
+ else
+ {
+ /* You'd think we could skip the extra SSA variable when
+ wi->val_only=true, but we may have `*var' which will get
+ replaced into `*var_array[iter]' and will likely be something
+ not gimple. */
+ stmt = gimple_build_assign (repl, unshare_expr (cand->new_decl));
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+ }
+
+ /* If the replacement's type is not trivially convertible, view it
+ in the operand's original type rather than converting the value. */
+ if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl)))
+ {
+ tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl);
+ *tp = vce;
+ }
+ else
+ *tp = repl;
+
+ info->modified = true;
+ wi->is_lhs = false;
+ wi->val_only = true;
+ return NULL_TREE;
+}
+
+/* Traverse the function body and perform all modifications as
+ described in ADJUSTMENTS. At function return, ADJUSTMENTS will be
+ modified such that the replacement/reduction value will now be an
+ offset into the corresponding simd_array.
+
+ This function will replace all function argument uses with their
+ corresponding simd array elements, and adjust the return values
+ accordingly. */
+
+static void
+ipa_simd_modify_function_body (struct cgraph_node *node,
+ ipa_parm_adjustment_vec adjustments,
+ tree retval_array, tree iter)
+{
+ basic_block bb;
+ unsigned int i, j;
+
+ /* Re-use the adjustments array, but this time use it to replace
+ every function argument use to an offset into the corresponding
+ simd_array. */
+ for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j)
+ {
+ /* Arguments with no vector counterpart need no rewriting. */
+ if (!node->simdclone->args[i].vector_arg)
+ continue;
+
+ tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg);
+ tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg);
+ adjustments[j].new_decl
+ = build4 (ARRAY_REF,
+ basetype,
+ node->simdclone->args[i].simd_array,
+ iter,
+ NULL_TREE, NULL_TREE);
+ /* An argument split over several vector parameters consumed
+ several adjustment slots; skip past the extra ones. */
+ if (adjustments[j].op == IPA_PARM_OP_NONE
+ && TYPE_VECTOR_SUBPARTS (vectype) < node->simdclone->simdlen)
+ j += node->simdclone->simdlen / TYPE_VECTOR_SUBPARTS (vectype) - 1;
+ }
+
+ struct modify_stmt_info info;
+ info.adjustments = adjustments;
+
+ FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
+ {
+ gimple_stmt_iterator gsi;
+
+ gsi = gsi_start_bb (bb);
+ while (!gsi_end_p (gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ info.stmt = stmt;
+ struct walk_stmt_info wi;
+
+ memset (&wi, 0, sizeof (wi));
+ info.modified = false;
+ wi.info = &info;
+ walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi);
+
+ if (gimple_code (stmt) == GIMPLE_RETURN)
+ {
+ tree retval = gimple_return_retval (stmt);
+ /* Value-less returns are simply dropped; control falls
+ through to the loop machinery added by the caller. */
+ if (!retval)
+ {
+ gsi_remove (&gsi, true);
+ continue;
+ }
+
+ /* Replace `return foo' with `retval_array[iter] = foo'. */
+ tree ref = build4 (ARRAY_REF, TREE_TYPE (retval),
+ retval_array, iter, NULL, NULL);
+ stmt = gimple_build_assign (ref, retval);
+ gsi_replace (&gsi, stmt, true);
+ info.modified = true;
+ }
+
+ if (info.modified)
+ {
+ update_stmt (stmt);
+ /* A rewritten statement may no longer throw; purge any
+ EH edges that became dead. */
+ if (maybe_clean_eh_stmt (stmt))
+ gimple_purge_dead_eh_edges (gimple_bb (stmt));
+ }
+ gsi_next (&gsi);
+ }
+ }
+}
+
+/* Adjust the return type and argument types of the simd clone NODE
+ and rewrite its body into a simdlen-iteration loop that reads its
+ inputs from and stores its result into the local simd arrays.
+ For inbranch clones, also branch around the body whenever the
+ corresponding mask element is zero. */
+
+static void
+simd_clone_adjust (struct cgraph_node *node)
+{
+ push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+
+ targetm.simd_clone.adjust (node);
+
+ tree retval = simd_clone_adjust_return_type (node);
+ ipa_parm_adjustment_vec adjustments
+ = simd_clone_adjust_argument_types (node);
+
+ push_gimplify_context ();
+
+ gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments);
+
+ /* Adjust all uses of vector arguments accordingly. Adjust all
+ return values accordingly. */
+ tree iter = create_tmp_var (unsigned_type_node, "iter");
+ tree iter1 = make_ssa_name (iter, NULL);
+ tree iter2 = make_ssa_name (iter, NULL);
+ ipa_simd_modify_function_body (node, adjustments, retval, iter1);
+
+ /* Initialize the iteration variable. */
+ basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+ basic_block body_bb = split_block_after_labels (entry_bb)->dest;
+ gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
+ /* Insert the SIMD array and iv initialization at function
+ entry. */
+ gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT);
+
+ pop_gimplify_context (NULL);
+
+ /* Create a new BB right before the original exit BB, to hold the
+ iteration increment and the condition/branch. */
+ basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
+ basic_block incr_bb = create_empty_bb (orig_exit);
+ /* The succ of orig_exit was EXIT_BLOCK_PTR_FOR_FN (cfun), with an empty
+ flag. Set it now to be a FALLTHRU_EDGE. */
+ gcc_assert (EDGE_COUNT (orig_exit->succs) == 1);
+ EDGE_SUCC (orig_exit, 0)->flags |= EDGE_FALLTHRU;
+ for (unsigned i = 0;
+ i < EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); ++i)
+ {
+ edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), i);
+ redirect_edge_succ (e, incr_bb);
+ }
+ edge e = make_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
+ e->probability = REG_BR_PROB_BASE;
+ gsi = gsi_last_bb (incr_bb);
+ gimple g = gimple_build_assign_with_ops (PLUS_EXPR, iter2, iter1,
+ build_int_cst (unsigned_type_node,
+ 1));
+ gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+
+ /* Mostly annotate the loop for the vectorizer (the rest is done below). */
+ struct loop *loop = alloc_loop ();
+ cfun->has_force_vect_loops = true;
+ loop->safelen = node->simdclone->simdlen;
+ loop->force_vect = true;
+ loop->header = body_bb;
+ add_bb_to_loop (incr_bb, loop);
+
+ /* Branch around the body if the mask applies. */
+ if (node->simdclone->inbranch)
+ {
+ gimple_stmt_iterator gsi = gsi_last_bb (loop->header);
+ /* The mask always occupies the last simd array (see the extra
+ entry appended in simd_clone_adjust_argument_types). */
+ tree mask_array
+ = node->simdclone->args[node->simdclone->nargs - 1].simd_array;
+ tree mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)), NULL);
+ tree aref = build4 (ARRAY_REF,
+ TREE_TYPE (TREE_TYPE (mask_array)),
+ mask_array, iter1,
+ NULL, NULL);
+ g = gimple_build_assign (mask, aref);
+ gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+ int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
+ {
+ /* Non-integral mask elements are compared against zero as
+ same-sized unsigned integers via a VIEW_CONVERT_EXPR. */
+ aref = build1 (VIEW_CONVERT_EXPR,
+ build_nonstandard_integer_type (bitsize, 0), mask);
+ mask = make_ssa_name (TREE_TYPE (aref), NULL);
+ g = gimple_build_assign (mask, aref);
+ gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+ }
+
+ g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
+ NULL, NULL);
+ gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+ make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
+ FALLTHRU_EDGE (loop->header)->flags = EDGE_FALSE_VALUE;
+ }
+
+ /* Generate the condition. */
+ g = gimple_build_cond (LT_EXPR,
+ iter2,
+ build_int_cst (unsigned_type_node,
+ node->simdclone->simdlen),
+ NULL, NULL);
+ gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+ e = split_block (incr_bb, gsi_stmt (gsi));
+ basic_block latch_bb = e->dest;
+ basic_block new_exit_bb = split_block (latch_bb, NULL)->dest;
+ loop->latch = latch_bb;
+
+ redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb);
+
+ make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE);
+ /* The successor of incr_bb is already pointing to latch_bb; just
+ change the flags.
+ make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */
+ FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE;
+
+ /* iter1 = PHI <0 (preheader), iter2 (latch)> gives the current
+ iteration number inside the loop body. */
+ gimple phi = create_phi_node (iter1, body_bb);
+ edge preheader_edge = find_edge (entry_bb, body_bb);
+ edge latch_edge = single_succ_edge (latch_bb);
+ add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge,
+ UNKNOWN_LOCATION);
+ add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
+
+ /* Generate the new return. */
+ gsi = gsi_last_bb (new_exit_bb);
+ if (retval
+ && TREE_CODE (retval) == VIEW_CONVERT_EXPR
+ && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL)
+ retval = TREE_OPERAND (retval, 0);
+ else if (retval)
+ {
+ retval = build1 (VIEW_CONVERT_EXPR,
+ TREE_TYPE (TREE_TYPE (node->decl)),
+ retval);
+ retval = force_gimple_operand_gsi (&gsi, retval, true, NULL,
+ false, GSI_CONTINUE_LINKING);
+ }
+ g = gimple_build_return (retval);
+ gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
+
+ /* Handle aligned clauses by replacing default defs of the aligned
+ uniform args with __builtin_assume_aligned (arg_N(D), alignment)
+ lhs. Handle linear by adding PHIs. */
+ for (unsigned i = 0; i < node->simdclone->nargs; i++)
+ if (node->simdclone->args[i].alignment
+ && node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
+ && (node->simdclone->args[i].alignment
+ & (node->simdclone->args[i].alignment - 1)) == 0
+ && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
+ == POINTER_TYPE)
+ {
+ unsigned int alignment = node->simdclone->args[i].alignment;
+ tree orig_arg = node->simdclone->args[i].orig_arg;
+ tree def = ssa_default_def (cfun, orig_arg);
+ if (def && !has_zero_uses (def))
+ {
+ tree fn = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
+ gimple_seq seq = NULL;
+ bool need_cvt = false;
+ gimple call
+ = gimple_build_call (fn, 2, def, size_int (alignment));
+ g = call;
+ /* __builtin_assume_aligned returns void *; convert back to
+ the argument's pointer type if that differs. */
+ if (!useless_type_conversion_p (TREE_TYPE (orig_arg),
+ ptr_type_node))
+ need_cvt = true;
+ tree t = make_ssa_name (need_cvt ? ptr_type_node : orig_arg, NULL);
+ gimple_call_set_lhs (g, t);
+ gimple_seq_add_stmt_without_update (&seq, g);
+ if (need_cvt)
+ {
+ t = make_ssa_name (orig_arg, NULL);
+ g = gimple_build_assign_with_ops (NOP_EXPR, t,
+ gimple_call_lhs (g),
+ NULL_TREE);
+ gimple_seq_add_stmt_without_update (&seq, g);
+ }
+ gsi_insert_seq_on_edge_immediate
+ (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
+
+ entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+ int freq = compute_call_stmt_bb_frequency (current_function_decl,
+ entry_bb);
+ cgraph_create_edge (node, cgraph_get_create_node (fn),
+ call, entry_bb->count, freq);
+
+ /* Redirect every non-debug use of the old default def (except
+ the call itself) to the aligned replacement. */
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ gimple use_stmt;
+ tree repl = gimple_get_lhs (g);
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
+ if (is_gimple_debug (use_stmt) || use_stmt == call)
+ continue;
+ else
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ SET_USE (use_p, repl);
+ }
+ }
+ else if (node->simdclone->args[i].arg_type
+ == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
+ {
+ tree orig_arg = node->simdclone->args[i].orig_arg;
+ tree def = ssa_default_def (cfun, orig_arg);
+ gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
+ || POINTER_TYPE_P (TREE_TYPE (orig_arg)));
+ if (def && !has_zero_uses (def))
+ {
+ /* Linear arguments become an induction variable: a PHI in
+ the loop header starting at the incoming value, stepped
+ by linear_step in the increment block. */
+ iter1 = make_ssa_name (orig_arg, NULL);
+ iter2 = make_ssa_name (orig_arg, NULL);
+ phi = create_phi_node (iter1, body_bb);
+ add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION);
+ add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
+ enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
+ ? PLUS_EXPR : POINTER_PLUS_EXPR;
+ tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
+ ? TREE_TYPE (orig_arg) : sizetype;
+ tree addcst
+ = build_int_cst (addtype, node->simdclone->args[i].linear_step);
+ g = gimple_build_assign_with_ops (code, iter2, iter1, addcst);
+ gsi = gsi_last_bb (incr_bb);
+ gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ gimple use_stmt;
+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
+ if (use_stmt == phi)
+ continue;
+ else
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+ SET_USE (use_p, iter1);
+ }
+ }
+
+ calculate_dominance_info (CDI_DOMINATORS);
+ add_loop (loop, loop->header->loop_father);
+ update_ssa (TODO_update_ssa);
+
+ pop_cfun ();
+}
+
+/* If the function in NODE is tagged as an elemental SIMD function,
+ create the appropriate SIMD clones. */
+
+static void
+expand_simd_clones (struct cgraph_node *node)
+{
+ tree attr = lookup_attribute ("omp declare simd",
+ DECL_ATTRIBUTES (node->decl));
+ /* Nothing to do without the attribute; never clone bodies inlined
+ elsewhere or functions marked "noclone". */
+ if (attr == NULL_TREE
+ || node->global.inlined_to
+ || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
+ return;
+
+ /* Ignore
+ #pragma omp declare simd
+ extern int foo ();
+ in C, there we don't know the argument types at all. */
+ if (!node->definition
+ && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE)
+ return;
+
+ /* Process every "omp declare simd" attribute on the decl. */
+ do
+ {
+ /* Start with parsing the "omp declare simd" attribute(s). */
+ bool inbranch_clause_specified;
+ struct cgraph_simd_clone *clone_info
+ = simd_clone_clauses_extract (node, TREE_VALUE (attr),
+ &inbranch_clause_specified);
+ if (clone_info == NULL)
+ continue;
+
+ int orig_simdlen = clone_info->simdlen;
+ tree base_type = simd_clone_compute_base_data_type (node, clone_info);
+ /* The target can return 0 (no simd clones should be created),
+ 1 (just one ISA of simd clones should be created) or higher
+ count of ISA variants. In that case, clone_info is initialized
+ for the first ISA variant. */
+ int count
+ = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
+ base_type, 0);
+ if (count == 0)
+ continue;
+
+ /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED,
+ also create one inbranch and one !inbranch clone of it. */
+ for (int i = 0; i < count * 2; i++)
+ {
+ struct cgraph_simd_clone *clone = clone_info;
+ /* Odd I is the inbranch variant of ISA i/2; skip it when the
+ clause already fixed inbranch-ness. */
+ if (inbranch_clause_specified && (i & 1) != 0)
+ continue;
+
+ if (i != 0)
+ {
+ /* Allocate one extra arg slot for the mask of inbranch
+ variants. */
+ clone = simd_clone_struct_alloc (clone_info->nargs
+ + ((i & 1) != 0));
+ simd_clone_struct_copy (clone, clone_info);
+ /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen
+ and simd_clone_adjust_argument_types did to the first
+ clone's info. */
+ clone->nargs -= clone_info->inbranch;
+ clone->simdlen = orig_simdlen;
+ /* And call the target hook again to get the right ISA. */
+ targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
+ base_type,
+ i / 2);
+ if ((i & 1) != 0)
+ clone->inbranch = 1;
+ }
+
+ /* simd_clone_mangle might fail if such a clone has been created
+ already. */
+ tree id = simd_clone_mangle (node, clone);
+ if (id == NULL_TREE)
+ continue;
+
+ /* Only when we are sure we want to create the clone actually
+ clone the function (or definitions) or create another
+ extern FUNCTION_DECL (for prototypes without definitions). */
+ struct cgraph_node *n = simd_clone_create (node);
+ if (n == NULL)
+ continue;
+
+ n->simdclone = clone;
+ clone->origin = node;
+ clone->next_clone = NULL;
+ /* Append N to NODE's simd clone list; prev_clone of the list
+ head records the tail so appends stay O(1). */
+ if (node->simd_clones == NULL)
+ {
+ clone->prev_clone = n;
+ node->simd_clones = n;
+ }
+ else
+ {
+ clone->prev_clone = node->simd_clones->simdclone->prev_clone;
+ clone->prev_clone->simdclone->next_clone = n;
+ node->simd_clones->simdclone->prev_clone = n;
+ }
+ change_decl_assembler_name (n->decl, id);
+ /* And finally adjust the return type, parameters and for
+ definitions also function body. */
+ if (node->definition)
+ simd_clone_adjust (n);
+ else
+ {
+ simd_clone_adjust_return_type (n);
+ simd_clone_adjust_argument_types (n);
+ }
+ }
+ }
+ while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
+}
+
+/* Entry point for IPA simd clone creation pass: walk every function
+ in the call graph and expand its "omp declare simd" clones, if any.
+ Always returns 0 (no extra TODO flags). */
+
+static unsigned int
+ipa_omp_simd_clone (void)
+{
+ struct cgraph_node *fn_node;
+ FOR_EACH_FUNCTION (fn_node)
+ expand_simd_clones (fn_node);
+ return 0;
+}
+
+namespace {
+
+/* Pass descriptor for the simple IPA "simdclone" pass; requires SSA
+ form and a CFG, provides and destroys nothing. */
+const pass_data pass_data_omp_simd_clone =
+{
+ SIMPLE_IPA_PASS, /* type */
+ "simdclone", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ true, /* has_gate */
+ true, /* has_execute */
+ TV_NONE, /* tv_id */
+ ( PROP_ssa | PROP_cfg ), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_omp_simd_clone : public simple_ipa_opt_pass
+{
+public:
+ pass_omp_simd_clone(gcc::context *ctxt)
+ : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ /* Run when OpenMP, OpenMP simd or Cilk Plus is enabled (or when
+ reading LTO but not in WPA mode), and only if the target
+ implements the compute_vecsize_and_simdlen hook. */
+ bool gate () { return ((flag_openmp || flag_openmp_simd
+ || flag_cilkplus || (in_lto_p && !flag_wpa))
+ && (targetm.simd_clone.compute_vecsize_and_simdlen
+ != NULL)); }
+ unsigned int execute () { return ipa_omp_simd_clone (); }
+};
+
+} // anon namespace
+
+/* Pass-manager factory for the omp simd clone pass. */
+
+simple_ipa_opt_pass *
+make_pass_omp_simd_clone (gcc::context *ctxt)
+{
+ return new pass_omp_simd_clone (ctxt);
+}
#include "gt-omp-low.h"