From 9669b00bfb16ced0d5bf09b9e016e9ffa8be4219 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Tue, 22 Nov 2016 19:57:29 +0300 Subject: [PATCH] OpenMP offloading to NVPTX: middle-end changes * internal-fn.c (expand_GOMP_SIMT_LANE): New. (expand_GOMP_SIMT_VF): New. (expand_GOMP_SIMT_LAST_LANE): New. (expand_GOMP_SIMT_ORDERED_PRED): New. (expand_GOMP_SIMT_VOTE_ANY): New. (expand_GOMP_SIMT_XCHG_BFLY): New. (expand_GOMP_SIMT_XCHG_IDX): New. * internal-fn.def (GOMP_SIMT_LANE): New. (GOMP_SIMT_VF): New. (GOMP_SIMT_LAST_LANE): New. (GOMP_SIMT_ORDERED_PRED): New. (GOMP_SIMT_VOTE_ANY): New. (GOMP_SIMT_XCHG_BFLY): New. (GOMP_SIMT_XCHG_IDX): New. * omp-low.c (omp_maybe_offloaded_ctx): New, outlined from... (create_omp_child_function): ...here. Set "omp target entrypoint" or "omp declare target" attribute based on is_gimple_omp_offloaded. (omp_max_simt_vf): New. Use it... (omp_max_vf): ...here. (lower_rec_input_clauses): Add reduction lowering for SIMT execution. (lower_lastprivate_clauses): Likewise, for "lastprivate" lowering. (lower_omp_ordered): Likewise, for "ordered" lowering. (expand_omp_simd): Add SIMT transforms. (pass_data_lower_omp): Add PROP_gimple_lomp_dev. (execute_omp_device_lower): New. (pass_data_omp_device_lower): New. (pass_omp_device_lower): New pass. (make_pass_omp_device_lower): New. * passes.def (pass_omp_device_lower): Position new pass. * tree-pass.h (PROP_gimple_lomp_dev): Define. (make_pass_omp_device_lower): Declare. From-SVN: r242710 --- gcc/ChangeLog | 34 ++++ gcc/internal-fn.c | 126 +++++++++++++ gcc/internal-fn.def | 7 + gcc/omp-low.c | 448 +++++++++++++++++++++++++++++++++++++------- gcc/passes.def | 1 + gcc/tree-pass.h | 2 + 6 files changed, 554 insertions(+), 64 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 179a26dd36b..8ee26220090 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,37 @@ +2016-11-22 Alexander Monakov + + * internal-fn.c (expand_GOMP_SIMT_LANE): New. + (expand_GOMP_SIMT_VF): New. + (expand_GOMP_SIMT_LAST_LANE): New. + (expand_GOMP_SIMT_ORDERED_PRED): New. + (expand_GOMP_SIMT_VOTE_ANY): New. + (expand_GOMP_SIMT_XCHG_BFLY): New. + (expand_GOMP_SIMT_XCHG_IDX): New. + * internal-fn.def (GOMP_SIMT_LANE): New. + (GOMP_SIMT_VF): New. + (GOMP_SIMT_LAST_LANE): New. + (GOMP_SIMT_ORDERED_PRED): New. + (GOMP_SIMT_VOTE_ANY): New. + (GOMP_SIMT_XCHG_BFLY): New. + (GOMP_SIMT_XCHG_IDX): New. + * omp-low.c (omp_maybe_offloaded_ctx): New, outlined from... + (create_omp_child_function): ...here. Set "omp target entrypoint" + or "omp declare target" attribute based on is_gimple_omp_offloaded. + (omp_max_simt_vf): New. Use it... + (omp_max_vf): ...here. + (lower_rec_input_clauses): Add reduction lowering for SIMT execution. + (lower_lastprivate_clauses): Likewise, for "lastprivate" lowering. + (lower_omp_ordered): Likewise, for "ordered" lowering. + (expand_omp_simd): Add SIMT transforms. + (pass_data_lower_omp): Add PROP_gimple_lomp_dev. + (execute_omp_device_lower): New. + (pass_data_omp_device_lower): New. + (pass_omp_device_lower): New pass. + (make_pass_omp_device_lower): New. + * passes.def (pass_omp_device_lower): Position new pass. + * tree-pass.h (PROP_gimple_lomp_dev): Define. + (make_pass_omp_device_lower): Declare. + 2016-11-22 Jakub Jelinek PR target/78451 diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index ca347c59cfb..6cd8522d7c4 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -158,6 +158,132 @@ expand_ANNOTATE (internal_fn, gcall *) gcc_unreachable (); } +/* Lane index on SIMT targets: thread index in the warp on NVPTX. On targets + without SIMT execution this should be expanded in omp_device_lower pass. */ + +static void +expand_GOMP_SIMT_LANE (internal_fn, gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + if (!lhs) + return; + + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + gcc_assert (targetm.have_omp_simt_lane ()); + emit_insn (targetm.gen_omp_simt_lane (target)); +} + +/* This should get expanded in omp_device_lower pass. */ + +static void +expand_GOMP_SIMT_VF (internal_fn, gcall *) +{ + gcc_unreachable (); +} + +/* Lane index of the first SIMT lane that supplies a non-zero argument. + This is a SIMT counterpart to GOMP_SIMD_LAST_LANE, used to represent the + lane that executed the last iteration for handling OpenMP lastprivate. */ + +static void +expand_GOMP_SIMT_LAST_LANE (internal_fn, gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + if (!lhs) + return; + + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx cond = expand_normal (gimple_call_arg (stmt, 0)); + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); + struct expand_operand ops[2]; + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], cond, mode); + gcc_assert (targetm.have_omp_simt_last_lane ()); + expand_insn (targetm.code_for_omp_simt_last_lane, 2, ops); +} + +/* Non-transparent predicate used in SIMT lowering of OpenMP "ordered". */ + +static void +expand_GOMP_SIMT_ORDERED_PRED (internal_fn, gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + if (!lhs) + return; + + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx ctr = expand_normal (gimple_call_arg (stmt, 0)); + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); + struct expand_operand ops[2]; + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], ctr, mode); + gcc_assert (targetm.have_omp_simt_ordered ()); + expand_insn (targetm.code_for_omp_simt_ordered, 2, ops); +} + +/* "Or" boolean reduction across SIMT lanes: return non-zero in all lanes if + any lane supplies a non-zero argument. */ + +static void +expand_GOMP_SIMT_VOTE_ANY (internal_fn, gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + if (!lhs) + return; + + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx cond = expand_normal (gimple_call_arg (stmt, 0)); + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); + struct expand_operand ops[2]; + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], cond, mode); + gcc_assert (targetm.have_omp_simt_vote_any ()); + expand_insn (targetm.code_for_omp_simt_vote_any, 2, ops); +} + +/* Exchange between SIMT lanes with a "butterfly" pattern: source lane index + is destination lane index XOR given offset. */ + +static void +expand_GOMP_SIMT_XCHG_BFLY (internal_fn, gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + if (!lhs) + return; + + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx src = expand_normal (gimple_call_arg (stmt, 0)); + rtx idx = expand_normal (gimple_call_arg (stmt, 1)); + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); + struct expand_operand ops[3]; + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], src, mode); + create_input_operand (&ops[2], idx, SImode); + gcc_assert (targetm.have_omp_simt_xchg_bfly ()); + expand_insn (targetm.code_for_omp_simt_xchg_bfly, 3, ops); +} + +/* Exchange between SIMT lanes according to given source lane index. */ + +static void +expand_GOMP_SIMT_XCHG_IDX (internal_fn, gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + if (!lhs) + return; + + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx src = expand_normal (gimple_call_arg (stmt, 0)); + rtx idx = expand_normal (gimple_call_arg (stmt, 1)); + machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); + struct expand_operand ops[3]; + create_output_operand (&ops[0], target, mode); + create_input_operand (&ops[1], src, mode); + create_input_operand (&ops[2], idx, SImode); + gcc_assert (targetm.have_omp_simt_xchg_idx ()); + expand_insn (targetm.code_for_omp_simt_xchg_idx, 3, ops); +} + /* This should get expanded in adjust_simduid_builtins. */ static void diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index d1cd1a55b8f..f055230f722 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -141,6 +141,13 @@ DEF_INTERNAL_INT_FN (FFS, ECF_CONST, ffs, unary) DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary) DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary) +DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_SIMT_ORDERED_PRED, ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_SIMT_VOTE_ANY, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_SIMT_XCHG_BFLY, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) +DEF_INTERNAL_FN (GOMP_SIMT_XCHG_IDX, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 7c58c033ded..6c52bff74ba 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -2427,6 +2427,20 @@ cilk_for_check_loop_diff_type (tree type) } } +/* Return true if CTX may belong to offloaded code: either if current function + is offloaded, or any enclosing context corresponds to a target region. */ + +static bool +omp_maybe_offloaded_ctx (omp_context *ctx) +{ + if (cgraph_node::get (current_function_decl)->offloadable) + return true; + for (; ctx; ctx = ctx->outer) + if (is_gimple_omp_offloaded (ctx->stmt)) + return true; + return false; +} + /* Build a decl for the omp child function. It'll not contain a body yet, just the bare decl. */ @@ -2475,28 +2489,24 @@ create_omp_child_function (omp_context *ctx, bool task_copy) DECL_CONTEXT (decl) = NULL_TREE; DECL_INITIAL (decl) = make_node (BLOCK); BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl; - if (cgraph_node::get (current_function_decl)->offloadable) - cgraph_node::get_create (decl)->offloadable = 1; - else + if (omp_maybe_offloaded_ctx (ctx)) { - omp_context *octx; - for (octx = ctx; octx; octx = octx->outer) - if (is_gimple_omp_offloaded (octx->stmt)) - { - cgraph_node::get_create (decl)->offloadable = 1; - if (ENABLE_OFFLOADING) - g->have_offload = true; - - break; - } + cgraph_node::get_create (decl)->offloadable = 1; + if (ENABLE_OFFLOADING) + g->have_offload = true; } if (cgraph_node::get_create (decl)->offloadable && !lookup_attribute ("omp declare target", DECL_ATTRIBUTES (current_function_decl))) - DECL_ATTRIBUTES (decl) - = tree_cons (get_identifier ("omp target entrypoint"), - NULL_TREE, DECL_ATTRIBUTES (decl)); + { + const char *target_attr = (is_gimple_omp_offloaded (ctx->stmt) + ? "omp target entrypoint" + : "omp declare target"); + DECL_ATTRIBUTES (decl) + = tree_cons (get_identifier (target_attr), + NULL_TREE, DECL_ATTRIBUTES (decl)); + } t = build_decl (DECL_SOURCE_LOCATION (decl), RESULT_DECL, NULL_TREE, void_type_node); @@ -4264,6 +4274,25 @@ omp_clause_aligned_alignment (tree clause) return build_int_cst (integer_type_node, al); } + +/* Return maximum SIMT width if offloading may target SIMT hardware. */ + +static int +omp_max_simt_vf (void) +{ + if (!optimize) + return 0; + if (ENABLE_OFFLOADING) + for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c; ) + { + if (!strncmp (c, "nvptx", strlen ("nvptx"))) + return 32; + else if ((c = strchr (c, ','))) + c++; + } + return 0; +} + /* Return maximum possible vectorization factor for the target. */ static int @@ -4277,16 +4306,18 @@ omp_max_vf (void) || global_options_set.x_flag_tree_vectorize))) return 1; + int vf = 1; int vs = targetm.vectorize.autovectorize_vector_sizes (); if (vs) + vf = 1 << floor_log2 (vs); + else { - vs = 1 << floor_log2 (vs); - return vs; + machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode); + if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT) + vf = GET_MODE_NUNITS (vqimode); } - machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode); - if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT) - return GET_MODE_NUNITS (vqimode); - return 1; + int svf = omp_max_simt_vf (); + return MAX (vf, svf); } /* Helper function of lower_rec_input_clauses, used for #pragma omp simd @@ -4374,10 +4405,13 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, int pass; bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD); + bool maybe_simt + = is_simd && omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf () > 1; int max_vf = 0; tree lane = NULL_TREE, idx = NULL_TREE; + tree simt_lane = NULL_TREE; tree ivar = NULL_TREE, lvar = NULL_TREE; - gimple_seq llist[2] = { NULL, NULL }; + gimple_seq llist[3] = { }; copyin_seq = NULL; @@ -5251,6 +5285,16 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimplify_assign (unshare_expr (ivar), x, &llist[0]); + if (maybe_simt) + { + if (!simt_lane) + simt_lane = create_tmp_var (unsigned_type_node); + x = build_call_expr_internal_loc + (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_BFLY, + TREE_TYPE (ivar), 2, ivar, simt_lane); + x = build2 (code, TREE_TYPE (ivar), ivar, x); + gimplify_assign (ivar, x, &llist[2]); + } x = build2 (code, TREE_TYPE (ref), ref, ivar); ref = build_outer_var_ref (var, ctx); gimplify_assign (ref, x, &llist[1]); @@ -5303,6 +5347,39 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, g = gimple_build_assign (lane, INTEGER_CST, build_int_cst (unsigned_type_node, 0)); gimple_seq_add_stmt (ilist, g); + /* Emit reductions across SIMT lanes in log_2(simt_vf) steps. */ + if (llist[2]) + { + tree simt_vf = create_tmp_var (unsigned_type_node); + g = gimple_build_call_internal (IFN_GOMP_SIMT_VF, 0); + gimple_call_set_lhs (g, simt_vf); + gimple_seq_add_stmt (dlist, g); + + tree t = build_int_cst (unsigned_type_node, 1); + g = gimple_build_assign (simt_lane, INTEGER_CST, t); + gimple_seq_add_stmt (dlist, g); + + t = build_int_cst (unsigned_type_node, 0); + g = gimple_build_assign (idx, INTEGER_CST, t); + gimple_seq_add_stmt (dlist, g); + + tree body = create_artificial_label (UNKNOWN_LOCATION); + tree header = create_artificial_label (UNKNOWN_LOCATION); + tree end = create_artificial_label (UNKNOWN_LOCATION); + gimple_seq_add_stmt (dlist, gimple_build_goto (header)); + gimple_seq_add_stmt (dlist, gimple_build_label (body)); + + gimple_seq_add_seq (dlist, llist[2]); + + g = gimple_build_assign (simt_lane, LSHIFT_EXPR, simt_lane, integer_one_node); + gimple_seq_add_stmt (dlist, g); + + gimple_seq_add_stmt (dlist, gimple_build_label (header)); + g = gimple_build_cond (LT_EXPR, simt_lane, simt_vf, body, end); + gimple_seq_add_stmt (dlist, g); + + gimple_seq_add_stmt (dlist, gimple_build_label (end)); + } for (int i = 0; i < 2; i++) if (llist[i]) { @@ -5389,7 +5466,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, { tree x, c, label = NULL, orig_clauses = clauses; bool par_clauses = false; - tree simduid = NULL, lastlane = NULL; + tree simduid = NULL, lastlane = NULL, simtcond = NULL, simtlast = NULL; /* Early exit if there are no lastprivate or linear clauses. */ for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) @@ -5416,6 +5493,16 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, par_clauses = true; } + bool maybe_simt = false; + if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR + && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD) + { + maybe_simt = omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf () > 1; + simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_); + if (simduid) + simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); + } + if (predicate) { gcond *stmt; @@ -5427,20 +5514,27 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, arm2 = TREE_OPERAND (predicate, 1); gimplify_expr (&arm1, stmt_list, NULL, is_gimple_val, fb_rvalue); gimplify_expr (&arm2, stmt_list, NULL, is_gimple_val, fb_rvalue); - stmt = gimple_build_cond (TREE_CODE (predicate), arm1, arm2, - label_true, label); + if (maybe_simt) + { + c = build2 (TREE_CODE (predicate), boolean_type_node, arm1, arm2); + c = fold_convert (integer_type_node, c); + simtcond = create_tmp_var (integer_type_node); + gimplify_assign (simtcond, c, stmt_list); + gcall *g = gimple_build_call_internal (IFN_GOMP_SIMT_VOTE_ANY, + 1, simtcond); + c = create_tmp_var (integer_type_node); + gimple_call_set_lhs (g, c); + gimple_seq_add_stmt (stmt_list, g); + stmt = gimple_build_cond (NE_EXPR, c, integer_zero_node, + label_true, label); + } + else + stmt = gimple_build_cond (TREE_CODE (predicate), arm1, arm2, + label_true, label); gimple_seq_add_stmt (stmt_list, stmt); gimple_seq_add_stmt (stmt_list, gimple_build_label (label_true)); } - if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR - && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD) - { - simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_); - if (simduid) - simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); - } - for (c = clauses; c ;) { tree var, new_var; @@ -5491,6 +5585,24 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, new_var = build4 (ARRAY_REF, TREE_TYPE (val), TREE_OPERAND (val, 0), lastlane, NULL_TREE, NULL_TREE); + if (maybe_simt) + { + gcall *g; + if (simtlast == NULL) + { + simtlast = create_tmp_var (unsigned_type_node); + g = gimple_build_call_internal + (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond); + gimple_call_set_lhs (g, simtlast); + gimple_seq_add_stmt (stmt_list, g); + } + x = build_call_expr_internal_loc + (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX, + TREE_TYPE (new_var), 2, new_var, simtlast); + new_var = unshare_expr (new_var); + gimplify_assign (new_var, x, stmt_list); + new_var = unshare_expr (new_var); + } } } @@ -10564,12 +10676,23 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) edge e, ne; tree *counts = NULL; int i; + int safelen_int = INT_MAX; tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), OMP_CLAUSE_SAFELEN); tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), OMP_CLAUSE__SIMDUID_); tree n1, n2; + if (safelen) + { + safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); + if (TREE_CODE (safelen) != INTEGER_CST) + safelen_int = 0; + else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX) + safelen_int = tree_to_uhwi (safelen); + if (safelen_int == 1) + safelen_int = 0; + } type = TREE_TYPE (fd->loop.v); entry_bb = region->entry; cont_bb = region->cont; @@ -10623,20 +10746,53 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) OMP_CLAUSE__LOOPTEMP_); gcc_assert (innerc); n2 = OMP_CLAUSE_DECL (innerc); - expand_omp_build_assign (&gsi, fd->loop.v, - fold_convert (type, n1)); + } + tree step = fd->loop.step; + + bool offloaded = cgraph_node::get (current_function_decl)->offloadable; + for (struct omp_region *rgn = region; !offloaded && rgn; rgn = rgn->outer) + offloaded = rgn->type == GIMPLE_OMP_TARGET; + bool is_simt = offloaded && omp_max_simt_vf () > 1 && safelen_int > 1; + tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; + if (is_simt) + { + cfun->curr_properties &= ~PROP_gimple_lomp_dev; + simt_lane = create_tmp_var (unsigned_type_node); + gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); + gimple_call_set_lhs (g, simt_lane); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, + fold_convert (TREE_TYPE (step), simt_lane)); + n1 = fold_convert (type, n1); + if (POINTER_TYPE_P (type)) + n1 = fold_build_pointer_plus (n1, offset); + else + n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); + + /* Collapsed loops not handled for SIMT yet: limit to one lane only. */ if (fd->collapse > 1) + simt_maxlane = build_one_cst (unsigned_type_node); + else if (safelen_int < omp_max_simt_vf ()) + simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); + tree vf + = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, + unsigned_type_node, 0); + if (simt_maxlane) + vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); + vf = fold_convert (TREE_TYPE (step), vf); + step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); + } + + expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); + if (fd->collapse > 1) + { + if (gimple_omp_for_combined_into_p (fd->for_stmt)) { gsi_prev (&gsi); expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); gsi_next (&gsi); } - } - else - { - expand_omp_build_assign (&gsi, fd->loop.v, - fold_convert (type, fd->loop.n1)); - if (fd->collapse > 1) + else for (i = 0; i < fd->collapse; i++) { tree itype = TREE_TYPE (fd->loops[i].v); @@ -10645,7 +10801,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); expand_omp_build_assign (&gsi, fd->loops[i].v, t); } - } + } /* Remove the GIMPLE_OMP_FOR statement. */ gsi_remove (&gsi, true); @@ -10658,9 +10814,9 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (fd->loop.v, fd->loop.step); + t = fold_build_pointer_plus (fd->loop.v, step); else - t = fold_build2 (PLUS_EXPR, type, fd->loop.v, fd->loop.step); + t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); expand_omp_build_assign (&gsi, fd->loop.v, t); if (fd->collapse > 1) @@ -10734,6 +10890,18 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) gimple_regimplify_operands (cond_stmt, &gsi); } + /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */ + if (is_simt) + { + gsi = gsi_start_bb (l2_bb); + step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (fd->loop.v, step); + else + t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); + expand_omp_build_assign (&gsi, fd->loop.v, t); + } + /* Remove GIMPLE_OMP_RETURN. */ gsi = gsi_last_bb (exit_bb); gsi_remove (&gsi, true); @@ -10763,30 +10931,29 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) ne->probability = REG_BR_PROB_BASE / 8; set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); + if (simt_maxlane) + { + cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, + NULL_TREE, NULL_TREE); + gsi = gsi_last_bb (entry_bb); + gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); + make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); + FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; + FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8; + BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8; + l2_dom_bb = entry_bb; + } + set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); + if (!broken_loop) { struct loop *loop = alloc_loop (); loop->header = l1_bb; loop->latch = cont_bb; add_loop (loop, l1_bb->loop_father); - if (safelen == NULL_TREE) - loop->safelen = INT_MAX; - else - { - safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); - if (TREE_CODE (safelen) != INTEGER_CST) - loop->safelen = 0; - else if (!tree_fits_uhwi_p (safelen) - || tree_to_uhwi (safelen) > INT_MAX) - loop->safelen = INT_MAX; - else - loop->safelen = tree_to_uhwi (safelen); - if (loop->safelen == 1) - loop->safelen = 0; - } + loop->safelen = safelen_int; if (simduid) { loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); @@ -13951,7 +14118,6 @@ expand_omp (struct omp_region *region) } } - /* Helper for build_omp_regions. Scan the dominator tree starting at block BB. PARENT is the region that contains BB. If SINGLE_TREE is true, the function ends once a single tree is built (otherwise, whole @@ -14834,12 +15000,14 @@ static void lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tree block; - gimple *stmt = gsi_stmt (*gsi_p); + gimple *stmt = gsi_stmt (*gsi_p), *g; gomp_ordered *ord_stmt = as_a (stmt); gcall *x; gbind *bind; bool simd = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_SIMD); + bool maybe_simt + = simd && omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf () > 1; bool threads = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_THREADS); @@ -14873,11 +15041,56 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx) 0); gimple_bind_add_stmt (bind, x); + tree counter = NULL_TREE, test = NULL_TREE, body = NULL_TREE; + if (maybe_simt) + { + counter = create_tmp_var (integer_type_node); + g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); + gimple_call_set_lhs (g, counter); + gimple_bind_add_stmt (bind, g); + + body = create_artificial_label (UNKNOWN_LOCATION); + test = create_artificial_label (UNKNOWN_LOCATION); + gimple_bind_add_stmt (bind, gimple_build_label (body)); + + tree simt_pred = create_tmp_var (integer_type_node); + g = gimple_build_call_internal (IFN_GOMP_SIMT_ORDERED_PRED, 1, counter); + gimple_call_set_lhs (g, simt_pred); + gimple_bind_add_stmt (bind, g); + + tree t = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (EQ_EXPR, simt_pred, integer_zero_node, t, test); + gimple_bind_add_stmt (bind, g); + + gimple_bind_add_stmt (bind, gimple_build_label (t)); + } lower_omp (gimple_omp_body_ptr (stmt), ctx); gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt))); gimple_bind_add_seq (bind, gimple_omp_body (stmt)); gimple_omp_set_body (stmt, NULL); + if (maybe_simt) + { + gimple_bind_add_stmt (bind, gimple_build_label (test)); + g = gimple_build_assign (counter, MINUS_EXPR, counter, integer_one_node); + gimple_bind_add_stmt (bind, g); + + tree c = build2 (GE_EXPR, boolean_type_node, counter, integer_zero_node); + tree nonneg = create_tmp_var (integer_type_node); + gimple_seq tseq = NULL; + gimplify_assign (nonneg, fold_convert (integer_type_node, c), &tseq); + gimple_bind_add_seq (bind, tseq); + + g = gimple_build_call_internal (IFN_GOMP_SIMT_VOTE_ANY, 1, nonneg); + gimple_call_set_lhs (g, nonneg); + gimple_bind_add_stmt (bind, g); + + tree end = create_artificial_label (UNKNOWN_LOCATION); + g = gimple_build_cond (NE_EXPR, nonneg, integer_zero_node, body, end); + gimple_bind_add_stmt (bind, g); + + gimple_bind_add_stmt (bind, gimple_build_label (end)); + } if (simd) x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 1, build_int_cst (NULL_TREE, threads)); @@ -17998,7 +18211,7 @@ const pass_data pass_data_lower_omp = OPTGROUP_NONE, /* optinfo_flags */ TV_NONE, /* tv_id */ PROP_gimple_any, /* properties_required */ - PROP_gimple_lomp, /* properties_provided */ + PROP_gimple_lomp | PROP_gimple_lomp_dev, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ 0, /* todo_flags_finish */ @@ -19930,6 +20143,113 @@ make_pass_oacc_device_lower (gcc::context *ctxt) { return new pass_oacc_device_lower (ctxt); } + + +/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets, + VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and + LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT + internal functions on non-SIMT targets, and likewise some SIMD internal + functions on SIMT targets. */ + +static unsigned int +execute_omp_device_lower () +{ + int vf = targetm.simt.vf ? targetm.simt.vf () : 1; + basic_block bb; + gimple_stmt_iterator gsi; + FOR_EACH_BB_FN (bb, cfun) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) + continue; + tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE; + tree type = lhs ? TREE_TYPE (lhs) : integer_type_node; + switch (gimple_call_internal_fn (stmt)) + { + case IFN_GOMP_SIMT_LANE: + case IFN_GOMP_SIMT_LAST_LANE: + rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE; + break; + case IFN_GOMP_SIMT_VF: + rhs = build_int_cst (type, vf); + break; + case IFN_GOMP_SIMT_ORDERED_PRED: + rhs = vf == 1 ? integer_zero_node : NULL_TREE; + if (rhs || !lhs) + unlink_stmt_vdef (stmt); + break; + case IFN_GOMP_SIMT_VOTE_ANY: + case IFN_GOMP_SIMT_XCHG_BFLY: + case IFN_GOMP_SIMT_XCHG_IDX: + rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE; + break; + case IFN_GOMP_SIMD_LANE: + case IFN_GOMP_SIMD_LAST_LANE: + rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE; + break; + case IFN_GOMP_SIMD_VF: + rhs = vf != 1 ? build_one_cst (type) : NULL_TREE; + break; + default: + continue; + } + if (lhs && !rhs) + continue; + stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop (); + gsi_replace (&gsi, stmt, false); + } + if (vf != 1) + cfun->has_force_vectorize_loops = false; + return 0; +} + +namespace { + +const pass_data pass_data_omp_device_lower = +{ + GIMPLE_PASS, /* type */ + "ompdevlow", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_cfg, /* properties_required */ + PROP_gimple_lomp_dev, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_omp_device_lower : public gimple_opt_pass +{ +public: + pass_omp_device_lower (gcc::context *ctxt) + : gimple_opt_pass (pass_data_omp_device_lower, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *ARG_UNUSED (fun)) + { + /* FIXME: this should use PROP_gimple_lomp_dev. */ +#ifdef ACCEL_COMPILER + return true; +#else + return ENABLE_OFFLOADING && (flag_openmp || in_lto_p); +#endif + } + virtual unsigned int execute (function *) + { + return execute_omp_device_lower (); + } + +}; // class pass_expand_omp_ssa + +} // anon namespace + +gimple_opt_pass * +make_pass_omp_device_lower (gcc::context *ctxt) +{ + return new pass_omp_device_lower (ctxt); +} /* "omp declare target link" handling pass. */ diff --git a/gcc/passes.def b/gcc/passes.def index 85a5af088f3..2a470a7de07 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -183,6 +183,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_fixup_cfg); NEXT_PASS (pass_lower_eh_dispatch); NEXT_PASS (pass_oacc_device_lower); + NEXT_PASS (pass_omp_device_lower); NEXT_PASS (pass_omp_target_link); NEXT_PASS (pass_all_optimizations); PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations) diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index da9ba1374b4..8befebe17b1 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -222,6 +222,7 @@ protected: of math functions; the current choices have been optimized. */ +#define PROP_gimple_lomp_dev (1 << 16) /* done omp_device_lower */ #define PROP_trees \ (PROP_gimple_any | PROP_gimple_lcf | PROP_gimple_leh | PROP_gimple_lomp) @@ -417,6 +418,7 @@ extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt); extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt); extern gimple_opt_pass *make_pass_oacc_device_lower (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_omp_device_lower (gcc::context *ctxt); extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt); extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt); extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt); -- 2.30.2