From 6e723923df52bdde7c3aebd6bccaae3c7bff19ee Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Mon, 7 Jan 2019 10:01:49 +0000 Subject: [PATCH] [nvptx] Force vl32 if calling vector-partitionable routines With PTX_MAX_VECTOR_LENGTH set to larger than PTX_WARP_SIZE, routines can be called from offloading regions with vector-size set to larger than warp size. OTOH, vector-partitionable routines assume warp-sized vector length. Detect if we're calling a vector-partitionable routine from an offloading region, and if so, fall back to warp-sized vector length in that region. 2019-01-07 Tom de Vries PR target/85486 * config/nvptx/nvptx.c (has_vector_partitionable_routine_calls_p): New function. (nvptx_goacc_validate_dims): Force vl32 if calling vector-partitionable routines. From-SVN: r267640 --- gcc/ChangeLog | 8 +++++++ gcc/config/nvptx/nvptx.c | 45 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3c7bb497cf4..ae2e85cae6a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2019-01-07 Tom de Vries + + PR target/85486 + * config/nvptx/nvptx.c (has_vector_partitionable_routine_calls_p): New + function. + (nvptx_goacc_validate_dims): Force vl32 if calling vector-partitionable + routines. + 2019-01-07 Jakub Jelinek * config/i386/sse.md (vec_extract): Use diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 5a4b38de522..7fdc285b6f8 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -59,6 +59,7 @@ #include "builtins.h" #include "omp-general.h" #include "omp-low.h" +#include "omp-offload.h" #include "gomp-constants.h" #include "dumpfile.h" #include "internal-fn.h" @@ -5496,6 +5497,40 @@ nvptx_apply_dim_limits (int dims[]) dims[GOMP_DIM_VECTOR] = PTX_WARP_SIZE; } +/* Return true if FNDECL contains calls to vector-partitionable routines. 
*/
+
+static bool
+has_vector_partitionable_routine_calls_p (tree fndecl)
+{
+  if (!fndecl)
+    return false;
+
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fndecl))
+    for (gimple_stmt_iterator i = gsi_start_bb (bb); !gsi_end_p (i);
+	 gsi_next_nondebug (&i))
+      {
+	gimple *stmt = gsi_stmt (i);
+	if (gimple_code (stmt) != GIMPLE_CALL)
+	  continue;
+
+	tree callee = gimple_call_fndecl (stmt);
+	if (!callee)
+	  continue;
+
+	tree attrs = oacc_get_fn_attrib (callee);
+	if (attrs == NULL_TREE)
+	  continue; /* Callee is not an OpenACC routine; keep scanning.  */
+
+	int partition_level = oacc_fn_attrib_level (attrs);
+	bool seq_routine_p = partition_level == GOMP_DIM_MAX;
+	if (!seq_routine_p)
+	  return true;
+      }
+
+  return false;
+}
+
 /* As nvptx_goacc_validate_dims, but does not return bool to indicate whether
    DIMS has changed.  */
 
@@ -5611,6 +5646,16 @@ nvptx_goacc_validate_dims_1 (tree decl, int dims[], int fn_level)
     old_dims[i] = dims[i];
 
   const char *vector_reason = NULL;
+  if (offload_region_p && has_vector_partitionable_routine_calls_p (decl))
+    {
+      if (dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE)
+	{
+	  vector_reason = G_("using vector_length (%d) due to call to"
+			     " vector-partitionable routine, ignoring %d");
+	  dims[GOMP_DIM_VECTOR] = PTX_WARP_SIZE;
+	}
+    }
+
   if (dims[GOMP_DIM_VECTOR] == 0)
     {
       vector_reason = G_("using vector_length (%d), ignoring runtime setting");
-- 
2.30.2