From f64b12bd152735aacb948424766b33303bbeb5f8 Mon Sep 17 00:00:00 2001 From: Cesar Philippidis Date: Mon, 18 Sep 2017 07:15:37 -0700 Subject: [PATCH] omp-offload.c (oacc_xform_loop): Enable SIMD vectorization on non-SIMT targets in acc vector loops. gcc/ * omp-offload.c (oacc_xform_loop): Enable SIMD vectorization on non-SIMT targets in acc vector loops. From-SVN: r252928 --- gcc/ChangeLog | 5 +++++ gcc/omp-offload.c | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 39bae5be959..eb028010efa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2017-09-18 Cesar Philippidis + + * omp-offload.c (oacc_xform_loop): Enable SIMD vectorization on + non-SIMT targets in acc vector loops. + 2017-09-18 Claudiu Zissulescu * configure.ac: Add arc and check if assembler supports gdwarf2. diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c index 2d4fd411680..9d5b8bef649 100644 --- a/gcc/omp-offload.c +++ b/gcc/omp-offload.c @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see #include "intl.h" #include "stringpool.h" #include "attribs.h" +#include "cfgloop.h" /* Describe the OpenACC looping structure of a function. The entire function is held in a 'NULL' loop. */ @@ -370,6 +371,30 @@ oacc_xform_loop (gcall *call) break; case IFN_GOACC_LOOP_OFFSET: + /* Enable vectorization on non-SIMT targets. */ + if (!targetm.simt.vf + && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR) + /* If not -fno-tree-loop-vectorize, hint that we want to vectorize + the loop. */ + && (flag_tree_loop_vectorize + || !global_options_set.x_flag_tree_loop_vectorize)) + { + basic_block bb = gsi_bb (gsi); + struct loop *parent = bb->loop_father; + struct loop *body = parent->inner; + + parent->force_vectorize = true; + parent->safelen = INT_MAX; + + /* "Chunking loops" may have inner loops. */ + if (parent->inner) + { + body->force_vectorize = true; + body->safelen = INT_MAX; + } + + cfun->has_force_vectorize_loops = true; + } if (striding) { r = oacc_thread_numbers (true, mask, &seq); -- 2.30.2