From e0ca27c53103d532b51a61679b5d4c13d35a09d0 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Mon, 30 Nov 2015 16:34:26 +0000 Subject: [PATCH] Handle BUILT_IN_GOMP_PARALLEL in ipa-pta 2015-11-30 Tom de Vries PR tree-optimization/46032 * tree-ssa-structalias.c (find_func_aliases_for_call_arg): New function, factored out of ... (find_func_aliases_for_call): ... here. (find_func_aliases_for_builtin_call, find_func_clobbers): Handle BUILT_IN_GOMP_PARALLEL. (ipa_pta_execute): Same. Handle node->parallelized_function as a local function. * gcc.dg/pr46032.c: New test. * testsuite/libgomp.c/pr46032.c: New test. From-SVN: r231076 --- gcc/ChangeLog | 11 +++++ gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/gcc.dg/pr46032.c | 47 ++++++++++++++++++ gcc/tree-ssa-structalias.c | 71 ++++++++++++++++++++++----- libgomp/ChangeLog | 5 ++ libgomp/testsuite/libgomp.c/pr46032.c | 44 +++++++++++++++++ 6 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr46032.c create mode 100644 libgomp/testsuite/libgomp.c/pr46032.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 735fbd9964c..229aa77f89b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2015-11-30 Tom de Vries + + PR tree-optimization/46032 + * tree-ssa-structalias.c (find_func_aliases_for_call_arg): New function, + factored out of ... + (find_func_aliases_for_call): ... here. + (find_func_aliases_for_builtin_call, find_func_clobbers): Handle + BUILT_IN_GOMP_PARALLEL. + (ipa_pta_execute): Same. Handle node->parallelized_function as a local + function. + 2015-11-30 Jakub Jelinek PR tree-optimization/68501 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 368b4e7791a..c9283adc88f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2015-11-30 Tom de Vries + + PR tree-optimization/46032 + * gcc.dg/pr46032.c: New test. 
+ 2015-11-30 Richard Biener PR tree-optimization/68592 diff --git a/gcc/testsuite/gcc.dg/pr46032.c b/gcc/testsuite/gcc.dg/pr46032.c new file mode 100644 index 00000000000..b91190e871f --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr46032.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fopenmp -ftree-vectorize -std=c99 -fipa-pta -fdump-tree-vect-all" } */ + +extern void abort (void); + +#define nEvents 1000 + +static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize"))) +init (unsigned *results, unsigned *pData) +{ + unsigned int i; + for (i = 0; i < nEvents; ++i) + pData[i] = i % 3; +} + +static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize"))) +check (unsigned *results) +{ + unsigned sum = 0; + for (int idx = 0; idx < (int)nEvents; idx++) + sum += results[idx]; + + if (sum != 1998) + abort (); +} + +int +main (void) +{ + unsigned results[nEvents]; + unsigned pData[nEvents]; + unsigned coeff = 2; + + init (&results[0], &pData[0]); + +#pragma omp parallel for + for (int idx = 0; idx < (int)nEvents; idx++) + results[idx] = coeff * pData[idx]; + + check (&results[0]); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loop" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "versioning for alias required" "vect" } } */ + diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index f24ebeba815..7f4a8ad60e4 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -4139,6 +4139,24 @@ get_fi_for_callee (gcall *call) return get_vi_for_tree (fn); } +/* Create constraints for assigning call argument ARG to the incoming parameter + INDEX of function FI. 
*/ + +static void +find_func_aliases_for_call_arg (varinfo_t fi, unsigned index, tree arg) +{ + struct constraint_expr lhs; + lhs = get_function_part_constraint (fi, fi_parm_base + index); + + auto_vec<ce_s, 2> rhsc; + get_constraint_for_rhs (arg, &rhsc); + + unsigned j; + struct constraint_expr *rhsp; + FOR_EACH_VEC_ELT (rhsc, j, rhsp) + process_constraint (new_constraint (lhs, *rhsp)); +} + /* Create constraints for the builtin call T. Return true if the call was handled, otherwise false. */ @@ -4488,6 +4506,25 @@ find_func_aliases_for_builtin_call (struct function *fn, gcall *t) } return true; } + case BUILT_IN_GOMP_PARALLEL: + { + /* Handle __builtin_GOMP_parallel (fn, data, num_threads, flags) as + fn (data). */ + if (in_ipa_mode) + { + tree fnarg = gimple_call_arg (t, 0); + gcc_assert (TREE_CODE (fnarg) == ADDR_EXPR); + tree fndecl = TREE_OPERAND (fnarg, 0); + tree arg = gimple_call_arg (t, 1); + gcc_assert (TREE_CODE (arg) == ADDR_EXPR); + + varinfo_t fi = get_vi_for_tree (fndecl); + find_func_aliases_for_call_arg (fi, 0, arg); + return true; + } + /* Else fallthru to generic call handling. */ + break; + } /* printf-style functions may have hooks to set pointers to point to somewhere into the generated string. Leave them for a later exercise... */ @@ -4546,18 +4583,8 @@ find_func_aliases_for_call (struct function *fn, gcall *t) parameters of the function. */ for (j = 0; j < gimple_call_num_args (t); j++) { - struct constraint_expr lhs ; - struct constraint_expr *rhsp; tree arg = gimple_call_arg (t, j); - - get_constraint_for_rhs (arg, &rhsc); - lhs = get_function_part_constraint (fi, fi_parm_base + j); - while (rhsc.length () != 0) - { - rhsp = &rhsc.last (); - process_constraint (new_constraint (lhs, *rhsp)); - rhsc.pop (); - } + find_func_aliases_for_call_arg (fi, j, arg); } /* If we are returning a value, assign it to the result. 
*/ @@ -5036,6 +5063,8 @@ find_func_clobbers (struct function *fn, gimple *origt) case BUILT_IN_VA_START: case BUILT_IN_VA_END: return; + case BUILT_IN_GOMP_PARALLEL: + return; /* printf-style functions may have hooks to set pointers to point to somewhere into the generated string. Leave them for a later exercise... */ @@ -7345,6 +7374,18 @@ ipa_pta_execute (void) gcc_assert (!node->clone_of); + /* When parallelizing a code region, we split the region off into a + separate function, to be run by several threads in parallel. So for a + function foo, we split off a region into a function + foo._0 (void *foodata), and replace the region with some variant of a + function call run_on_threads (&foo._0, data). The '&foo._0' sets the + address_taken bit for function foo._0, which would make it non-local. + But for the purpose of ipa-pta, we can regard the run_on_threads call + as a local call foo._0 (data), so we ignore address_taken on nodes + with parallelized_function set. */ + bool node_address_taken = (node->address_taken + && !node->parallelized_function); + /* For externally visible or attribute used annotated functions use local constraints for their arguments. For local functions we see all callers and thus do not need initial @@ -7352,7 +7393,7 @@ ipa_pta_execute (void) bool nonlocal_p = (node->used_from_other_partition || node->externally_visible || node->force_output - || node->address_taken); + || node_address_taken); vi = create_function_info_for (node->decl, alias_get_name (node->decl), false, @@ -7504,7 +7545,11 @@ ipa_pta_execute (void) continue; /* Handle direct calls to functions with body. 
*/ - decl = gimple_call_fndecl (stmt); + if (gimple_call_builtin_p (stmt, BUILT_IN_GOMP_PARALLEL)) + decl = TREE_OPERAND (gimple_call_arg (stmt, 0), 0); + else + decl = gimple_call_fndecl (stmt); + if (decl && (fi = lookup_vi_for_tree (decl)) && fi->is_fn_info) diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index a2ff98c6deb..ce2828a8301 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,8 @@ +2015-11-30 Tom de Vries + + PR tree-optimization/46032 + * testsuite/libgomp.c/pr46032.c: New test. + 2015-11-27 Jakub Jelinek PR libgomp/68579 diff --git a/libgomp/testsuite/libgomp.c/pr46032.c b/libgomp/testsuite/libgomp.c/pr46032.c new file mode 100644 index 00000000000..2178aa7c9bb --- /dev/null +++ b/libgomp/testsuite/libgomp.c/pr46032.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -std=c99 -fipa-pta" } */ + + +extern void abort (void); + +#define nEvents 1000 + +static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize"))) +init (unsigned *results, unsigned *pData) +{ + unsigned int i; + for (i = 0; i < nEvents; ++i) + pData[i] = i % 3; +} + +static void __attribute__((noinline, noclone, optimize("-fno-tree-vectorize"))) +check (unsigned *results) +{ + unsigned sum = 0; + for (int idx = 0; idx < (int)nEvents; idx++) + sum += results[idx]; + + if (sum != 1998) + abort (); +} + +int +main (void) +{ + unsigned results[nEvents]; + unsigned pData[nEvents]; + unsigned coeff = 2; + + init (&results[0], &pData[0]); + +#pragma omp parallel for + for (int idx = 0; idx < (int)nEvents; idx++) + results[idx] = coeff * pData[idx]; + + check (&results[0]); + + return 0; +} -- 2.30.2