From dd3c1b14afa954856789c4cb955dec474d799edd Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Fri, 13 Nov 2015 15:08:11 +0000 Subject: [PATCH] nvptx.c (nvptx_generate_vector_shuffle): Deal with complex types. gcc/ * config/nvptx/nvptx.c (nvptx_generate_vector_shuffle): Deal with complex types. libgomp/ * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: New. * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: New. From-SVN: r230325 --- gcc/ChangeLog | 5 ++ gcc/config/nvptx/nvptx.c | 49 ++++++++++++----- libgomp/ChangeLog | 5 ++ .../reduction-cplx-dbl.c | 52 +++++++++++++++++++ .../reduction-cplx-flt.c | 52 +++++++++++++++++++ 5 files changed, 151 insertions(+), 12 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3c2d9364bca..18d0fdb2353 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -9,6 +9,11 @@ using EDGE_FALSE_VALUE for edges to the call block and EDGE_TRUE_VALUE for the others. +2015-11-13 Nathan Sidwell + + * config/nvptx/nvptx.c (nvptx_generate_vector_shuffle): Deal with + complex types. + 2015-11-13 Nathan Sidwell * gimplify.c (oacc_default_clause): Use inform for enclosing scope. 
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index d8673018819..26c2e961051 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -3634,26 +3634,51 @@ nvptx_generate_vector_shuffle (location_t loc, { unsigned fn = NVPTX_BUILTIN_SHUFFLE; tree_code code = NOP_EXPR; - tree type = unsigned_type_node; - enum machine_mode mode = TYPE_MODE (TREE_TYPE (var)); + tree arg_type = unsigned_type_node; + tree var_type = TREE_TYPE (var); + tree dest_type = var_type; - if (!INTEGRAL_MODE_P (mode)) + if (TREE_CODE (var_type) == COMPLEX_TYPE) + var_type = TREE_TYPE (var_type); + + if (TREE_CODE (var_type) == REAL_TYPE) code = VIEW_CONVERT_EXPR; - if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (DImode)) + + if (TYPE_SIZE (var_type) + == TYPE_SIZE (long_long_unsigned_type_node)) { fn = NVPTX_BUILTIN_SHUFFLELL; - type = long_long_unsigned_type_node; + arg_type = long_long_unsigned_type_node; } - + tree call = nvptx_builtin_decl (fn, true); - call = build_call_expr_loc - (loc, call, 3, fold_build1 (code, type, var), - build_int_cst (unsigned_type_node, shift), - build_int_cst (unsigned_type_node, SHUFFLE_DOWN)); + tree bits = build_int_cst (unsigned_type_node, shift); + tree kind = build_int_cst (unsigned_type_node, SHUFFLE_DOWN); + tree expr; - call = fold_build1 (code, TREE_TYPE (dest_var), call); + if (var_type != dest_type) + { + /* Do real and imaginary parts separately. 
*/ + tree real = fold_build1 (REALPART_EXPR, var_type, var); + real = fold_build1 (code, arg_type, real); + real = build_call_expr_loc (loc, call, 3, real, bits, kind); + real = fold_build1 (code, var_type, real); + + tree imag = fold_build1 (IMAGPART_EXPR, var_type, var); + imag = fold_build1 (code, arg_type, imag); + imag = build_call_expr_loc (loc, call, 3, imag, bits, kind); + imag = fold_build1 (code, var_type, imag); + + expr = fold_build2 (COMPLEX_EXPR, dest_type, real, imag); + } + else + { + expr = fold_build1 (code, arg_type, var); + expr = build_call_expr_loc (loc, call, 3, expr, bits, kind); + expr = fold_build1 (code, dest_type, expr); + } - gimplify_assign (dest_var, call, seq); + gimplify_assign (dest_var, expr, seq); } /* Insert code to locklessly update *PTR with *PTR OP VAR just before diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 4e0cddb4e28..9ca963ac314 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,8 @@ +2015-11-13 Nathan Sidwell + + * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: New. + * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: New. + 2015-11-12 James Norris Joseph Myers diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c new file mode 100644 index 00000000000..314e5118be9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c @@ -0,0 +1,52 @@ + +#include <complex.h> + +/* Double float has 52 bits of fraction. 
*/ +#define FRAC (1.0 / (1LL << 48)) + +int close_enough (double _Complex a, double _Complex b) +{ + double _Complex diff = a - b; + double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a); + double mag2_diff = (__real__(diff) * __real__ (diff) + + __imag__ (diff) * __imag__ (diff)); + + return mag2_diff / mag2_a < (FRAC * FRAC); +} + +int main (void) +{ +#define N 100 + double _Complex ary[N], sum, prod, tsum, tprod; + int ix; + + sum = tsum = 0; + prod = tprod = 1; + + for (ix = 0; ix < N; ix++) + { + double frac = ix * (1.0 / 1024) + 1.0; + + ary[ix] = frac + frac * 2.0i - 1.0i; + sum += ary[ix]; + prod *= ary[ix]; + } + +#pragma acc parallel vector_length(32) copyin(ary) copy (tsum, tprod) + { +#pragma acc loop vector reduction(+:tsum) reduction (*:tprod) + for (ix = 0; ix < N; ix++) + { + tsum += ary[ix]; + tprod *= ary[ix]; + } + } + + if (!close_enough (sum, tsum)) + return 1; + + if (!close_enough (prod, tprod)) + return 1; + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c new file mode 100644 index 00000000000..b3bde656079 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c @@ -0,0 +1,52 @@ + +#include <complex.h> + +/* Single float has 23 bits of fraction. 
*/ +#define FRAC (1.0f / (1 << 20)) + +int close_enough (float _Complex a, float _Complex b) +{ + float _Complex diff = a - b; + float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a); + float mag2_diff = (__real__(diff) * __real__ (diff) + + __imag__ (diff) * __imag__ (diff)); + + return mag2_diff / mag2_a < (FRAC * FRAC); +} + +int main (void) +{ +#define N 100 + float _Complex ary[N], sum, prod, tsum, tprod; + int ix; + + sum = tsum = 0; + prod = tprod = 1; + + for (ix = 0; ix < N; ix++) + { + float frac = ix * (1.0f / 1024) + 1.0f; + + ary[ix] = frac + frac * 2.0i - 1.0i; + sum += ary[ix]; + prod *= ary[ix]; + } + +#pragma acc parallel vector_length(32) copyin(ary) copy (tsum, tprod) + { +#pragma acc loop vector reduction(+:tsum) reduction (*:tprod) + for (ix = 0; ix < N; ix++) + { + tsum += ary[ix]; + tprod *= ary[ix]; + } + } + + if (!close_enough (sum, tsum)) + return 1; + + if (!close_enough (prod, tprod)) + return 1; + + return 0; +} -- 2.30.2