From 8c8e9a6bb656fc50b9a166ba452d252fe05a2c38 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Fri, 19 Jan 2018 16:29:41 +0000 Subject: [PATCH] [nvptx] Fix bug in jit bug workaround 2018-01-19 Tom de Vries Cesar Philippidis PR target/83920 * config/nvptx/nvptx.c (nvptx_single): Fix jit workaround. * testsuite/libgomp.oacc-c-c++-common/pr83920.c: New test. * testsuite/libgomp.oacc-fortran/pr83920.f90: New test. Co-Authored-By: Cesar Philippidis From-SVN: r256894 --- gcc/ChangeLog | 6 ++++ gcc/config/nvptx/nvptx.c | 28 ++++++++++++++-- libgomp/ChangeLog | 7 ++++ .../libgomp.oacc-c-c++-common/pr83920.c | 32 +++++++++++++++++++ .../libgomp.oacc-fortran/pr83920.f90 | 28 ++++++++++++++++ 5 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr83920.c create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/pr83920.f90 diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 7aa2919f0cf..162ae1b1283 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2018-01-19 Tom de Vries + Cesar Philippidis + + PR target/83920 + * config/nvptx/nvptx.c (nvptx_single): Fix jit workaround. + 2018-01-19 Cesar Philippidis PR target/83790 diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 4cb87c8ad07..f5bb4387865 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -4102,9 +4102,33 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) There is nothing in the PTX spec to suggest that this is wrong, or to explain why the extra initialization is needed. So, we classify - it as a JIT bug, and the extra initialization as workaround. */ - emit_insn_before (gen_movbi (pvar, const0_rtx), + it as a JIT bug, and the extra initialization as workaround: + + { + .reg .u32 %x; + mov.u32 %x,%tid.x; + setp.ne.u32 %rnotvzero,%x,0; + } + + +.reg .pred %rcond2; + +setp.eq.u32 %rcond2, 1, 0; + + @%rnotvzero bra Lskip; + setp.<op>.<type> 
%rcond,op1,op2; + +mov.pred %rcond2, %rcond; + Lskip: + +mov.pred %rcond, %rcond2; + selp.u32 %rcondu32,1,0,%rcond; + shfl.idx.b32 %rcondu32,%rcondu32,0,31; + setp.ne.u32 %rcond,%rcondu32,0; + */ + rtx_insn *label = PREV_INSN (tail); + gcc_assert (label && LABEL_P (label)); + rtx tmp = gen_reg_rtx (BImode); + emit_insn_before (gen_movbi (tmp, const0_rtx), bb_first_real_insn (from)); + emit_insn_before (gen_rtx_SET (tmp, pvar), label); + emit_insn_before (gen_rtx_SET (pvar, tmp), tail); #endif emit_insn_before (nvptx_gen_vcast (pvar), tail); } diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index db886ac932a..c308227951b 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,10 @@ +2018-01-19 Tom de Vries + Cesar Philippidis + + PR target/83920 + * testsuite/libgomp.oacc-c-c++-common/pr83920.c: New test. + * testsuite/libgomp.oacc-fortran/pr83920.f90: New test. + 2018-01-03 Jakub Jelinek Update copyright years. diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr83920.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr83920.c new file mode 100644 index 00000000000..6cd3b5d6f06 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr83920.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ + +#include <stdlib.h> + +#define n 10 + +static void __attribute__((noinline)) __attribute__((noclone)) +foo (int beta, int *c) +{ + #pragma acc parallel copy(c[0:(n * n) - 1]) num_gangs(2) + #pragma acc loop gang + for (int j = 0; j < n; ++j) + if (beta != 1) + { + #pragma acc loop vector + for (int i = 0; i < n; ++i) + c[i + (j * n)] = 0; + } +} + +int +main (void) +{ + int c[n * n]; + + c[0] = 1; + foo (0, c); + if (c[0] != 0) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr83920.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr83920.f90 new file mode 100644 index 00000000000..34ad001abcd --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr83920.f90 @@ -0,0 +1,28 @@ +! 
{ dg-do run } + +subroutine foo (BETA, C) + real :: C(100,100) + integer :: i, j, l + real, parameter :: one = 1.0 + real :: beta + + !$acc parallel copy(c(1:100,1:100)) num_gangs(2) + !$acc loop gang + do j = 1, 100 + if (beta /= one) then + !$acc loop vector + do i = 1, 100 + C(i,j) = 0.0 + end do + end if + end do + !$acc end parallel +end subroutine foo + +program test_foo + real :: c(100,100), beta + beta = 0.0 + c(:,:) = 1.0 + call foo (beta, c) + if (c(1,1) /= 0.0) call abort () +end program test_foo -- 2.30.2