From 344f09a756ebd50510cc1eb3db111fd61c527702 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Tue, 4 Aug 2020 09:53:08 +0200 Subject: [PATCH] [nvptx] Handle V2DI/V2SI mode in nvptx_gen_shuffle With the pr96628-part1.f90 source and -ftree-slp-vectorize, we run into an ICE due to the fact that V2DI mode is not handled in nvptx_gen_shuffle. Fix this by adding handling of V2DI as well as V2SI mode in nvptx_gen_shuffle. Build and reg-tested on x86_64 with nvptx accelerator. gcc/ChangeLog: PR target/96428 * config/nvptx/nvptx.c (nvptx_gen_shuffle): Handle V2SI/V2DI. libgomp/ChangeLog: PR target/96428 * testsuite/libgomp.oacc-fortran/pr96628-part1.f90: New test. * testsuite/libgomp.oacc-fortran/pr96628-part2.f90: New test. --- gcc/config/nvptx/nvptx.c | 38 +++++++++++++++++++ .../libgomp.oacc-fortran/pr96628-part1.f90 | 20 ++++++++++ .../libgomp.oacc-fortran/pr96628-part2.f90 | 37 ++++++++++++++++++ 3 files changed, 95 insertions(+) create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90 diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index d8a8fb2d55b..cf53a921e5b 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -1796,6 +1796,44 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind) end_sequence (); } break; + case E_V2SImode: + { + rtx src0 = gen_rtx_SUBREG (SImode, src, 0); + rtx src1 = gen_rtx_SUBREG (SImode, src, 4); + rtx dst0 = gen_rtx_SUBREG (SImode, dst, 0); + rtx dst1 = gen_rtx_SUBREG (SImode, dst, 4); + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + start_sequence (); + emit_insn (gen_movsi (tmp0, src0)); + emit_insn (gen_movsi (tmp1, src1)); + emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind)); + emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind)); + emit_insn (gen_movsi (dst0, tmp0)); + emit_insn (gen_movsi (dst1, tmp1)); + res = get_insns (); + end_sequence (); + } + break; + case E_V2DImode: + { + rtx src0 = gen_rtx_SUBREG (DImode, src, 0); + rtx src1 = gen_rtx_SUBREG (DImode, src, 8); + rtx dst0 = gen_rtx_SUBREG (DImode, dst, 0); + rtx dst1 = gen_rtx_SUBREG (DImode, dst, 8); + rtx tmp0 = gen_reg_rtx (DImode); + rtx tmp1 = gen_reg_rtx (DImode); + start_sequence (); + emit_insn (gen_movdi (tmp0, src0)); + emit_insn (gen_movdi (tmp1, src1)); + emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind)); + emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind)); + emit_insn (gen_movdi (dst0, tmp0)); + emit_insn (gen_movdi (dst1, tmp1)); + res = get_insns (); + end_sequence (); + } + break; case E_BImode: { rtx tmp = gen_reg_rtx (SImode); diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90 new file mode 100644 index 00000000000..71219f9c467 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90 @@ -0,0 +1,20 @@ +! { dg-do run } +! { dg-additional-sources pr96628-part2.f90 } +! { dg-additional-options "-ftree-slp-vectorize" } +! +! This file is compiled first +module m2 + real*8 :: mysum + !$acc declare device_resident(mysum) +contains + SUBROUTINE one(t) + !$acc routine + REAL*8, INTENT(IN) :: t(:) + mysum = sum(t) + END SUBROUTINE one + SUBROUTINE two(t) + !$acc routine seq + REAL*8, INTENT(INOUT) :: t(:) + t = (100.0_8*t)/sum + END SUBROUTINE two +end module m2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90 new file mode 100644 index 00000000000..784dc27e19e --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90 @@ -0,0 +1,37 @@ +! { dg-do compile { target skip-all-targets } } +! +! Main file is pr96628-part1.f90 + +MODULE m + IMPLICIT NONE + REAL*8, ALLOCATABLE :: t(:) +CONTAINS + SUBROUTINE run() + use m2 + IMPLICIT NONE + + INTEGER :: i,j ! loop indices + !$acc data present(t) + !$acc parallel + !$acc loop gang + DO j = 1,2 + !$acc loop vector + DO i = 1,2 + CALL one(t(:)) + CALL two(t(:)) + END DO + END DO + !$acc end parallel + !$acc end data + END SUBROUTINE run +END MODULE m + +use m +implicit none +integer :: i +t = [(3.0_8*i, i = 1, 100)] +!$acc data copy(t) +call run +!$acc end data +if (any (abs(t - [((300.0_8*i)/15150.0_8, i = 1, 100)]) < 10.0_8*epsilon(t))) stop 1 +end -- 2.30.2