From 3f660e47266016fcf2e906d8dfc023a612557fe1 Mon Sep 17 00:00:00 2001 From: Razya Ladelsky Date: Wed, 16 Nov 2011 14:58:04 +0000 Subject: [PATCH] re PR tree-optimization/49960 (inconsistent outputs when enabling autopar for a self -dependence testcase) PR tree-optimization/49960 * gcc.dg/autopar/pr49960.c: New test. * gcc.dg/autopar/pr49960-1.c: New test. From-SVN: r181409 --- gcc/testsuite/gcc.dg/autopar/pr49960-1.c | 36 +++++++++++++++++ gcc/testsuite/gcc.dg/autopar/pr49960.c | 49 ++++++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/autopar/pr49960-1.c create mode 100644 gcc/testsuite/gcc.dg/autopar/pr49960.c diff --git a/gcc/testsuite/gcc.dg/autopar/pr49960-1.c b/gcc/testsuite/gcc.dg/autopar/pr49960-1.c new file mode 100644 index 00000000000..ac9d5678499 --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/pr49960-1.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +#include +#include + +int main() +{ + unsigned int x, y, idx, H = 1024, W = 1024; + + int * tmps = (int *)malloc(H*W*sizeof(int)); + + /* This loop gets parallelized even though output dependences exist + between writes to 'tmps' that prevent parallelization. + For example: tmps[1] = 1, ..., tmps[1] = 17. */ + + for(x = 1; x < H; x++) + { + for(y = 1; y < W; y++) + { + idx = x*W+y; + tmps[idx % 4096] = idx; + } + } + + for(x = 1; x < 8; x++) + printf("tmps[%d]=%d\n", x, tmps[x]); + + return 0; +} +/* Check that no loop gets parallelized. */ + +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 0 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 0 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "parloops" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/autopar/pr49960.c b/gcc/testsuite/gcc.dg/autopar/pr49960.c new file mode 100644 index 00000000000..1f3482f170d --- /dev/null +++ b/gcc/testsuite/gcc.dg/autopar/pr49960.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */ + +#include +#define MB 100 +#define NA 450 +#define MA 400 + +int T[MA][MB],A[MA][NA],B[MB][NA]; +void MRTRBR(int MA_1, int NA_1, int MB_1) +{ + int i,j, t,k; + + /* The outer most loop is not parallel because for different k's there + is write-write dependency for T[i][j]. */ + + /* The two inner loops don't get parallelized due to low number of + iterations. */ + + for (k = 3; k < NA_1; k++) + for (i = 3; i < MA_1; i++) + for (j = 3; j < MB_1; j++) + { + t = T[i][j]; + T[i][j] = t+2+A[i][k]*B[j][k]; + } +} +void main () +{ + int j,i; + + for (i = 3; i < MA; i++) + for (j = 3; j < MB; j++) + T[i][j] = (i>j?i:j); + + MRTRBR (MA,NA,MB); + + for (i = MA-1; i < MA; i++) + for (j = MB-10; j < MB; j++) + printf ("i %d j %d T[i][j] = %d\n",i,j,T[i][j]); +} + + +/* Check that the outer most loop doesn't get parallelized (thus no loop gets parallelized) */ + +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 0 "parloops" } } */ +/* { dg-final { scan-tree-dump-times "loopfn" 0 "optimized" } } */ +/* { dg-final { cleanup-tree-dump "parloops" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ -- 2.30.2