From d6e840eed2559f03c82cdb203cc2339c4a5c6425 Mon Sep 17 00:00:00 2001 From: Richard Guenther Date: Sun, 27 Apr 2008 16:27:08 +0000 Subject: [PATCH] re PR tree-optimization/18754 (unrolling happens too late/SRA does not happen late enough) 2008-04-27 Richard Guenther PR tree-optimization/18754 PR tree-optimization/34223 * tree-pass.h (pass_complete_unrolli): Declare. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Print loop size before and after unconditionally of UL_NO_GROWTH in effect. Rewrite loop into loop closed SSA form if it is not already. (tree_unroll_loops_completely): Re-structure to iterate over innermost loops with intermediate CFG cleanups. Unroll outermost loops only if requested or the code does not grow doing so. * tree-ssa-loop.c (gate_tree_vectorize): Don't shortcut if no loops are available. (tree_vectorize): Instead do so here. (tree_complete_unroll): Also unroll outermost loops. (tree_complete_unroll_inner): New function. (gate_tree_complete_unroll_inner): Likewise. (pass_complete_unrolli): New pass. * tree-ssa-loop-manip.c (find_uses_to_rename_use): Only record uses outside of the loop. (tree_duplicate_loop_to_header_edge): Only verify loop-closed SSA form if it is available. * tree-flow.h (tree_unroll_loops_completely): Add extra parameter. * passes.c (init_optimization_passes): Schedule complete inner loop unrolling pass before the first CCP pass after final inlining. * gcc.dg/tree-ssa/loop-36.c: New testcase. * gcc.dg/tree-ssa/loop-37.c: Likewise. * gcc.dg/vect/vect-118.c: Likewise. * gcc.dg/Wunreachable-8.c: XFAIL bogus warning. * gcc.dg/vect/vect-66.c: Increase loop trip count. * gcc.dg/vect/no-section-anchors-vect-66.c: Likewise. * gcc.dg/vect/no-section-anchors-vect-69.c: Likewise. * gcc.dg/vect/vect-76.c: Likewise. * gcc.dg/vect/vect-outer-6.c: Likewise. * gcc.dg/vect/vect-outer-1.c: Likewise. * gcc.dg/vect/vect-outer-1a.c: Likewise. * gcc.dg/vect/vect-11a.c: Likewise. * gcc.dg/vect/vect-shift-1.c: Likewise. 
* gcc.target/i386/vectorize1.c: Likewise. From-SVN: r134730 --- gcc/ChangeLog | 27 +++++++ gcc/passes.c | 1 + gcc/testsuite/ChangeLog | 19 +++++ gcc/testsuite/gcc.dg/Wunreachable-8.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/loop-36.c | 21 ++++++ gcc/testsuite/gcc.dg/tree-ssa/loop-37.c | 27 +++++++ .../gcc.dg/vect/no-section-anchors-vect-66.c | 19 +++-- .../gcc.dg/vect/no-section-anchors-vect-69.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-118.c | 34 +++++++++ gcc/testsuite/gcc.dg/vect/vect-11a.c | 30 +++++--- gcc/testsuite/gcc.dg/vect/vect-66.c | 45 +++++++---- gcc/testsuite/gcc.dg/vect/vect-76.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-outer-1.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-outer-1a.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-outer-6.c | 8 +- gcc/testsuite/gcc.dg/vect/vect-shift-1.c | 8 +- gcc/testsuite/gcc.target/i386/vectorize1.c | 4 +- gcc/tree-flow.h | 2 +- gcc/tree-pass.h | 1 + gcc/tree-ssa-loop-ivcanon.c | 74 +++++++++++-------- gcc/tree-ssa-loop-manip.c | 10 ++- gcc/tree-ssa-loop.c | 54 +++++++++++++- 22 files changed, 307 insertions(+), 87 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-36.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-37.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-118.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 37d675217a5..f1144d65696 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,30 @@ +2008-04-27 Richard Guenther + + PR tree-optimization/18754 + PR tree-optimization/34223 + * tree-pass.h (pass_complete_unrolli): Declare. + * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Print + loop size before and after unconditionally of UL_NO_GROWTH in effect. + Rewrite loop into loop closed SSA form if it is not already. + (tree_unroll_loops_completely): Re-structure to iterate over + innermost loops with intermediate CFG cleanups. + Unroll outermost loops only if requested or the code does not grow + doing so. 
+ * tree-ssa-loop.c (gate_tree_vectorize): Don't shortcut if no + loops are available. + (tree_vectorize): Instead do so here. + (tree_complete_unroll): Also unroll outermost loops. + (tree_complete_unroll_inner): New function. + (gate_tree_complete_unroll_inner): Likewise. + (pass_complete_unrolli): New pass. + * tree-ssa-loop-manip.c (find_uses_to_rename_use): Only record + uses outside of the loop. + (tree_duplicate_loop_to_header_edge): Only verify loop-closed SSA + form if it is available. + * tree-flow.h (tree_unroll_loops_completely): Add extra parameter. + * passes.c (init_optimization_passes): Schedule complete inner + loop unrolling pass before the first CCP pass after final inlining. + 2008-04-27 Nathan Sidwell * targhooks.h (default_emutls_var_fields, diff --git a/gcc/passes.c b/gcc/passes.c index 2d05171e712..42f456c7f83 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -567,6 +567,7 @@ init_optimization_passes (void) NEXT_PASS (pass_rename_ssa_copies); /* Initial scalar cleanups. */ + NEXT_PASS (pass_complete_unrolli); NEXT_PASS (pass_ccp); NEXT_PASS (pass_phiprop); NEXT_PASS (pass_fre); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4c1397ca992..4f2b2129b3a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,22 @@ +2008-04-27 Richard Guenther + + PR tree-optimization/18754 + PR tree-optimization/34223 + * gcc.dg/tree-ssa/loop-36.c: New testcase. + * gcc.dg/tree-ssa/loop-37.c: Likewise. + * gcc.dg/vect/vect-118.c: Likewise. + * gcc.dg/Wunreachable-8.c: XFAIL bogus warning. + * gcc.dg/vect/vect-66.c: Increase loop trip count. + * gcc.dg/vect/no-section-anchors-vect-66.c: Likewise. + * gcc.dg/vect/no-section-anchors-vect-69.c: Likewise. + * gcc.dg/vect/vect-76.c: Likewise. + * gcc.dg/vect/vect-outer-6.c: Likewise. + * gcc.dg/vect/vect-outer-1.c: Likewise. + * gcc.dg/vect/vect-outer-1a.c: Likewise. + * gcc.dg/vect/vect-11a.c: Likewise. + * gcc.dg/vect/vect-shift-1.c: Likewise. 
+ * gcc.target/i386/vectorize1.c: Likewise. + 2008-04-27 Nathan Sidwell * gcc.dg/tls/section-2.c: New. diff --git a/gcc/testsuite/gcc.dg/Wunreachable-8.c b/gcc/testsuite/gcc.dg/Wunreachable-8.c index 7fbb2e6ad60..81254ba3b2a 100644 --- a/gcc/testsuite/gcc.dg/Wunreachable-8.c +++ b/gcc/testsuite/gcc.dg/Wunreachable-8.c @@ -6,7 +6,7 @@ float Factorial(float X) int k,j; for (k=1; k < 5; k++) { - val += 1.0; + val += 1.0; /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */ } return (val); /* { dg-bogus "will never be executed" } */ } diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-36.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-36.c new file mode 100644 index 00000000000..6b408a20386 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-36.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-dce2" } */ + +struct X { float array[4]; }; + +struct X a,b; + +float foobar () { + float s = 0; + unsigned int d; + struct X c; + for (d=0; d<4; ++d) + c.array[d] = a.array[d] * b.array[d]; + for (d=0; d<4; ++d) + s+=c.array[d]; + return s; +} + +/* The temporary structure should have been promoted to registers + by FRE after the loops have been unrolled by the early unrolling pass. 
*/ +/* { dg-final { scan-tree-dump-not "c\.array" "dce2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-37.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-37.c new file mode 100644 index 00000000000..46b26bb873f --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-37.c @@ -0,0 +1,27 @@ +/* { dg-do link } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +extern void link_error (void); +static const int my_array [3] = { 4, 5, 6 }; + +void f0 (void) +{ + int j, sum = 0; + for (j = 0; j < 3; j ++) + sum += my_array [j]; + if (15 != sum) + link_error (); +} + +int f1 (int a []) +{ + int j, sum = 0; + for (j = 0; j < 3; j ++) + sum += a [j] + my_array [j]; + return sum; +} + +int main() { } + +/* { dg-final { scan-tree-dump-not "my_array" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c index 2be14ea33b6..d590975e57f 100644 --- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c +++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c @@ -3,11 +3,10 @@ #include #include "tree-vect.h" -#define N 16 +#define N 8 -int ib[6] = {0,3,6,9,12,15}; -int ia[8][5][6]; -int ic[16][16][5][6]; +int ia[8][5][N+2]; +int ic[16][16][5][N+2]; __attribute__ ((noinline)) int main1 () @@ -17,7 +16,7 @@ int main1 () /* Multidimensional array. Aligned. */ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) { ia[2][6][j] = 5; } @@ -26,7 +25,7 @@ int main1 () /* check results: */ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) { if (ia[2][6][j] != 5) abort(); @@ -35,14 +34,14 @@ int main1 () /* Multidimensional array. Aligned. 
*/ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) ia[3][6][j+2] = 5; } /* check results: */ for (i = 0; i < 16; i++) { - for (j = 2; j < 6; j++) + for (j = 2; j < N+2; j++) { if (ia[3][6][j] != 5) abort(); @@ -52,7 +51,7 @@ int main1 () /* Multidimensional array. Not aligned. */ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) { ic[2][1][6][j+1] = 5; } @@ -61,7 +60,7 @@ int main1 () /* check results: */ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) { if (ic[2][1][6][j+1] != 5) abort(); diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-69.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-69.c index 8317848db9b..c63ae2fd21b 100644 --- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-69.c +++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-69.c @@ -3,7 +3,7 @@ #include #include "tree-vect.h" -#define N 8 +#define N 12 struct s{ int m; diff --git a/gcc/testsuite/gcc.dg/vect/vect-118.c b/gcc/testsuite/gcc.dg/vect/vect-118.c new file mode 100644 index 00000000000..6c0268cc78c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-118.c @@ -0,0 +1,34 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O3 -fdump-tree-vect-details" } */ + +#include "tree-vect.h" + +#define M 10 +#define N 3 + +void __attribute__((noinline)) +foo (int n, int *ub, int *uc) +{ + int i, j, tmp1; + + for (i = 0; i < n; i++) + { + tmp1 = 0; + for (j = 0; j < M; j++) + { + tmp1 += uc[i] * ((int)(j << N) / M); + } + ub[i] = tmp1; + } +} + +int main() +{ + int uc[16], ub[16]; + check_vect (); + foo (16, uc, ub); + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-11a.c b/gcc/testsuite/gcc.dg/vect/vect-11a.c index 84e2a8b36cd..75b1035df4d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-11a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-11a.c @@ -9,15 
+9,18 @@ extern void abort (void); __attribute__ ((noinline)) void u () { - unsigned int A[4] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001}; - unsigned int B[4] = {0x08000000,0x08000001,0xff0000ff,0xf0000001}; - unsigned int Answer[4] = {0,0xf7ffffff,0x0200fe01,0xe0000001}; - unsigned int C[4]; + unsigned int A[8] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001, + 0x08000000,0xffffffff,0xff0000ff,0xf0000001}; + unsigned int B[8] = {0x08000000,0x08000001,0xff0000ff,0xf0000001, + 0x08000000,0x08000001,0xff0000ff,0xf0000001}; + unsigned int Answer[8] = {0,0xf7ffffff,0x0200fe01,0xe0000001, + 0,0xf7ffffff,0x0200fe01,0xe0000001}; + unsigned int C[8]; int i, j; - for (i=0; i<4; i++) + for (i=0; i<8; i++) C[i] = A[i] * B[i]; - for (i=0; i<4; i++) + for (i=0; i<8; i++) if (C[i] != Answer[i]) abort (); } @@ -25,15 +28,18 @@ void u () __attribute__ ((noinline)) void s() { - signed int A[4] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001}; - signed int B[4] = {0x08000000,0x08000001,0xff0000ff,0xf0000001}; - signed int Answer[4] = {0,0xf7ffffff,0x0200fe01, 0xe0000001}; - signed int C[4]; + signed int A[8] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001, + 0x08000000,0xffffffff,0xff0000ff,0xf0000001}; + signed int B[8] = {0x08000000,0x08000001,0xff0000ff,0xf0000001, + 0x08000000,0x08000001,0xff0000ff,0xf0000001}; + signed int Answer[8] = {0,0xf7ffffff,0x0200fe01, 0xe0000001, + 0,0xf7ffffff,0x0200fe01, 0xe0000001}; + signed int C[8]; int i, j; - for (i=0; i<4; i++) + for (i=0; i<8; i++) C[i] = A[i] * B[i]; - for (i=0; i<4; i++) + for (i=0; i<8; i++) if (C[i] != Answer[i]) abort (); } diff --git a/gcc/testsuite/gcc.dg/vect/vect-66.c b/gcc/testsuite/gcc.dg/vect/vect-66.c index 4f93bc05d67..a332fa024b9 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-66.c +++ b/gcc/testsuite/gcc.dg/vect/vect-66.c @@ -3,20 +3,18 @@ #include #include "tree-vect.h" -#define N 16 +#define N 8 __attribute__ ((noinline)) -int main1 () +void main1 () { int i, j; - int ib[6] = {0,3,6,9,12,15}; - int ia[8][5][6]; 
- int ic[16][16][5][6]; + int ia[8][5][N+2]; /* Multidimensional array. Aligned. */ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) { ia[2][6][j] = 5; } @@ -25,33 +23,48 @@ int main1 () /* check results: */ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) { if (ia[2][6][j] != 5) abort(); } } +} + +__attribute__ ((noinline)) +void main2 () +{ + int i, j; + int ia[8][5][N+2]; + /* Multidimensional array. Aligned. */ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) ia[3][6][j+2] = 5; } /* check results: */ for (i = 0; i < 16; i++) { - for (j = 2; j < 6; j++) + for (j = 2; j < N+2; j++) { if (ia[3][6][j] != 5) abort(); } } +} + +__attribute__ ((noinline)) +void main3 () +{ + int i, j; + int ic[16][16][5][N+2]; /* Multidimensional array. Not aligned. */ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) { ic[2][1][6][j+1] = 5; } @@ -60,22 +73,24 @@ int main1 () /* check results: */ for (i = 0; i < 16; i++) { - for (j = 0; j < 4; j++) + for (j = 0; j < N; j++) { if (ic[2][1][6][j+1] != 5) abort(); } } - - return 0; } int main (void) { check_vect (); - return main1 (); + main1 (); + main2 (); + main3 (); + + return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-76.c b/gcc/testsuite/gcc.dg/vect/vect-76.c index 908ccd249c3..8a2f7514e7e 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-76.c +++ b/gcc/testsuite/gcc.dg/vect/vect-76.c @@ -3,7 +3,7 @@ #include #include "tree-vect.h" -#define N 8 +#define N 12 #define OFF 4 /* Check handling of accesses for which the "initial condition" - diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-1.c b/gcc/testsuite/gcc.dg/vect/vect-outer-1.c index 79a2ba41a36..f1892488603 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-1.c +++ 
b/gcc/testsuite/gcc.dg/vect/vect-outer-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ -#define N 40 +#define N 64 signed short image[N][N] __attribute__ ((__aligned__(16))); signed short block[N][N] __attribute__ ((__aligned__(16))); signed short out[N] __attribute__ ((__aligned__(16))); diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c b/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c index 2d6eb06d0c8..2d903937f91 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-1a.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ -#define N 40 +#define N 64 signed short image[N][N] __attribute__ ((__aligned__(16))); signed short block[N][N] __attribute__ ((__aligned__(16))); diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-6.c b/gcc/testsuite/gcc.dg/vect/vect-outer-6.c index 56825449c12..6c4fa1ecb08 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-6.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-6.c @@ -20,10 +20,10 @@ int main1 () int i, j; - for (i = 0; i < 8; i++) + for (i = 0; i < N; i++) { s = 0; - for (j=0; j<8; j+=4) + for (j = 0; j < N; j += 4) s += C[j]; A[i] = s; } @@ -49,10 +49,10 @@ int main () main1(); /* check results: */ - for (i = 0; i < 8; i++) + for (i = 0; i < N; i++) { s = 0; - for (j=0; j<8; j+=4) + for (j = 0; j < N; j += 4) s += C[j]; if (A[i] != s) abort (); diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-1.c b/gcc/testsuite/gcc.dg/vect/vect-shift-1.c index ca6e07dc703..346fffee102 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-shift-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-shift-1.c @@ -4,12 +4,14 @@ #include #include "tree-vect.h" -#define N 4 +#define N 8 int main () { - unsigned int A[N] = { 0x08000000, 0x08000001, 0x0ff0000ff, 0xf0000001 }; - unsigned int B[N] = { 0x01000000, 0x01000000, 0x01fe0001f, 0x1e000000 }; + unsigned int A[N] = { 0x08000000, 0x08000001, 0x0ff0000ff, 0xf0000001, + 0x08000000, 0x08000001, 0x0ff0000ff, 0xf0000001 }; + unsigned int B[N] = { 0x01000000, 0x01000000, 0x01fe0001f, 0x1e000000, + 
0x01000000, 0x01000000, 0x01fe0001f, 0x1e000000 }; int i; check_vect (); diff --git a/gcc/testsuite/gcc.target/i386/vectorize1.c b/gcc/testsuite/gcc.target/i386/vectorize1.c index bd5473fffca..7a5023aa905 100644 --- a/gcc/testsuite/gcc.target/i386/vectorize1.c +++ b/gcc/testsuite/gcc.target/i386/vectorize1.c @@ -4,14 +4,14 @@ extern char lanip[3][40]; typedef struct { - char *t[4]; + char *t[8]; }tx_typ; int set_names (void) { static tx_typ tt1; int ln; - for (ln = 0; ln < 4; ln++) + for (ln = 0; ln < 8; ln++) tt1.t[ln] = lanip[1]; } diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index 74cb073277b..e36493dae42 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -1018,7 +1018,7 @@ basic_block *blocks_in_phiopt_order (void); void tree_ssa_lim (void); unsigned int tree_ssa_unswitch_loops (void); unsigned int canonicalize_induction_variables (void); -unsigned int tree_unroll_loops_completely (bool); +unsigned int tree_unroll_loops_completely (bool, bool); unsigned int tree_ssa_prefetch_arrays (void); unsigned int remove_empty_loops (void); void tree_ssa_iv_optimize (void); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index b82e62024cd..f391c52c374 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -290,6 +290,7 @@ extern struct gimple_opt_pass pass_if_conversion; extern struct gimple_opt_pass pass_loop_distribution; extern struct gimple_opt_pass pass_vectorize; extern struct gimple_opt_pass pass_complete_unroll; +extern struct gimple_opt_pass pass_complete_unrolli; extern struct gimple_opt_pass pass_parallelize_loops; extern struct gimple_opt_pass pass_loop_prefetch; extern struct gimple_opt_pass pass_iv_optimize; diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index 5e9e2482765..1472b0d237d 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -187,23 +187,21 @@ try_unroll_loop_completely (struct loop *loop, > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) return false; - if (ul == UL_NO_GROWTH) + 
unr_insns = estimated_unrolled_size (ninsns, n_unroll); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " Loop size: %d\n", (int) ninsns); + fprintf (dump_file, " Estimated size after unrolling: %d\n", + (int) unr_insns); + } + + if (ul == UL_NO_GROWTH + && unr_insns > ninsns) { - unr_insns = estimated_unrolled_size (ninsns, n_unroll); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " Loop size: %d\n", (int) ninsns); - fprintf (dump_file, " Estimated size after unrolling: %d\n", - (int) unr_insns); - } - - if (unr_insns > ninsns) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d:\n", loop->num); - return false; - } + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d.\n", loop->num); + return false; } } @@ -339,30 +337,45 @@ canonicalize_induction_variables (void) size of the code does not increase. */ unsigned int -tree_unroll_loops_completely (bool may_increase_size) +tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) { loop_iterator li; struct loop *loop; - bool changed = false; + bool changed; enum unroll_level ul; - FOR_EACH_LOOP (li, loop, 0) + do { - if (may_increase_size && maybe_hot_bb_p (loop->header)) - ul = UL_ALL; - else - ul = UL_NO_GROWTH; - changed |= canonicalize_loop_induction_variables (loop, - false, ul, - !flag_tree_loop_ivcanon); - } + changed = false; - /* Clean up the information about numbers of iterations, since complete - unrolling might have invalidated it. */ - scev_reset (); + FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) + { + if (may_increase_size && maybe_hot_bb_p (loop->header) + /* Unroll outermost loops only if asked to do so or they do + not cause code growth.
*/ + && (unroll_outer + || loop_outer (loop_outer (loop)))) + ul = UL_ALL; + else + ul = UL_NO_GROWTH; + changed |= canonicalize_loop_induction_variables + (loop, false, ul, !flag_tree_loop_ivcanon); + } + + if (changed) + { + /* This will take care of removing completely unrolled loops + from the loop structures so we can continue unrolling now + innermost loops. */ + cleanup_tree_cfg (); + + /* Clean up the information about numbers of iterations, since + complete unrolling might have invalidated it. */ + scev_reset (); + } + } + while (changed); - if (changed) - return TODO_cleanup_cfg; return 0; } diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c index 835b0062d8a..1ff6254c0b1 100644 --- a/gcc/tree-ssa-loop-manip.c +++ b/gcc/tree-ssa-loop-manip.c @@ -248,10 +248,15 @@ find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks, return; def_loop = def_bb->loop_father; - /* If the definition is not inside loop, it is not interesting. */ + /* If the definition is not inside a loop, it is not interesting. */ if (!loop_outer (def_loop)) return; + /* If the use is not outside of the loop it is defined in, it is not + interesting. 
*/ + if (flow_bb_inside_loop_p (def_loop, bb)) + return; + if (!use_blocks[ver]) use_blocks[ver] = BITMAP_ALLOC (NULL); bitmap_set_bit (use_blocks[ver], bb->index); @@ -592,7 +597,8 @@ tree_duplicate_loop_to_header_edge (struct loop *loop, edge e, return false; #ifdef ENABLE_CHECKING - verify_loop_closed_ssa (); + if (loops_state_satisfies_p (LOOP_CLOSED_SSA)) + verify_loop_closed_ssa (); #endif first_new_block = last_basic_block; diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index 639fb10a393..52f5a7f58f8 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -223,13 +223,16 @@ struct gimple_opt_pass pass_predcom = static unsigned int tree_vectorize (void) { + if (number_of_loops () <= 1) + return 0; + return vectorize_loops (); } static bool gate_tree_vectorize (void) { - return flag_tree_vectorize && number_of_loops () > 1; + return flag_tree_vectorize; } struct gimple_opt_pass pass_vectorize = @@ -466,7 +469,7 @@ tree_complete_unroll (void) return tree_unroll_loops_completely (flag_unroll_loops || flag_peel_loops - || optimize >= 3); + || optimize >= 3, true); } static bool @@ -495,6 +498,53 @@ struct gimple_opt_pass pass_complete_unroll = } }; +/* Complete unrolling of inner loops. 
*/ + +static unsigned int +tree_complete_unroll_inner (void) +{ + unsigned ret = 0; + + loop_optimizer_init (LOOPS_NORMAL + | LOOPS_HAVE_RECORDED_EXITS); + if (number_of_loops () > 1) + { + scev_initialize (); + ret = tree_unroll_loops_completely (optimize >= 3, false); + free_numbers_of_iterations_estimates (); + scev_finalize (); + } + loop_optimizer_finalize (); + + return ret; +} + +static bool +gate_tree_complete_unroll_inner (void) +{ + return optimize >= 2; +} + +struct gimple_opt_pass pass_complete_unrolli = +{ + { + GIMPLE_PASS, + "cunrolli", /* name */ + gate_tree_complete_unroll_inner, /* gate */ + tree_complete_unroll_inner, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_COMPLETE_UNROLL, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | TODO_verify_loops + | TODO_ggc_collect /* todo_flags_finish */ + } +}; + /* Parallelization. */ static bool -- 2.30.2