From 06066f92aaea9e9b0fff53a693b300bb9aa1121a Mon Sep 17 00:00:00 2001 From: Ira Rosen Date: Sun, 12 Jul 2009 07:09:07 +0000 Subject: [PATCH] tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction with additional argument. * tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction with additional argument. * tree-vectorizer.h (enum vect_def_type): Add vect_double_reduction_def. (vect_is_simple_reduction): Add argument. * tree-vect-loop.c (vect_determine_vectorization_factor): Fix indentation. (vect_analyze_scalar_cycles_1): Detect double reduction. Call vect_is_simple_reduction with additional argument. (vect_analyze_loop_operations): Handle exit phi nodes in case of double reduction. (reduction_code_for_scalar_code): Handle additional codes by returning ERROR_MARK for them. Fix comment and indentation. (vect_is_simple_reduction): Fix comment, add argument to specify double reduction. Detect double reduction. (get_initial_def_for_induction): Fix indentation. (get_initial_def_for_reduction): Fix comment and indentation. Handle double reduction. Create initial definitions that do not require adjustment if ADJUSTMENT_DEF is NULL. Handle additional cases. (vect_create_epilog_for_reduction): Fix comment, add argument to handle double reduction. Use PLUS_EXPR in case of MINUS_EXPR in epilogue result extraction. Create double reduction phi node and replace relevant uses. (vectorizable_reduction): Call vect_is_simple_reduction with additional argument. Fix indentation. Update epilogue code treatment according to the changes in reduction_code_for_scalar_code. Check for double reduction. Call vect_create_epilog_for_reduction with additional argument. * tree-vect-stmts.c (process_use): Handle double reduction, update documentation. (vect_mark_stmts_to_be_vectorized): Handle double reduction. (vect_get_vec_def_for_operand): Likewise. 
From-SVN: r149526 --- gcc/ChangeLog | 35 ++ gcc/testsuite/ChangeLog | 8 + .../gcc.dg/vect/no-scevccp-outer-2.c | 4 +- .../gcc.dg/vect/vect-double-reduc-1.c | 56 ++ .../gcc.dg/vect/vect-double-reduc-2.c | 56 ++ .../gcc.dg/vect/vect-double-reduc-3.c | 67 +++ .../gcc.dg/vect/vect-double-reduc-4.c | 56 ++ .../gcc.dg/vect/vect-double-reduc-5.c | 58 +++ .../gcc.dg/vect/vect-double-reduc-6.c | 50 ++ .../gcc.dg/vect/vect-double-reduc-7.c | 65 +++ gcc/tree-parloops.c | 4 +- gcc/tree-vect-loop.c | 493 ++++++++++++++---- gcc/tree-vect-stmts.c | 88 ++-- gcc/tree-vectorizer.h | 3 +- 14 files changed, 893 insertions(+), 150 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-double-reduc-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-double-reduc-2.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-double-reduc-3.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-double-reduc-4.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-double-reduc-7.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c8a39500b80..edeb0492b36 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2009-07-12 Ira Rosen + + * tree-parloops.c (loop_parallel_p): Call vect_is_simple_reduction + with additional argument. + * tree-vectorizer.h (enum vect_def_type): Add + vect_double_reduction_def. + (vect_is_simple_reduction): Add argument. + * tree-vect-loop.c (vect_determine_vectorization_factor): Fix + indentation. + (vect_analyze_scalar_cycles_1): Detect double reduction. Call + vect_is_simple_reduction with additional argument. + (vect_analyze_loop_operations): Handle exit phi nodes in case of + double reduction. + (reduction_code_for_scalar_code): Handle additional codes by + returning ERROR_MARK for them. Fix comment and indentation. + (vect_is_simple_reduction): Fix comment, add argument to specify + double reduction. Detect double reduction. 
+ (get_initial_def_for_induction): Fix indentation. + (get_initial_def_for_reduction): Fix comment and indentation. + Handle double reduction. Create initial definitions that do not + require adjustment if ADJUSTMENT_DEF is NULL. Handle additional cases. + (vect_create_epilog_for_reduction): Fix comment, add argument to + handle double reduction. Use PLUS_EXPR in case of MINUS_EXPR in + epilogue result extraction. Create double reduction phi node and + replace relevant uses. + (vectorizable_reduction): Call vect_is_simple_reduction with + additional argument. Fix indentation. Update epilogue code treatment + according to the changes in reduction_code_for_scalar_code. Check + for double reduction. Call vect_create_epilog_for_reduction with + additional argument. + * tree-vect-stmts.c (process_use): Handle double reduction, update + documentation. + (vect_mark_stmts_to_be_vectorized): Handle double reduction. + (vect_get_vec_def_for_operand): Likewise. + 2009-07-12 Danny Smith * config/i386/winnt.c (i386_pe_determine_dllexport_p): Don't diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 06d7675748b..7df599ed587 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2009-07-12 Ira Rosen + + * gcc.dg/vect/no-scevccp-outer-2.c: Expect to vectorize. + * gcc.dg/vect/vect-double-reduc-1.c, gcc.dg/vect/vect-double-reduc-2.c, + gcc.dg/vect/vect-double-reduc-3.c, gcc.dg/vect/vect-double-reduc-4.c, + gcc.dg/vect/vect-double-reduc-5.c, gcc.dg/vect/vect-double-reduc-6.c, + gcc.dg/vect/vect-double-reduc-7.c: New tests. 
+ 2009-07-12 Hans-Peter Nilsson * gfortran.dg/f2003_io_4.f03, gfortran.dg/read_size_noadvance.f90, diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c index a9ac09c4a2b..13b37883c2e 100644 --- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c +++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-2.c @@ -1,4 +1,6 @@ /* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + #define N 40 int @@ -14,5 +16,5 @@ foo (){ return diff; } -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-1.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-1.c new file mode 100644 index 00000000000..e3358428a48 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-1.c @@ -0,0 +1,56 @@ +/* { dg-require-effective-target vect_int_mult } */ + +#include +#include +#include "tree-vect.h" + +#define K 32 + +int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int out[K]; +int check_result[K] = {642816,660736,678656,696576,714496,732416,750336,768256,786176,804096,822016,839936,857856,875776,893696,911616,929536,947456,965376,983296,1001216,1019136,1037056,1054976,1072896,1090816,1108736,1126656,1144576,1162496,1180416,1198336}; + +__attribute__ ((noinline)) void +foo () +{ + int sum = 0, i, j, k; + + for (k = 0; k < K; k++) + { + sum = 0; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + sum += in[i+k][j] * coeff[i][j]; + + out[k] = sum; + } +} + +int main () +{ + int i, j, k; + + check_vect (); + + for (j = 0; j < K; j++) + { + for (i = 0; i < 2*K; i++) + in[i][j] = i+j; + + for (i = 0; i < K; i++) + coeff[i][j] = i+2; + } + + foo(); + + for (k = 0; k < K; k++) + if (out[k] != check_result[k]) + abort (); + + 
return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-2.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-2.c new file mode 100644 index 00000000000..be469be02de --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-2.c @@ -0,0 +1,56 @@ +/* { dg-require-effective-target vect_int_mult } */ + +#include +#include +#include "tree-vect.h" + +#define K 32 + +int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int out[K]; +int check_result[K] = {357184,339264,321344,303424,285504,267584,249664,231744,213824,195904,177984,160064,142144,124224,106304,88384,70464,52544,34624,16704,-1216,-19136,-37056,-54976,-72896,-90816,-108736,-126656,-144576,-162496,-180416,-198336}; + +__attribute__ ((noinline)) void +foo () +{ + int res = 0, i, j, k; + + for (k = 0; k < K; k++) + { + res = 1000000; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + res -= in[i+k][j] * coeff[i][j]; + + out[k] = res; + } +} + +int main () +{ + int i, j, k; + + check_vect (); + + for (j = 0; j < K; j++) + { + for (i = 0; i < 2*K; i++) + in[i][j] = i+j; + + for (i = 0; i < K; i++) + coeff[i][j] = i+2; + } + + foo(); + + for (k = 0; k < K; k++) + if (out[k] != check_result[k]) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-3.c new file mode 100644 index 00000000000..87b5a04099e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-3.c @@ -0,0 +1,67 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include +#include "tree-vect.h" + +#define K 32 + +int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int 
coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int out_max[K], out_min[K]; +int check_max[K] = {62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93}; +int check_min[K] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + +__attribute__ ((noinline)) void +foo (int x, int y) +{ + int max, min, i, j, k; + + for (k = 0; k < K; k++) + { + max = x; + min = y; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + { + max = max < in[i+k][j] ? in[i+k][j] : max; + min = min > in[i+k][j] ? in[i+k][j] : min; + } + out_max[k] = max; + out_min[k] = min; + } +} + +int main () +{ + int i, j, k; + + check_vect (); + + for (j = 0; j < K; j++) + { + for (i = 0; i < 2*K; i++) + in[i][j] = i+j; + + for (i = 0; i < K; i++) + coeff[i][j] = i+2; + } + + foo(0, 0); + + for (k = 0; k < K; k++) + if (out_max[k] != check_max[k] || out_min[k] != 0) + abort (); + + foo(100, 45); + + for (k = 0; k < K; k++) + if (out_min[k] != check_min[k] || out_max[k] != 100) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_int_max } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-4.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-4.c new file mode 100644 index 00000000000..90e0da70a20 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-4.c @@ -0,0 +1,56 @@ +/* { dg-require-effective-target vect_int_mult } */ + +#include +#include +#include "tree-vect.h" + +#define K 32 + +int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int out[K]; +int check_result[K] = 
{652816,670736,688656,706576,724496,742416,760336,778256,796176,814096,832016,849936,867856,885776,903696,921616,939536,957456,975376,993296,1011216,1029136,1047056,1064976,1082896,1100816,1118736,1136656,1154576,1172496,1190416,1208336}; + +__attribute__ ((noinline)) void +foo () +{ + int sum = 0, i, j, k; + + for (k = 0; k < K; k++) + { + sum = 10000; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + sum += in[i+k][j] * coeff[i][j]; + + out[k] = sum; + } +} + +int main () +{ + int i, j, k; + + check_vect (); + + for (j = 0; j < K; j++) + { + for (i = 0; i < 2*K; i++) + in[i][j] = i+j; + + for (i = 0; i < K; i++) + coeff[i][j] = i+2; + } + + foo(); + + for (k = 0; k < K; k++) + if (out[k] != check_result[k]) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c new file mode 100644 index 00000000000..f624d86502f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c @@ -0,0 +1,58 @@ +/* { dg-require-effective-target vect_int_mult } */ + +#include +#include +#include "tree-vect.h" + +#define K 32 + +signed short in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +signed short coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int out[K]; +int check_result[K] = {642816,660736,678656,696576,714496,732416,750336,768256,786176,804096,822016,839936,857856,875776,893696,911616,929536,947456,965376,983296,1001216,1019136,1037056,1054976,1072896,1090816,1108736,1126656,1144576,1162496,1180416,1198336}; + +__attribute__ ((noinline)) void +foo () +{ + int sum = 0, i, j, k; + + for (k = 0; k < K; k++) + { + sum = 0; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + sum += in[i+k][j] * coeff[i][j]; + + out[k] = sum; + } +} + +int main () +{ + int i, j, k; + + check_vect (); + + for (j = 0; j < K; j++) + { + for (i 
= 0; i < 2*K; i++) + in[i][j] = i+j; + + for (i = 0; i < K; i++) + coeff[i][j] = i+2; + } + + foo(); + + for (k = 0; k < K; k++) + if (out[k] != check_result[k]) + abort (); + + return 0; +} + +/* Vectorization of loops with multiple types and double reduction is not + supported yet. */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c new file mode 100644 index 00000000000..f52b32bfad9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-6.c @@ -0,0 +1,50 @@ +/* { dg-require-effective-target vect_int_mult } */ + +#include +#include +#include "tree-vect.h" + +#define K 4 + +int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int out[K]; +int check_result[K] = {0,16,256,4096}; + +__attribute__ ((noinline)) void +foo () +{ + int sum; + int i, j, k; + + for (k = 0; k < K; k++) + { + sum = 1; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + sum *= in[i+k][j]; + out[k] = sum; + } +} + +int main () +{ + int i, j, k; + + check_vect (); + + for (i = 0; i < 2*K; i++) + for (j = 0; j < K; j++) + in[i][j] = (i+2)/3; + + foo(); + + for (k = 0; k < K; k++) + if (out[k] != check_result[k]) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-7.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-7.c new file mode 100644 index 00000000000..9e7ced7f927 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-7.c @@ -0,0 +1,65 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include +#include "tree-vect.h" + +#define K 32 + +int in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); +int out[K]; +int check_result[K] = 
{63,63,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191,127,127,191,191}; + +__attribute__ ((noinline)) void +foo () +{ + int res_or, res_and, res_xor, i, j, k; + + for (k = 0; k < K; k++) + { + res_or = 0; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + res_or = res_or | in[i+k][j]; + + res_and = 1; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + res_and = res_and & in[i+k][j]; + + res_xor = 0; + for (j = 0; j < K; j++) + for (i = 0; i < K; i++) + res_xor = res_xor ^ in[i+k][j]; + + out[k] = res_or + res_and + res_xor; + } +} + +int main () +{ + int i, j, k; + + check_vect (); + + for (j = 0; j < K; j++) + { + for (i = 0; i < 2*K; i++) + in[i][j] = i+j; + + for (i = 0; i < K; i++) + out[i] = i+j; + } + + foo(); + + for (k = 0; k < K; k++) + if (out[k] != check_result[k]) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 3 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c index 5f11fc77a1b..28c96a26f59 100644 --- a/gcc/tree-parloops.c +++ b/gcc/tree-parloops.c @@ -284,13 +284,15 @@ loop_parallel_p (struct loop *loop, htab_t reduction_list, { gimple phi = gsi_stmt (gsi); gimple reduc_stmt = NULL; + bool dummy; /* ??? TODO: Change this into a generic function that recognizes reductions. */ if (!is_gimple_reg (PHI_RESULT (phi))) continue; if (simple_loop_info) - reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true); + reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, + &dummy); /* Create a reduction_info struct, initialize it and insert it to the reduction list. */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index a37e3c00f72..c96fb04a814 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -291,8 +291,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) } else { - - gcc_assert (! 
STMT_VINFO_DATA_REF (stmt_info) + gcc_assert (!STMT_VINFO_DATA_REF (stmt_info) && !is_pattern_stmt_p (stmt_info)); scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, @@ -410,6 +409,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) tree dumy; VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64); gimple_stmt_iterator gsi; + bool double_reduc; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_analyze_scalar_cycles ==="); @@ -477,26 +477,39 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo)); - reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle); + reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle, + &double_reduc); if (reduc_stmt) { - if (nested_cycle) + if (double_reduc) { if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "Detected vectorizable nested cycle."); + fprintf (vect_dump, "Detected double reduction."); - STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; + STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def; STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = - vect_nested_cycle; + vect_double_reduction_def; } - else + else { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "Detected reduction."); + if (nested_cycle) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Detected vectorizable nested cycle."); - STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; - STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = - vect_reduction_def; + STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = + vect_nested_cycle; + } + else + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "Detected reduction."); + + STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; + 
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = + vect_reduction_def; + } } } else @@ -1111,10 +1124,13 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) /* inner-loop loop-closed exit phi in outer-loop vectorization (i.e. a phi in the tail of the outer-loop). FORNOW: we currently don't support the case that these phis - are not used in the outerloop, cause this case requires - to actually do something here. */ - if (!STMT_VINFO_RELEVANT_P (stmt_info) - || STMT_VINFO_LIVE_P (stmt_info)) + are not used in the outerloop (unless it is double reduction, + i.e., this phi is vect_double_reduction_def), because this case + requires to actually do something here. */ + if ((!STMT_VINFO_RELEVANT_P (stmt_info) + || STMT_VINFO_LIVE_P (stmt_info)) + && STMT_VINFO_DEF_TYPE (stmt_info) + != vect_double_reduction_def) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, @@ -1466,31 +1482,40 @@ vect_analyze_loop (struct loop *loop) Output: REDUC_CODE - the corresponding tree-code to be used to reduce the vector of partial results into a single scalar result (which - will also reside in a vector). + will also reside in a vector) or ERROR_MARK if the operation is + a supported reduction operation, but does not have such tree-code. - Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */ + Return FALSE if CODE currently cannot be vectorized as reduction. 
*/ static bool reduction_code_for_scalar_code (enum tree_code code, enum tree_code *reduc_code) { switch (code) - { - case MAX_EXPR: - *reduc_code = REDUC_MAX_EXPR; - return true; - - case MIN_EXPR: - *reduc_code = REDUC_MIN_EXPR; - return true; - - case PLUS_EXPR: - *reduc_code = REDUC_PLUS_EXPR; - return true; - - default: - return false; - } + { + case MAX_EXPR: + *reduc_code = REDUC_MAX_EXPR; + return true; + + case MIN_EXPR: + *reduc_code = REDUC_MIN_EXPR; + return true; + + case PLUS_EXPR: + *reduc_code = REDUC_PLUS_EXPR; + return true; + + case MULT_EXPR: + case MINUS_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case BIT_AND_EXPR: + *reduc_code = ERROR_MARK; + return true; + + default: + return false; + } } @@ -1507,7 +1532,7 @@ report_vect_op (gimple stmt, const char *msg) /* Function vect_is_simple_reduction - Detect a cross-iteration def-use cycle that represents a simple + (1) Detect a cross-iteration def-use cycle that represents a simple reduction computation. We look for the following pattern: loop_header: @@ -1524,12 +1549,20 @@ report_vect_op (gimple stmt, const char *msg) Condition 1 is tested here. Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. - Also detect a cross-iteration def-use cycle in nested loops, i.e., nested - cycles, if CHECK_REDUCTION is false. */ + (2) Detect a cross-iteration def-use cycle in nested loops, i.e., + nested cycles, if CHECK_REDUCTION is false. 
+ + (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double + reductions: + + a1 = phi < a0, a2 > + inner loop (def of a3) + a2 = phi < a3 > +*/ gimple vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, - bool check_reduction) + bool check_reduction, bool *double_reduc) { struct loop *loop = (gimple_bb (phi))->loop_father; struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); @@ -1543,6 +1576,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, tree name; imm_use_iterator imm_iter; use_operand_p use_p; + bool phi_def; + + *double_reduc = false; /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization, otherwise, we assume outer loop vectorization. */ @@ -1584,14 +1620,24 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, return NULL; } - if (!is_gimple_assign (def_stmt)) + if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI) { if (vect_print_dump_info (REPORT_DETAILS)) print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM); return NULL; } - name = gimple_assign_lhs (def_stmt); + if (is_gimple_assign (def_stmt)) + { + name = gimple_assign_lhs (def_stmt); + phi_def = false; + } + else + { + name = PHI_RESULT (def_stmt); + phi_def = true; + } + nloop_uses = 0; FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) { @@ -1608,6 +1654,37 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, } } + /* If DEF_STMT is a phi node itself, we expect it to have a single argument + defined in the inner loop. 
*/ + if (phi_def) + { + op1 = PHI_ARG_DEF (def_stmt, 0); + + if (gimple_phi_num_args (def_stmt) != 1 + || TREE_CODE (op1) != SSA_NAME) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported phi node definition."); + + return NULL; + } + + def1 = SSA_NAME_DEF_STMT (op1); + if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) + && loop->inner + && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) + && is_gimple_assign (def1)) + { + if (vect_print_dump_info (REPORT_DETAILS)) + report_vect_op (def_stmt, "detected double reduction: "); + + *double_reduc = true; + return def_stmt; + } + + return NULL; + } + code = gimple_assign_rhs_code (def_stmt); if (check_reduction @@ -1697,7 +1774,6 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi, return NULL; } - /* Check that one def is the reduction def, defined by PHI, the other def is either defined in the loop ("vect_internal_def"), or it's an induction (defined by a loop-header phi-node). */ @@ -2296,7 +2372,7 @@ get_initial_def_for_induction (gimple iv_phi) access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi)); gcc_assert (access_fn); ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn, - &init_expr, &step_expr); + &init_expr, &step_expr); gcc_assert (ok); pe = loop_preheader_edge (iv_loop); @@ -2306,7 +2382,8 @@ get_initial_def_for_induction (gimple iv_phi) /* iv_loop is nested in the loop to be vectorized. init_expr had already been created during vectorization of previous stmts; We obtain it from the STMT_VINFO_VEC_STMT of the defining stmt. */ - tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop)); + tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, + loop_preheader_edge (iv_loop)); vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL); } else @@ -2507,18 +2584,16 @@ get_initial_def_for_induction (gimple iv_phi) vector of partial results. 
Option1 (adjust in epilog): Initialize the vector as follows: - add: [0,0,...,0,0] - mult: [1,1,...,1,1] - min/max: [init_val,init_val,..,init_val,init_val] - bit and/or: [init_val,init_val,..,init_val,init_val] + add/bit or/xor: [0,0,...,0,0] + mult/bit and: [1,1,...,1,1] + min/max: [init_val,init_val,..,init_val,init_val] and when necessary (e.g. add/mult case) let the caller know that it needs to adjust the result by init_val. Option2: Initialize the vector as follows: - add: [0,0,...,0,init_val] - mult: [1,1,...,1,init_val] - min/max: [init_val,init_val,...,init_val] - bit and/or: [init_val,init_val,...,init_val] + add/bit or/xor: [init_val,0,0,...,0] + mult/bit and: [init_val,1,1,...,1] + min/max: [init_val,init_val,...,init_val] and no adjustments are needed. For example, for the following code: @@ -2533,11 +2608,14 @@ get_initial_def_for_induction (gimple iv_phi) the result at the end by 'init_val'. FORNOW, we are using the 'adjust in epilog' scheme, because this way the - initialization vector is simpler (same element in all entries). + initialization vector is simpler (same element in all entries), if + ADJUSTMENT_DEF is not NULL, and Option2 otherwise. + A cost model should help decide between these two schemes. 
*/ tree -get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) +get_initial_def_for_reduction (gimple stmt, tree init_val, + tree *adjustment_def) { stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); @@ -2551,47 +2629,118 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) tree t = NULL_TREE; int i; bool nested_in_vect_loop = false; + tree init_value; + REAL_VALUE_TYPE real_init_val = dconst0; + int int_init_val = 0; gcc_assert (vectype); nunits = TYPE_VECTOR_SUBPARTS (vectype); gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type) || SCALAR_FLOAT_TYPE_P (scalar_type)); + if (nested_in_vect_loop_p (loop, stmt)) nested_in_vect_loop = true; else gcc_assert (loop == (gimple_bb (stmt))->loop_father); - switch (code) - { - case WIDEN_SUM_EXPR: - case DOT_PROD_EXPR: - case PLUS_EXPR: - case MINUS_EXPR: - if (nested_in_vect_loop) - *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt, NULL); - else - *adjustment_def = init_val; - /* Create a vector of zeros for init_def. */ - if (SCALAR_FLOAT_TYPE_P (scalar_type)) - def_for_init = build_real (scalar_type, dconst0); - else - def_for_init = build_int_cst (scalar_type, 0); - - for (i = nunits - 1; i >= 0; --i) - t = tree_cons (NULL_TREE, def_for_init, t); - init_def = build_vector (vectype, t); - break; + /* In case of double reduction we only create a vector variable to be put + in the reduction phi node. The actual statement creation is done in + vect_create_epilog_for_reduction. 
*/ + if (TREE_CODE (init_val) == SSA_NAME + && vinfo_for_stmt (SSA_NAME_DEF_STMT (init_val)) + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SSA_NAME_DEF_STMT (init_val))) + == vect_double_reduction_def) + { + *adjustment_def = NULL; + return vect_create_destination_var (init_val, vectype); + } - case MIN_EXPR: - case MAX_EXPR: - *adjustment_def = NULL_TREE; - init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL); - break; + if (TREE_CONSTANT (init_val)) + { + if (SCALAR_FLOAT_TYPE_P (scalar_type)) + init_value = build_real (scalar_type, TREE_REAL_CST (init_val)); + else + init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val)); + } + else + init_value = init_val; - default: - gcc_unreachable (); - } + switch (code) + { + case WIDEN_SUM_EXPR: + case DOT_PROD_EXPR: + case PLUS_EXPR: + case MINUS_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case MULT_EXPR: + case BIT_AND_EXPR: + /* ADJUSTMENT_DEF is NULL when called from + vect_create_epilog_for_reduction to vectorize double reduction. */ + if (adjustment_def) + { + if (nested_in_vect_loop) + *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt, + NULL); + else + *adjustment_def = init_val; + } + + if (code == MULT_EXPR || code == BIT_AND_EXPR) + { + real_init_val = dconst1; + int_init_val = 1; + } + + if (SCALAR_FLOAT_TYPE_P (scalar_type)) + def_for_init = build_real (scalar_type, real_init_val); + else + def_for_init = build_int_cst (scalar_type, int_init_val); + + /* Create a vector of '0' or '1' except the first element. */ + for (i = nunits - 2; i >= 0; --i) + t = tree_cons (NULL_TREE, def_for_init, t); + + /* Option1: the first element is '0' or '1' as well. */ + if (adjustment_def) + { + t = tree_cons (NULL_TREE, def_for_init, t); + init_def = build_vector (vectype, t); + break; + } + + /* Option2: the first element is INIT_VAL. 
*/ + t = tree_cons (NULL_TREE, init_value, t); + if (TREE_CONSTANT (init_val)) + init_def = build_vector (vectype, t); + else + init_def = build_constructor_from_list (vectype, t); + + break; + + case MIN_EXPR: + case MAX_EXPR: + if (adjustment_def) + { + *adjustment_def = NULL_TREE; + init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL); + break; + } + + for (i = nunits - 1; i >= 0; --i) + t = tree_cons (NULL_TREE, init_value, t); + + if (TREE_CONSTANT (init_val)) + init_def = build_vector (vectype, t); + else + init_def = build_constructor_from_list (vectype, t); + + break; + + default: + gcc_unreachable (); + } return init_def; } @@ -2613,6 +2762,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) REDUCTION_PHI is the phi-node that carries the reduction computation. REDUC_INDEX is the index of the operand in the right hand side of the statement that is defined by REDUCTION_PHI. + DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled. This function: 1. 
Creates the reduction def-use cycle: sets the arguments for @@ -2657,14 +2807,15 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, int ncopies, enum tree_code reduc_code, gimple reduction_phi, - int reduc_index) + int reduc_index, + bool double_reduc) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info prev_phi_info; tree vectype; enum machine_mode mode; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; basic_block exit_bb; tree scalar_dest; tree scalar_type; @@ -2694,6 +2845,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, if (nested_in_vect_loop_p (loop, stmt)) { + outer_loop = loop; loop = loop->inner; nested_in_vect_loop = true; } @@ -2726,7 +2878,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, the scalar def before the loop, that defines the initial value of the reduction variable. */ vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, - &adjustment_def); + &adjustment_def); phi = reduction_phi; def = vect_def; @@ -2744,8 +2896,8 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, { fprintf (vect_dump, "transform reduction: created def-use cycle: "); print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM); - fprintf (vect_dump, "\n"); - print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM); + fprintf (vect_dump, "\n"); + print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM); } phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)); @@ -2831,15 +2983,25 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, bitsize = TYPE_SIZE (scalar_type); bytesize = TYPE_SIZE_UNIT (scalar_type); + /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore, + partial results are added and not subtracted. 
*/ + if (code == MINUS_EXPR) + code = PLUS_EXPR; /* In case this is a reduction in an inner-loop while vectorizing an outer loop - we don't need to extract a single scalar result at the end of the - inner-loop. The final vector of partial results will be used in the - vectorized outer-loop, or reduced to a scalar result at the end of the - outer-loop. */ - if (nested_in_vect_loop) + inner-loop (unless it is double reduction, i.e., the use of reduction is + outside the outer-loop). The final vector of partial results will be used + in the vectorized outer-loop, or reduced to a scalar result at the end of + the outer-loop. */ + if (nested_in_vect_loop && !double_reduc) goto vect_finalize_reduction; + /* The epilogue is created for the outer-loop, i.e., for the loop being + vectorized. */ + if (double_reduc) + loop = outer_loop; + /* FORNOW */ gcc_assert (ncopies == 1); @@ -2914,6 +3076,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, bit_offset /= 2) { tree bitpos = size_int (bit_offset); + epilog_stmt = gimple_build_assign_with_ops (shift_code, vec_dest, new_temp, bitpos); new_name = make_ssa_name (vec_dest, epilog_stmt); @@ -2987,7 +3150,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, { tree rhs; - gcc_assert (!nested_in_vect_loop); + gcc_assert (!nested_in_vect_loop || double_reduc); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "extract scalar result"); @@ -3007,6 +3170,9 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, vect_finalize_reduction: + if (double_reduc) + loop = loop->inner; + /* 2.5 Adjust the final result by the initial value of the reduction variable. (When such adjustment is not needed, then 'adjustment_def' is zero). For example, if code is PLUS we create: @@ -3016,11 +3182,6 @@ vect_finalize_reduction: { if (nested_in_vect_loop) { - /* For MINUS_EXPR we create new_temp = loop_exit_def + adjustment_def - since the initial value is [0,0,...,0]. 
*/ - if (code == MINUS_EXPR) - code = PLUS_EXPR; - gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE); expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def); new_dest = vect_create_destination_var (scalar_dest, vectype); @@ -3055,6 +3216,7 @@ vect_finalize_reduction: VEC_quick_push (gimple, phis, exit_phi); } } + /* We expect to have found an exit_phi because of loop-closed-ssa form. */ gcc_assert (!VEC_empty (gimple, phis)); @@ -3063,12 +3225,13 @@ vect_finalize_reduction: if (nested_in_vect_loop) { stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); + gimple vect_phi; /* FORNOW. Currently not supporting the case that an inner-loop reduction is not used in the outer-loop (but only outside the - outer-loop). */ - gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) - && !STMT_VINFO_LIVE_P (stmt_vinfo)); + outer-loop), unless it is double reduction. */ + gcc_assert ((STMT_VINFO_RELEVANT_P (stmt_vinfo) + && !STMT_VINFO_LIVE_P (stmt_vinfo)) || double_reduc); epilog_stmt = adjustment_def ? epilog_stmt : new_phi; STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt; @@ -3078,7 +3241,88 @@ vect_finalize_reduction: if (adjustment_def) STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi)); - continue; + + if (!double_reduc + || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_double_reduction_def) + continue; + + /* Handle double reduction: + + stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop) + stmt2: s3 = phi <s1, s4> - (regular) reduction phi (inner loop) + stmt3: s4 = use (s3) - (regular) reduction stmt (inner loop) + stmt4: s2 = phi <s4> - double reduction stmt (outer loop) + + At that point the regular reduction (stmt2 and stmt3) is already + vectorized, as well as the exit phi node, stmt4. + Here we vectorize the phi node of double reduction, stmt1, and + update all relevant statements. */ + + /* Go through all the uses of s2 to find double reduction phi node, + i.e., stmt1 above.
*/ + orig_name = PHI_RESULT (exit_phi); + FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) + { + stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt); + stmt_vec_info new_phi_vinfo; + tree vect_phi_init, preheader_arg, vect_phi_res, init_def; + basic_block bb = gimple_bb (use_stmt); + gimple use; + + /* Check that USE_STMT is really double reduction phi node. */ + if (gimple_code (use_stmt) != GIMPLE_PHI + || gimple_phi_num_args (use_stmt) != 2 + || !use_stmt_vinfo + || STMT_VINFO_DEF_TYPE (use_stmt_vinfo) + != vect_double_reduction_def + || bb->loop_father != outer_loop) + continue; + + /* Create vector phi node for double reduction: + vs1 = phi <vs0, vs2> + vs1 was created previously in this function by a call to + vect_get_vec_def_for_operand and is stored in vec_initial_def; + vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI; + vs0 is created here. */ + + /* Create vector phi node. */ + vect_phi = create_phi_node (vec_initial_def, bb); + new_phi_vinfo = new_stmt_vec_info (vect_phi, + loop_vec_info_for_loop (outer_loop), NULL); + set_vinfo_for_stmt (vect_phi, new_phi_vinfo); + + /* Create vs0 - initial def of the double reduction phi. */ + preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt, + loop_preheader_edge (outer_loop)); + init_def = get_initial_def_for_reduction (stmt, preheader_arg, + NULL); + vect_phi_init = vect_init_vector (use_stmt, init_def, vectype, + NULL); + + /* Update phi node arguments with vs0 and vs2. */ + add_phi_arg (vect_phi, vect_phi_init, + loop_preheader_edge (outer_loop)); + add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt), + loop_latch_edge (outer_loop)); + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "created double reduction phi node: "); + print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM); + } + + vect_phi_res = PHI_RESULT (vect_phi); + + /* Replace the use, i.e., set the correct vs1 in the regular + reduction phi node. FORNOW, NCOPIES is always 1, so the loop + is redundant.
*/ + use = reduction_phi; + for (j = 0; j < ncopies; j++) + { + edge pr_edge = loop_preheader_edge (loop); + SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res); + use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use)); + } + } } /* Replace the uses: */ @@ -3087,6 +3331,7 @@ vect_finalize_reduction: FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) SET_USE (use_p, new_temp); } + VEC_free (gimple, heap, phis); } @@ -3171,6 +3416,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, gimple reduc_def_stmt = NULL; /* The default is that the reduction variable is the last in statement. */ int reduc_index = 2; + bool double_reduc = false, dummy; + basic_block def_bb; + struct loop * def_stmt_loop; + tree def_arg; if (nested_in_vect_loop_p (loop, stmt)) { @@ -3185,7 +3434,6 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, return false; /* 1. Is vectorizable reduction? */ - /* Not supportable if the reduction variable is used in the loop. */ if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer) return false; @@ -3300,10 +3548,11 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, if (orig_stmt) gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt, - !nested_cycle)); + !nested_cycle, + &dummy)); else gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt, - !nested_cycle)); + !nested_cycle, &dummy)); if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt))) return false; @@ -3400,25 +3649,43 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, orig_code = code; } - if (nested_cycle) - epilog_reduc_code = orig_code; - else - if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) - return false; + if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) + return false; - reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, optab_default); + reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, + optab_default); if (!reduc_optab) { 
if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "no optab for reduction."); epilog_reduc_code = ERROR_MARK; } - if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing) + + if (reduc_optab + && optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "reduc op not supported by target."); epilog_reduc_code = ERROR_MARK; } + + def_bb = gimple_bb (reduc_def_stmt); + def_stmt_loop = def_bb->loop_father; + def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, + loop_preheader_edge (def_stmt_loop)); + if (TREE_CODE (def_arg) == SSA_NAME + && vinfo_for_stmt (SSA_NAME_DEF_STMT (def_arg)) + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SSA_NAME_DEF_STMT (def_arg))) + == vect_double_reduction_def) + double_reduc = true; + + if (double_reduc && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in double reduction"); + + return false; + } if (!vec_stmt) /* transformation not required. */ { @@ -3560,8 +3827,10 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, epilog reduction code. */ if (!single_defuse_cycle) new_temp = gimple_assign_lhs (*vec_stmt); + vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies, - epilog_reduc_code, first_phi, reduc_index); + epilog_reduc_code, first_phi, reduc_index, + double_reduc); return true; } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 1c9415b7031..891ee1860f0 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -331,7 +331,7 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p, ... 
inner-loop: d = def_stmt - outer-loop-tail-bb: + outer-loop-tail-bb (or outer-loop-exit-bb in double reduction): stmt # use (d) */ else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father)) { @@ -341,7 +341,8 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p, switch (relevant) { case vect_unused_in_scope: - relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ? + relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def + || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ? vect_used_in_outer_by_reduction : vect_unused_in_scope; break; @@ -393,7 +394,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) basic_block bb; gimple phi; bool live_p; - enum vect_relevant relevant; + enum vect_relevant relevant, tmp_relevant; + enum vect_def_type def_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ==="); @@ -465,49 +467,64 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) identify stmts that are used solely by a reduction, and therefore the order of the results that they produce does not have to be kept. 
*/ - if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) + def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo); + tmp_relevant = relevant; + switch (def_type) { - enum vect_relevant tmp_relevant = relevant; - switch (tmp_relevant) - { - case vect_unused_in_scope: - gcc_assert (gimple_code (stmt) != GIMPLE_PHI); - relevant = vect_used_by_reduction; - break; + case vect_reduction_def: + switch (tmp_relevant) + { + case vect_unused_in_scope: + relevant = vect_used_by_reduction; + break; - case vect_used_by_reduction: - if (gimple_code (stmt) == GIMPLE_PHI) - break; - /* fall through */ + case vect_used_by_reduction: + if (gimple_code (stmt) == GIMPLE_PHI) + break; + /* fall through */ - default: - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "unsupported use of reduction."); - VEC_free (gimple, heap, worklist); - return false; - } + default: + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported use of reduction."); - live_p = false; - } - else if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) - { - enum vect_relevant tmp_relevant = relevant; - switch (tmp_relevant) - { - case vect_unused_in_scope: - case vect_used_in_outer_by_reduction: - case vect_used_in_outer: - break; + VEC_free (gimple, heap, worklist); + return false; + } - default: + live_p = false; + break; + + case vect_nested_cycle: + if (tmp_relevant != vect_unused_in_scope + && tmp_relevant != vect_used_in_outer_by_reduction + && tmp_relevant != vect_used_in_outer) + { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "unsupported use of nested cycle."); VEC_free (gimple, heap, worklist); return false; - } + } + + live_p = false; + break; + + case vect_double_reduction_def: + if (tmp_relevant != vect_unused_in_scope + && tmp_relevant != vect_used_by_reduction) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unsupported use of double reduction."); + + VEC_free (gimple, heap, worklist); + return false; + } + + live_p 
= false; + break; - live_p = false; + default: + break; } FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE) @@ -974,6 +991,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def) /* Case 4: operand is defined by a loop header phi - reduction */ case vect_reduction_def: + case vect_double_reduction_def: case vect_nested_cycle: { struct loop *loop; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 05f5e4783f7..c7dab10c13f 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -61,6 +61,7 @@ enum vect_def_type { vect_internal_def, vect_induction_def, vect_reduction_def, + vect_double_reduction_def, vect_nested_cycle, vect_unknown_def_type }; @@ -822,7 +823,7 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *, /* In tree-vect-loop.c. */ /* FORNOW: Used in tree-parloops.c. */ extern void destroy_loop_vec_info (loop_vec_info, bool); -extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool); +extern gimple vect_is_simple_reduction (loop_vec_info, gimple, bool, bool *); /* Drive for loop analysis stage. */ extern loop_vec_info vect_analyze_loop (struct loop *); /* Drive for loop transformation stage. */ -- 2.30.2