From 37b141851078b5119156780c2d897639d483625b Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 9 Nov 2016 08:19:05 +0000 Subject: [PATCH] re PR target/78007 (Important loop from 482.sphinx3 is not vectorized) 2016-11-09 Richard Biener PR tree-optimization/78007 * tree-vect-stmts.c (vectorizable_bswap): New function. (vectorizable_call): Call vectorizable_bswap for BUILT_IN_BSWAP{16,32,64} if arguments are not promoted. * gcc.dg/vect/vect-bswap32.c: Adjust. * gcc.dg/vect/vect-bswap64.c: Likewise. From-SVN: r241992 --- gcc/ChangeLog | 7 ++ gcc/testsuite/ChangeLog | 6 ++ gcc/testsuite/gcc.dg/vect/vect-bswap32.c | 4 +- gcc/testsuite/gcc.dg/vect/vect-bswap64.c | 4 +- gcc/tree-vect-stmts.c | 116 +++++++++++++++++++++++ 5 files changed, 133 insertions(+), 4 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a96c955bf07..4b15823670a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2016-11-09 Richard Biener + + PR tree-optimization/78007 + * tree-vect-stmts.c (vectorizable_bswap): New function. + (vectorizable_call): Call vectorizable_bswap for + BUILT_IN_BSWAP{16,32,64} if arguments are not promoted. + 2016-11-09 Richard Biener * tree-vect-data-refs.c (vect_compute_data_ref_alignment): diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e822e6fc24b..888e9e2f6c0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-11-09 Richard Biener + + PR tree-optimization/78007 + * gcc.dg/vect/vect-bswap32.c: Adjust. + * gcc.dg/vect/vect-bswap64.c: Likewise. + 2016-11-09 Kugan Vivekanandarajah * gcc.dg/ipa/vrp7.c: New test. 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap32.c b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c index 17132baf57b..7f3a915ee97 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-bswap32.c +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c @@ -1,4 +1,4 @@ -/* { dg-require-effective-target vect_bswap } */ +/* { dg-additional-options "-msse4" { target sse4_runtime } } */ #include "tree-vect.h" @@ -42,4 +42,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_bswap || sse4_runtime } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap64.c b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c index 745a7e73265..b9e421d1de4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-bswap64.c +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c @@ -1,4 +1,4 @@ -/* { dg-require-effective-target vect_bswap } */ +/* { dg-additional-options "-msse4" { target sse4_runtime } } */ #include "tree-vect.h" @@ -42,4 +42,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_bswap || sse4_runtime } } } } */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index c29e73df946..b0b131d9245 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2432,6 +2432,116 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, return true; } +/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. 
*/ + +static bool +vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi, + gimple **vec_stmt, slp_tree slp_node, + tree vectype_in, enum vect_def_type *dt) +{ + tree op, vectype; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + unsigned ncopies, nunits; + + op = gimple_call_arg (stmt, 0); + vectype = STMT_VINFO_VECTYPE (stmt_info); + nunits = TYPE_VECTOR_SUBPARTS (vectype); + + /* Multiple types in SLP are handled by creating the appropriate number of + vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in + case of SLP. */ + if (slp_node) + ncopies = 1; + else + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; + + gcc_assert (ncopies >= 1); + + tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in); + if (! char_vectype) + return false; + + unsigned char *elts + = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype)); + unsigned char *elt = elts; + unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits; + for (unsigned i = 0; i < nunits; ++i) + for (unsigned j = 0; j < word_bytes; ++j) + *elt++ = (i + 1) * word_bytes - j - 1; + + if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts)) + return false; + + if (! vec_stmt) + { + STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ===" + "\n"); + if (! PURE_SLP_STMT (stmt_info)) + { + add_stmt_cost (stmt_info->vinfo->target_cost_data, + 1, vector_stmt, stmt_info, 0, vect_prologue); + add_stmt_cost (stmt_info->vinfo->target_cost_data, + ncopies, vec_perm, stmt_info, 0, vect_body); + } + return true; + } + + tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype)); + for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i) + telts[i] = build_int_cst (char_type_node, elts[i]); + tree bswap_vconst = build_vector (char_vectype, telts); + + /* Transform. 
*/ + vec<tree> vec_oprnds = vNULL; + gimple *new_stmt = NULL; + stmt_vec_info prev_stmt_info = NULL; + for (unsigned j = 0; j < ncopies; j++) + { + /* Handle uses. */ + if (j == 0) + vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1); + else + vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL); + + /* Arguments are ready. create the new vector stmt. */ + unsigned i; + tree vop; + FOR_EACH_VEC_ELT (vec_oprnds, i, vop) + { + tree tem = make_ssa_name (char_vectype); + new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, + char_vectype, vop)); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + tree tem2 = make_ssa_name (char_vectype); + new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR, + tem, tem, bswap_vconst); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + tem = make_ssa_name (vectype); + new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR, + vectype, tem2)); + vect_finish_stmt_generation (stmt, new_stmt, gsi); + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + } + + if (slp_node) + continue; + + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + + vec_oprnds.release (); + return true; +} + /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT in a single step. On success, store the binary pack code in @@ -2658,6 +2768,12 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, { 0, 1, 2, ... vf - 1 } vector. */ gcc_assert (nargs == 0); } + else if (modifier == NONE + && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16) + || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32) + || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64))) + return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node, + vectype_in, dt); else { if (dump_enabled_p ()) -- 2.30.2