From: Jakub Jelinek Date: Wed, 2 Oct 2019 10:18:50 +0000 (+0200) Subject: re PR tree-optimization/91940 (__builtin_bswap16 loop optimization) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9ff9a0a5e6edd8729f559bf86ca06f781c4da246;p=gcc.git re PR tree-optimization/91940 (__builtin_bswap16 loop optimization) PR tree-optimization/91940 * tree-vect-patterns.c: Include tree-vector-builder.h and vec-perm-indices.h. (vect_recog_rotate_pattern): Also handle __builtin_bswap16, either by unpromoting the argument back to uint16_t, or by converting into a rotate, or into shifts plus ior. * gcc.dg/vect/vect-bswap16.c: Add -msse4 on x86, run on all targets, expect vectorized 1 loops message on both vect_bswap and sse4_runtime targets. * gcc.dg/vect/vect-bswap16a.c: New test. From-SVN: r276442 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b7830a44794..47093ed5296 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2019-10-02 Jakub Jelinek + + PR tree-optimization/91940 + * tree-vect-patterns.c: Include tree-vector-builder.h and + vec-perm-indices.h. + (vect_recog_rotate_pattern): Also handle __builtin_bswap16, either by + unpromoting the argument back to uint16_t, or by converting into a + rotate, or into shifts plus ior. + 2019-10-02 Richard Biener * tree-vectorizer.h (stmt_vec_info_type::cycle_phi_info_type): diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 834ee454f9a..4cb43036465 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2019-10-02 Jakub Jelinek + + PR tree-optimization/91940 + * gcc.dg/vect/vect-bswap16.c: Add -msse4 on x86, run on all targets, + expect vectorized 1 loops message on both vect_bswap and sse4_runtime + targets. + * gcc.dg/vect/vect-bswap16a.c: New test. + 2019-10-02 Joseph Myers * gcc.dg/asm-scope-1.c, gcc.dg/cpp/c11-scope-1.c, diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c index 3c98b07e425..d29b352b832 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c @@ -1,4 +1,4 @@ -/* { dg-require-effective-target vect_bswap } */ +/* { dg-additional-options "-msse4" { target sse4_runtime } } */ #include "tree-vect.h" @@ -39,4 +39,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_bswap || sse4_runtime } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c new file mode 100644 index 00000000000..730dc4e8352 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c @@ -0,0 +1,5 @@ +/* { dg-additional-options "-msse2 -mno-sse3" { target sse2_runtime } } */ + +#include "vect-bswap16.c" + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_shift } } } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 4dfebbefce6..09db74bdd77 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see #include "cgraph.h" #include "omp-simd-clone.h" #include "predict.h" +#include "tree-vector-builder.h" +#include "vec-perm-indices.h" /* Return true if we have a useful VR_RANGE range for VAR, storing it in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */ @@ -2168,24 +2170,107 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out) enum vect_def_type dt; optab optab1, optab2; edge ext_def = NULL; + bool bswap16_p = false; - if (!is_gimple_assign (last_stmt)) - return NULL; + if (is_gimple_assign (last_stmt)) + { + rhs_code = gimple_assign_rhs_code (last_stmt); + switch (rhs_code) + { + case LROTATE_EXPR: + case RROTATE_EXPR: + break; + default: + return NULL; + } - rhs_code = gimple_assign_rhs_code (last_stmt); - switch (rhs_code) + lhs = gimple_assign_lhs (last_stmt); + oprnd0 = gimple_assign_rhs1 (last_stmt); + type = TREE_TYPE (oprnd0); + oprnd1 = gimple_assign_rhs2 (last_stmt); + } + else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16)) { - case LROTATE_EXPR: - case RROTATE_EXPR: - break; - default: - return NULL; + /* __builtin_bswap16 (x) is another form of x r>> 8. + The vectorizer has bswap support, but only if the argument isn't + promoted. */ + lhs = gimple_call_lhs (last_stmt); + oprnd0 = gimple_call_arg (last_stmt, 0); + type = TREE_TYPE (oprnd0); + if (TYPE_PRECISION (TREE_TYPE (lhs)) != 16 + || TYPE_PRECISION (type) <= 16 + || TREE_CODE (oprnd0) != SSA_NAME + || BITS_PER_UNIT != 8 + || !TYPE_UNSIGNED (TREE_TYPE (lhs))) + return NULL; + + stmt_vec_info def_stmt_info; + if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt)) + return NULL; + + if (dt != vect_internal_def) + return NULL; + + if (gimple_assign_cast_p (def_stmt)) + { + def = gimple_assign_rhs1 (def_stmt); + if (INTEGRAL_TYPE_P (TREE_TYPE (def)) + && TYPE_PRECISION (TREE_TYPE (def)) == 16) + oprnd0 = def; + } + + type = TREE_TYPE (lhs); + vectype = get_vectype_for_scalar_type (type); + if (vectype == NULL_TREE) + return NULL; + + if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype)) + { + /* The encoding uses one stepped pattern for each byte in the + 16-bit word. */ + vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3); + for (unsigned i = 0; i < 3; ++i) + for (unsigned j = 0; j < 2; ++j) + elts.quick_push ((i + 1) * 2 - j - 1); + + vec_perm_indices indices (elts, 1, + TYPE_VECTOR_SUBPARTS (char_vectype)); + if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) + { + /* vectorizable_bswap can handle the __builtin_bswap16 if we + undo the argument promotion. */ + if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0))) + { + def = vect_recog_temp_ssa_var (type, NULL); + def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); + append_pattern_def_seq (stmt_vinfo, def_stmt); + oprnd0 = def; + } + + /* Pattern detected. */ + vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); + + *type_out = vectype; + + /* Pattern supported. Create a stmt to be used to replace the + pattern, with the unpromoted argument. */ + var = vect_recog_temp_ssa_var (type, NULL); + pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt), + 1, oprnd0); + gimple_call_set_lhs (pattern_stmt, var); + gimple_call_set_fntype (as_a (pattern_stmt), + gimple_call_fntype (last_stmt)); + return pattern_stmt; + } + } + + oprnd1 = build_int_cst (integer_type_node, 8); + rhs_code = LROTATE_EXPR; + bswap16_p = true; } + else + return NULL; - lhs = gimple_assign_lhs (last_stmt); - oprnd0 = gimple_assign_rhs1 (last_stmt); - type = TREE_TYPE (oprnd0); - oprnd1 = gimple_assign_rhs2 (last_stmt); if (TREE_CODE (oprnd0) != SSA_NAME || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type) || !INTEGRAL_TYPE_P (type) @@ -2210,14 +2295,39 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out) optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector); if (optab1 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing) - return NULL; + { + use_rotate: + if (bswap16_p) + { + if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0))) + { + def = vect_recog_temp_ssa_var (type, NULL); + def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); + append_pattern_def_seq (stmt_vinfo, def_stmt); + oprnd0 = def; + } + + /* Pattern detected. */ + vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); + + *type_out = vectype; + + /* Pattern supported. Create a stmt to be used to replace the + pattern. */ + var = vect_recog_temp_ssa_var (type, NULL); + pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0, + oprnd1); + return pattern_stmt; + } + return NULL; + } if (is_a (vinfo) || dt != vect_internal_def) { optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar); if (optab2 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing) - return NULL; + goto use_rotate; } /* If vector/vector or vector/scalar shifts aren't supported by the target, @@ -2242,6 +2352,14 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out) *type_out = vectype; + if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0))) + { + def = vect_recog_temp_ssa_var (type, NULL); + def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); + append_pattern_def_seq (stmt_vinfo, def_stmt); + oprnd0 = def; + } + if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME) ext_def = vect_get_external_def_edge (vinfo, oprnd1);