From 1bda738bab8193f0fb4551672d3be928d2015cd2 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 29 May 2018 13:58:24 +0200 Subject: [PATCH] re PR target/85918 (Conversions to/from [unsigned] long long are not vectorized for AVX512DQ target) PR target/85918 * tree.def (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, VEC_PACK_FLOAT_EXPR): New tree codes. * tree-pretty-print.c (op_code_prio): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR. (dump_generic_node): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. * tree-inline.c (estimate_operator_cost): Likewise. * gimple-pretty-print.c (dump_binary_rhs): Handle VEC_PACK_FLOAT_EXPR. * fold-const.c (const_binop): Likewise. (const_unop): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR. * tree-cfg.c (verify_gimple_assign_unary): Likewise. (verify_gimple_assign_binary): Handle VEC_PACK_FLOAT_EXPR. * cfgexpand.c (expand_debug_expr): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. * expr.c (expand_expr_real_2): Likewise. * optabs.def (vec_packs_float_optab, vec_packu_float_optab, vec_unpack_sfix_trunc_hi_optab, vec_unpack_sfix_trunc_lo_optab, vec_unpack_ufix_trunc_hi_optab, vec_unpack_ufix_trunc_lo_optab): New optabs. * optabs.c (expand_widen_pattern_expr): For VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR use sign from result type rather than operand's type. (expand_binop_directly): For vec_packu_float_optab and vec_packs_float_optab allow result type to be different from operand's type. * optabs-tree.c (optab_for_tree_code): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. Formatting fixes. * tree-vect-generic.c (expand_vector_operations_1): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. * tree-vect-stmts.c (supportable_widening_operation): Handle FIX_TRUNC_EXPR. 
(supportable_narrowing_operation): Handle FLOAT_EXPR. * config/i386/i386.md (fixprefix, floatprefix): New code attributes. * config/i386/sse.md (*float<floatunssuffix>v2div2sf2): Rename to ... (float<floatunssuffix>v2div2sf2): ... this. Formatting fix. (vpckfloat_concat_mode, vpckfloat_temp_mode, vpckfloat_op_mode): New mode attributes. (vec_pack<floatprefix>_float_<mode>): New expander. (vunpckfixt_mode, vunpckfixt_model, vunpckfixt_extract_mode): New mode attributes. (vec_unpack_<fixprefix>fix_trunc_lo_<mode>, vec_unpack_<fixprefix>fix_trunc_hi_<mode>): New expanders. * doc/md.texi (vec_packs_float_@var{m}, vec_packu_float_@var{m}, vec_unpack_sfix_trunc_hi_@var{m}, vec_unpack_sfix_trunc_lo_@var{m}, vec_unpack_ufix_trunc_hi_@var{m}, vec_unpack_ufix_trunc_lo_@var{m}): Document. * doc/generic.texi (VEC_UNPACK_FLOAT_HI_EXPR, VEC_UNPACK_FLOAT_LO_EXPR): Fix pasto in description. (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, VEC_PACK_FLOAT_EXPR): Document. * gcc.target/i386/avx512dq-pr85918.c: Add -mprefer-vector-width=512 and -fno-vect-cost-model options. Add aligned(64) attribute to the arrays. Add suffix 1 to all functions and use 4 iterations rather than N. Add functions with conversions to and from float. Add new set of functions with 8 iterations and another one with 16 iterations, expect 24 vectorized loops instead of just 4. * gcc.target/i386/avx512dq-pr85918-2.c: New test. 
From-SVN: r260893 --- gcc/ChangeLog | 57 +++ gcc/cfgexpand.c | 3 + gcc/config/i386/i386.md | 2 + gcc/config/i386/sse.md | 81 +++- gcc/doc/generic.texi | 27 +- gcc/doc/md.texi | 22 + gcc/expr.c | 14 + gcc/fold-const.c | 15 +- gcc/gimple-pretty-print.c | 1 + gcc/optabs-tree.c | 61 ++- gcc/optabs.c | 15 +- gcc/optabs.def | 6 + gcc/testsuite/ChangeLog | 11 + .../gcc.target/i386/avx512dq-pr85918-2.c | 435 ++++++++++++++++++ .../gcc.target/i386/avx512dq-pr85918.c | 187 +++++++- gcc/tree-cfg.c | 20 + gcc/tree-inline.c | 3 + gcc/tree-pretty-print.c | 22 + gcc/tree-vect-generic.c | 5 +- gcc/tree-vect-stmts.c | 27 +- gcc/tree.def | 15 + 21 files changed, 970 insertions(+), 59 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-pr85918-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d2d02b2e0d1..6a3747c929a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,60 @@ +2018-05-29 Jakub Jelinek + + PR target/85918 + * tree.def (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, + VEC_PACK_FLOAT_EXPR): New tree codes. + * tree-pretty-print.c (op_code_prio): Handle + VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR. + (dump_generic_node): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, + VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. + * tree-inline.c (estimate_operator_cost): Likewise. + * gimple-pretty-print.c (dump_binary_rhs): Handle VEC_PACK_FLOAT_EXPR. + * fold-const.c (const_binop): Likewise. + (const_unop): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and + VEC_UNPACK_FIX_TRUNC_LO_EXPR. + * tree-cfg.c (verify_gimple_assign_unary): Likewise. + (verify_gimple_assign_binary): Handle VEC_PACK_FLOAT_EXPR. + * cfgexpand.c (expand_debug_expr): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR, + VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR. + * expr.c (expand_expr_real_2): Likewise. 
+ * optabs.def (vec_packs_float_optab, vec_packu_float_optab, + vec_unpack_sfix_trunc_hi_optab, vec_unpack_sfix_trunc_lo_optab, + vec_unpack_ufix_trunc_hi_optab, vec_unpack_ufix_trunc_lo_optab): New + optabs. + * optabs.c (expand_widen_pattern_expr): For + VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR use + sign from result type rather than operand's type. + (expand_binop_directly): For vec_packu_float_optab and + vec_packs_float_optab allow result type to be different from operand's + type. + * optabs-tree.c (optab_for_tree_code): Handle + VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and + VEC_PACK_FLOAT_EXPR. Formatting fixes. + * tree-vect-generic.c (expand_vector_operations_1): Handle + VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and + VEC_PACK_FLOAT_EXPR. + * tree-vect-stmts.c (supportable_widening_operation): Handle + FIX_TRUNC_EXPR. + (supportable_narrowing_operation): Handle FLOAT_EXPR. + * config/i386/i386.md (fixprefix, floatprefix): New code attributes. + * config/i386/sse.md (*floatv2div2sf2): Rename to ... + (floatv2div2sf2): ... this. Formatting fix. + (vpckfloat_concat_mode, vpckfloat_temp_mode, vpckfloat_op_mode): New + mode attributes. + (vec_pack_float_): New expander. + (vunpckfixt_mode, vunpckfixt_model, vunpckfixt_extract_mode): New mode + attributes. + (vec_unpack_fix_trunc_lo_, + vec_unpack_fix_trunc_hi_): New expanders. + * doc/md.texi (vec_packs_float_@var{m}, vec_packu_float_@var{m}, + vec_unpack_sfix_trunc_hi_@var{m}, vec_unpack_sfix_trunc_lo_@var{m}, + vec_unpack_ufix_trunc_hi_@var{m}, vec_unpack_ufix_trunc_lo_@var{m}): + Document. + * doc/generic.texi (VEC_UNPACK_FLOAT_HI_EXPR, + VEC_UNPACK_FLOAT_LO_EXPR): Fix pasto in description. + (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR, + VEC_PACK_FLOAT_EXPR): Document. 
+ 2018-05-29 Richard Biener * tree-vectorizer.h (struct vec_info): Add stmt_vec_infos diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c index 5c323be9b33..c61104d1683 100644 --- a/gcc/cfgexpand.c +++ b/gcc/cfgexpand.c @@ -5101,8 +5101,11 @@ expand_debug_expr (tree exp) case REALIGN_LOAD_EXPR: case VEC_COND_EXPR: case VEC_PACK_FIX_TRUNC_EXPR: + case VEC_PACK_FLOAT_EXPR: case VEC_PACK_SAT_EXPR: case VEC_PACK_TRUNC_EXPR: + case VEC_UNPACK_FIX_TRUNC_HI_EXPR: + case VEC_UNPACK_FIX_TRUNC_LO_EXPR: case VEC_UNPACK_FLOAT_HI_EXPR: case VEC_UNPACK_FLOAT_LO_EXPR: case VEC_UNPACK_HI_EXPR: diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 12995be295d..209bf3f97f4 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -982,11 +982,13 @@ (define_code_iterator any_fix [fix unsigned_fix]) (define_code_attr fixsuffix [(fix "") (unsigned_fix "u")]) (define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")]) +(define_code_attr fixprefix [(fix "s") (unsigned_fix "u")]) ;; Used in signed and unsigned float. (define_code_iterator any_float [float unsigned_float]) (define_code_attr floatsuffix [(float "") (unsigned_float "u")]) (define_code_attr floatunssuffix [(float "") (unsigned_float "uns")]) +(define_code_attr floatprefix [(float "s") (unsigned_float "u")]) ;; All integer modes. 
(define_mode_iterator SWI1248x [QI HI SI DI]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ed37b98933e..dd65e57ad6a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -4887,9 +4887,9 @@ (set_attr "prefix" "evex") (set_attr "mode" "")]) -(define_insn "*floatv2div2sf2" +(define_insn "floatv2div2sf2" [(set (match_operand:V4SF 0 "register_operand" "=v") - (vec_concat:V4SF + (vec_concat:V4SF (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm")) (const_vector:V2SF [(const_int 0) (const_int 0)])))] "TARGET_AVX512DQ && TARGET_AVX512VL" @@ -4898,6 +4898,33 @@ (set_attr "prefix" "evex") (set_attr "mode" "V4SF")]) +(define_mode_attr vpckfloat_concat_mode + [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")]) +(define_mode_attr vpckfloat_temp_mode + [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")]) +(define_mode_attr vpckfloat_op_mode + [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")]) + +(define_expand "vec_pack_float_" + [(match_operand: 0 "register_operand") + (any_float: + (match_operand:VI8_AVX512VL 1 "register_operand")) + (match_operand:VI8_AVX512VL 2 "register_operand")] + "TARGET_AVX512DQ" +{ + rtx r1 = gen_reg_rtx (mode); + rtx r2 = gen_reg_rtx (mode); + rtx (*gen) (rtx, rtx) = gen_float2; + emit_insn (gen (r1, operands[1])); + emit_insn (gen (r2, operands[2])); + if (mode == V2DImode) + emit_insn (gen_sse_movlhps (operands[0], r1, r2)); + else + emit_insn (gen_avx_vec_concat (operands[0], + r1, r2)); + DONE; +}) + (define_insn "floatv2div2sf2_mask" [(set (match_operand:V4SF 0 "register_operand" "=v") (vec_concat:V4SF @@ -5177,6 +5204,56 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_mode_attr vunpckfixt_mode + [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")]) +(define_mode_attr vunpckfixt_model + [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")]) +(define_mode_attr vunpckfixt_extract_mode + [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")]) + +(define_expand "vec_unpack_fix_trunc_lo_" + [(match_operand: 0 "register_operand") + 
(any_fix: + (match_operand:VF1_AVX512VL 1 "register_operand"))] + "TARGET_AVX512DQ" +{ + rtx tem = operands[1]; + if (mode != V4SFmode) + { + tem = gen_reg_rtx (mode); + emit_insn (gen_vec_extract_lo_ (tem, + operands[1])); + } + rtx (*gen) (rtx, rtx) + = gen_fix_trunc2; + emit_insn (gen (operands[0], tem)); + DONE; +}) + +(define_expand "vec_unpack_fix_trunc_hi_" + [(match_operand: 0 "register_operand") + (any_fix: + (match_operand:VF1_AVX512VL 1 "register_operand"))] + "TARGET_AVX512DQ" +{ + rtx tem; + if (mode != V4SFmode) + { + tem = gen_reg_rtx (mode); + emit_insn (gen_vec_extract_hi_ (tem, + operands[1])); + } + else + { + tem = gen_reg_rtx (V4SFmode); + emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e))); + } + rtx (*gen) (rtx, rtx) + = gen_fix_trunc2; + emit_insn (gen (operands[0], tem)); + DONE; +}) + (define_insn "ufix_trunc2" [(set (match_operand: 0 "register_operand" "=v") (unsigned_fix: diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi index 653a17c8e4d..a4d0af40886 100644 --- a/gcc/doc/generic.texi +++ b/gcc/doc/generic.texi @@ -1789,9 +1789,12 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}. @tindex VEC_UNPACK_LO_EXPR @tindex VEC_UNPACK_FLOAT_HI_EXPR @tindex VEC_UNPACK_FLOAT_LO_EXPR +@tindex VEC_UNPACK_FIX_TRUNC_HI_EXPR +@tindex VEC_UNPACK_FIX_TRUNC_LO_EXPR @tindex VEC_PACK_TRUNC_EXPR @tindex VEC_PACK_SAT_EXPR @tindex VEC_PACK_FIX_TRUNC_EXPR +@tindex VEC_PACK_FLOAT_EXPR @tindex VEC_COND_EXPR @tindex SAD_EXPR @@ -1846,10 +1849,22 @@ where the values are converted from fixed point to floating point. The single operand is a vector that contains @code{N} elements of the same integral type. The result is a vector that contains half as many elements of a floating point type whose size is twice as wide. In the case of -@code{VEC_UNPACK_HI_EXPR} the high @code{N/2} elements of the vector are -extracted, converted and widened. 
In the case of @code{VEC_UNPACK_LO_EXPR} +@code{VEC_UNPACK_FLOAT_HI_EXPR} the high @code{N/2} elements of the vector are +extracted, converted and widened. In the case of @code{VEC_UNPACK_FLOAT_LO_EXPR} the low @code{N/2} elements of the vector are extracted, converted and widened. +@item VEC_UNPACK_FIX_TRUNC_HI_EXPR +@itemx VEC_UNPACK_FIX_TRUNC_LO_EXPR +These nodes represent unpacking of the high and low parts of the input vector, +where the values are truncated from floating point to fixed point. The +single operand is a vector that contains @code{N} elements of the same +floating point type. The result is a vector that contains half as many +elements of an integral type whose size is twice as wide. In the case of +@code{VEC_UNPACK_FIX_TRUNC_HI_EXPR} the high @code{N/2} elements of the +vector are extracted and converted with truncation. In the case of +@code{VEC_UNPACK_FIX_TRUNC_LO_EXPR} the low @code{N/2} elements of the +vector are extracted and converted with truncation. + @item VEC_PACK_TRUNC_EXPR This node represents packing of truncated elements of the two input vectors into the output vector. Input operands are vectors that contain the same @@ -1875,6 +1890,14 @@ twice as many elements of an integral type whose size is half as wide. The elements of the two vectors are merged (concatenated) to form the output vector. +@item VEC_PACK_FLOAT_EXPR +This node represents packing of elements of the two input vectors into the +output vector, where the values are converted from fixed point to floating +point. Input operands are vectors that contain the same number of elements +of an integral type. The result is a vector that contains twice as many +elements of floating point type whose size is half as wide. The elements of +the two vectors are merged (concatenated) to form the output vector. + @item VEC_COND_EXPR These nodes represent @code{?:} expressions. The three operands must be vectors of the same size and number of elements. 
The second and third diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 02fbfb392a9..be37619f340 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5371,6 +5371,14 @@ of two vectors. Operands 1 and 2 are vectors of the same mode having N floating point elements of size S@. Operand 0 is the resulting vector in which 2*N elements of size N/2 are concatenated. +@cindex @code{vec_packs_float_@var{m}} instruction pattern +@cindex @code{vec_packu_float_@var{m}} instruction pattern +@item @samp{vec_packs_float_@var{m}}, @samp{vec_packu_float_@var{m}} +Narrow, convert to floating point type and merge the elements +of two vectors. Operands 1 and 2 are vectors of the same mode having N +signed/unsigned integral elements of size S@. Operand 0 is the resulting vector +in which 2*N elements of size S/2 are concatenated. + @cindex @code{vec_unpacks_hi_@var{m}} instruction pattern @cindex @code{vec_unpacks_lo_@var{m}} instruction pattern @item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}} @@ -5400,6 +5408,20 @@ has N elements of size S@. Convert the high/low elements of the vector using floating point conversion and place the resulting N/2 values of size 2*S in the output vector (operand 0). +@cindex @code{vec_unpack_sfix_trunc_hi_@var{m}} instruction pattern +@cindex @code{vec_unpack_sfix_trunc_lo_@var{m}} instruction pattern +@cindex @code{vec_unpack_ufix_trunc_hi_@var{m}} instruction pattern +@cindex @code{vec_unpack_ufix_trunc_lo_@var{m}} instruction pattern +@item @samp{vec_unpack_sfix_trunc_hi_@var{m}}, +@itemx @samp{vec_unpack_sfix_trunc_lo_@var{m}} +@itemx @samp{vec_unpack_ufix_trunc_hi_@var{m}} +@itemx @samp{vec_unpack_ufix_trunc_lo_@var{m}} +Extract, convert to signed/unsigned integer type and widen the high/low part of a +vector of floating point elements. The input vector (operand 1) +has N elements of size S@. Convert the high/low elements of the vector +to integers and place the resulting N/2 values of size 2*S in +the output vector (operand 0). 
+ @cindex @code{vec_widen_umult_hi_@var{m}} instruction pattern @cindex @code{vec_widen_umult_lo_@var{m}} instruction pattern @cindex @code{vec_widen_smult_hi_@var{m}} instruction pattern diff --git a/gcc/expr.c b/gcc/expr.c index ecc52923a28..51fbc326000 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -9458,6 +9458,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, case VEC_UNPACK_HI_EXPR: case VEC_UNPACK_LO_EXPR: + case VEC_UNPACK_FIX_TRUNC_HI_EXPR: + case VEC_UNPACK_FIX_TRUNC_LO_EXPR: { op0 = expand_normal (treeop0); temp = expand_widen_pattern_expr (ops, op0, NULL_RTX, NULL_RTX, @@ -9497,6 +9499,18 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, mode = TYPE_MODE (TREE_TYPE (treeop0)); goto binop; + case VEC_PACK_FLOAT_EXPR: + mode = TYPE_MODE (TREE_TYPE (treeop0)); + expand_operands (treeop0, treeop1, + subtarget, &op0, &op1, EXPAND_NORMAL); + this_optab = optab_for_tree_code (code, TREE_TYPE (treeop0), + optab_default); + target = expand_binop (mode, this_optab, op0, op1, target, + TYPE_UNSIGNED (TREE_TYPE (treeop0)), + OPTAB_LIB_WIDEN); + gcc_assert (target); + return target; + case VEC_PERM_EXPR: { expand_operands (treeop0, treeop1, target, &op0, &op1, EXPAND_NORMAL); diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 0f57f078199..3258aad44be 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -1622,6 +1622,7 @@ const_binop (enum tree_code code, tree type, tree arg1, tree arg2) case VEC_PACK_TRUNC_EXPR: case VEC_PACK_FIX_TRUNC_EXPR: + case VEC_PACK_FLOAT_EXPR: { unsigned int HOST_WIDE_INT out_nelts, in_nelts, i; @@ -1643,7 +1644,9 @@ const_binop (enum tree_code code, tree type, tree arg1, tree arg2) ? VECTOR_CST_ELT (arg1, i) : VECTOR_CST_ELT (arg2, i - in_nelts)); elt = fold_convert_const (code == VEC_PACK_TRUNC_EXPR - ? NOP_EXPR : FIX_TRUNC_EXPR, + ? NOP_EXPR + : code == VEC_PACK_FLOAT_EXPR + ? 
FLOAT_EXPR : FIX_TRUNC_EXPR, TREE_TYPE (type), elt); if (elt == NULL_TREE || !CONSTANT_CLASS_P (elt)) return NULL_TREE; @@ -1817,6 +1820,8 @@ const_unop (enum tree_code code, tree type, tree arg0) case VEC_UNPACK_HI_EXPR: case VEC_UNPACK_FLOAT_LO_EXPR: case VEC_UNPACK_FLOAT_HI_EXPR: + case VEC_UNPACK_FIX_TRUNC_LO_EXPR: + case VEC_UNPACK_FIX_TRUNC_HI_EXPR: { unsigned HOST_WIDE_INT out_nelts, in_nelts, i; enum tree_code subcode; @@ -1831,13 +1836,17 @@ const_unop (enum tree_code code, tree type, tree arg0) unsigned int offset = 0; if ((!BYTES_BIG_ENDIAN) ^ (code == VEC_UNPACK_LO_EXPR - || code == VEC_UNPACK_FLOAT_LO_EXPR)) + || code == VEC_UNPACK_FLOAT_LO_EXPR + || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR)) offset = out_nelts; if (code == VEC_UNPACK_LO_EXPR || code == VEC_UNPACK_HI_EXPR) subcode = NOP_EXPR; - else + else if (code == VEC_UNPACK_FLOAT_LO_EXPR + || code == VEC_UNPACK_FLOAT_HI_EXPR) subcode = FLOAT_EXPR; + else + subcode = FIX_TRUNC_EXPR; tree_vector_builder elts (type, out_nelts, 1); for (i = 0; i < out_nelts; i++) diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index 49e9e1276da..c0d6e153857 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -429,6 +429,7 @@ dump_binary_rhs (pretty_printer *buffer, gassign *gs, int spc, case VEC_PACK_TRUNC_EXPR: case VEC_PACK_SAT_EXPR: case VEC_PACK_FIX_TRUNC_EXPR: + case VEC_PACK_FLOAT_EXPR: case VEC_WIDEN_LSHIFT_HI_EXPR: case VEC_WIDEN_LSHIFT_LO_EXPR: case VEC_SERIES_EXPR: diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c index 73e66544ef9..11cea175caa 100644 --- a/gcc/optabs-tree.c +++ b/gcc/optabs-tree.c @@ -144,46 +144,58 @@ optab_for_tree_code (enum tree_code code, const_tree type, ? ssmsub_widen_optab : smsub_widen_optab)); case VEC_WIDEN_MULT_HI_EXPR: - return TYPE_UNSIGNED (type) ? - vec_widen_umult_hi_optab : vec_widen_smult_hi_optab; + return (TYPE_UNSIGNED (type) + ? 
vec_widen_umult_hi_optab : vec_widen_smult_hi_optab); case VEC_WIDEN_MULT_LO_EXPR: - return TYPE_UNSIGNED (type) ? - vec_widen_umult_lo_optab : vec_widen_smult_lo_optab; + return (TYPE_UNSIGNED (type) + ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab); case VEC_WIDEN_MULT_EVEN_EXPR: - return TYPE_UNSIGNED (type) ? - vec_widen_umult_even_optab : vec_widen_smult_even_optab; + return (TYPE_UNSIGNED (type) + ? vec_widen_umult_even_optab : vec_widen_smult_even_optab); case VEC_WIDEN_MULT_ODD_EXPR: - return TYPE_UNSIGNED (type) ? - vec_widen_umult_odd_optab : vec_widen_smult_odd_optab; + return (TYPE_UNSIGNED (type) + ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab); case VEC_WIDEN_LSHIFT_HI_EXPR: - return TYPE_UNSIGNED (type) ? - vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab; + return (TYPE_UNSIGNED (type) + ? vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab); case VEC_WIDEN_LSHIFT_LO_EXPR: - return TYPE_UNSIGNED (type) ? - vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab; + return (TYPE_UNSIGNED (type) + ? vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab); case VEC_UNPACK_HI_EXPR: - return TYPE_UNSIGNED (type) ? - vec_unpacku_hi_optab : vec_unpacks_hi_optab; + return (TYPE_UNSIGNED (type) + ? vec_unpacku_hi_optab : vec_unpacks_hi_optab); case VEC_UNPACK_LO_EXPR: - return TYPE_UNSIGNED (type) ? - vec_unpacku_lo_optab : vec_unpacks_lo_optab; + return (TYPE_UNSIGNED (type) + ? vec_unpacku_lo_optab : vec_unpacks_lo_optab); case VEC_UNPACK_FLOAT_HI_EXPR: /* The signedness is determined from input operand. */ - return TYPE_UNSIGNED (type) ? - vec_unpacku_float_hi_optab : vec_unpacks_float_hi_optab; + return (TYPE_UNSIGNED (type) + ? vec_unpacku_float_hi_optab : vec_unpacks_float_hi_optab); case VEC_UNPACK_FLOAT_LO_EXPR: /* The signedness is determined from input operand. */ - return TYPE_UNSIGNED (type) ? - vec_unpacku_float_lo_optab : vec_unpacks_float_lo_optab; + return (TYPE_UNSIGNED (type) + ? 
vec_unpacku_float_lo_optab : vec_unpacks_float_lo_optab); + + case VEC_UNPACK_FIX_TRUNC_HI_EXPR: + /* The signedness is determined from output operand. */ + return (TYPE_UNSIGNED (type) + ? vec_unpack_ufix_trunc_hi_optab + : vec_unpack_sfix_trunc_hi_optab); + + case VEC_UNPACK_FIX_TRUNC_LO_EXPR: + /* The signedness is determined from output operand. */ + return (TYPE_UNSIGNED (type) + ? vec_unpack_ufix_trunc_lo_optab + : vec_unpack_sfix_trunc_lo_optab); case VEC_PACK_TRUNC_EXPR: return vec_pack_trunc_optab; @@ -193,8 +205,13 @@ optab_for_tree_code (enum tree_code code, const_tree type, case VEC_PACK_FIX_TRUNC_EXPR: /* The signedness is determined from output operand. */ - return TYPE_UNSIGNED (type) ? - vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab; + return (TYPE_UNSIGNED (type) + ? vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab); + + case VEC_PACK_FLOAT_EXPR: + /* The signedness is determined from input operand. */ + return (TYPE_UNSIGNED (type) + ? vec_packu_float_optab : vec_packs_float_optab); case VEC_DUPLICATE_EXPR: return vec_duplicate_optab; diff --git a/gcc/optabs.c b/gcc/optabs.c index 278046aa061..cadf4676c98 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -259,8 +259,15 @@ expand_widen_pattern_expr (sepops ops, rtx op0, rtx op1, rtx wide_op, oprnd0 = ops->op0; tmode0 = TYPE_MODE (TREE_TYPE (oprnd0)); - widen_pattern_optab = - optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default); + if (ops->code == VEC_UNPACK_FIX_TRUNC_HI_EXPR + || ops->code == VEC_UNPACK_FIX_TRUNC_LO_EXPR) + /* The sign is from the result type rather than operand's type + for these ops. 
*/ + widen_pattern_optab + = optab_for_tree_code (ops->code, ops->type, optab_default); + else + widen_pattern_optab + = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default); if (ops->code == WIDEN_MULT_PLUS_EXPR || ops->code == WIDEN_MULT_MINUS_EXPR) icode = find_widening_optab_handler (widen_pattern_optab, @@ -1068,7 +1075,9 @@ expand_binop_directly (enum insn_code icode, machine_mode mode, optab binoptab, || binoptab == vec_pack_usat_optab || binoptab == vec_pack_ssat_optab || binoptab == vec_pack_ufix_trunc_optab - || binoptab == vec_pack_sfix_trunc_optab) + || binoptab == vec_pack_sfix_trunc_optab + || binoptab == vec_packu_float_optab + || binoptab == vec_packs_float_optab) { /* The mode of the result is different then the mode of the arguments. */ diff --git a/gcc/optabs.def b/gcc/optabs.def index a1ecb757bb2..11af7aaeb15 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -327,10 +327,16 @@ OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a") OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a") OPTAB_D (vec_pack_ufix_trunc_optab, "vec_pack_ufix_trunc_$a") OPTAB_D (vec_pack_usat_optab, "vec_pack_usat_$a") +OPTAB_D (vec_packs_float_optab, "vec_packs_float_$a") +OPTAB_D (vec_packu_float_optab, "vec_packu_float_$a") OPTAB_D (vec_perm_optab, "vec_perm$a") OPTAB_D (vec_realign_load_optab, "vec_realign_load_$a") OPTAB_D (vec_set_optab, "vec_set$a") OPTAB_D (vec_shr_optab, "vec_shr_$a") +OPTAB_D (vec_unpack_sfix_trunc_hi_optab, "vec_unpack_sfix_trunc_hi_$a") +OPTAB_D (vec_unpack_sfix_trunc_lo_optab, "vec_unpack_sfix_trunc_lo_$a") +OPTAB_D (vec_unpack_ufix_trunc_hi_optab, "vec_unpack_ufix_trunc_hi_$a") +OPTAB_D (vec_unpack_ufix_trunc_lo_optab, "vec_unpack_ufix_trunc_lo_$a") OPTAB_D (vec_unpacks_float_hi_optab, "vec_unpacks_float_hi_$a") OPTAB_D (vec_unpacks_float_lo_optab, "vec_unpacks_float_lo_$a") OPTAB_D (vec_unpacks_hi_optab, "vec_unpacks_hi_$a") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 030573496eb..28a28448741 100644 --- 
a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2018-05-29 Jakub Jelinek + + PR target/85918 + * gcc.target/i386/avx512dq-pr85918.c: Add -mprefer-vector-width=512 + and -fno-vect-cost-model options. Add aligned(64) attribute to the + arrays. Add suffix 1 to all functions and use 4 iterations rather + than N. Add functions with conversions to and from float. + Add new set of functions with 8 iterations and another one + with 16 iterations, expect 24 vectorized loops instead of just 4. + * gcc.target/i386/avx512dq-pr85918-2.c: New test. + 2018-05-29 Javier Miranda * gnat.dg/equal2.adb: New testcase. diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr85918-2.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr85918-2.c new file mode 100644 index 00000000000..fdf7b1584d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr85918-2.c @@ -0,0 +1,435 @@ +/* PR target/85918 */ +/* { dg-do run } */ +/* { dg-require-effective-target avx512dq } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-options "-O3 -mavx512dq -mavx512vl -mprefer-vector-width=512 -fno-vect-cost-model" } */ + +#define AVX512DQ +#define AVX512VL +#define DO_TEST avx512dqvl_test + +static void avx512dqvl_test (void); + +#include "avx512-check.h" + +#define N 16 + +long long ll[N] __attribute__((aligned (64))); +unsigned long long ull[N] __attribute__((aligned (64))); +float f[N] __attribute__((aligned (64))); +double d[N] __attribute__((aligned (64))); + +__attribute__((noipa)) void +ll2d1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + d[i] = ll[i]; +} + +__attribute__((noipa)) void +ull2d1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + d[i] = ull[i]; +} + +__attribute__((noipa)) void +d2ll1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + ll[i] = d[i]; +} + +__attribute__((noipa)) void +d2ull1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + ull[i] = d[i]; +} + +__attribute__((noipa)) void +ll2f1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + f[i] = ll[i]; +} 
+ +__attribute__((noipa)) void +ull2f1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + f[i] = ull[i]; +} + +__attribute__((noipa)) void +f2ll1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + ll[i] = f[i]; +} + +__attribute__((noipa)) void +f2ull1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + ull[i] = f[i]; +} + +__attribute__((noipa)) void +ll2d2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + d[i] = ll[i]; +} + +__attribute__((noipa)) void +ull2d2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + d[i] = ull[i]; +} + +__attribute__((noipa)) void +d2ll2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + ll[i] = d[i]; +} + +__attribute__((noipa)) void +d2ull2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + ull[i] = d[i]; +} + +__attribute__((noipa)) void +ll2f2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + f[i] = ll[i]; +} + +__attribute__((noipa)) void +ull2f2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + f[i] = ull[i]; +} + +__attribute__((noipa)) void +f2ll2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + ll[i] = f[i]; +} + +__attribute__((noipa)) void +f2ull2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + ull[i] = f[i]; +} + +__attribute__((noipa)) void +ll2d3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + d[i] = ll[i]; +} + +__attribute__((noipa)) void +ull2d3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + d[i] = ull[i]; +} + +__attribute__((noipa)) void +d2ll3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + ll[i] = d[i]; +} + +__attribute__((noipa)) void +d2ull3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + ull[i] = d[i]; +} + +__attribute__((noipa)) void +ll2f3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + f[i] = ll[i]; +} + +__attribute__((noipa)) void +ull2f3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + f[i] = ull[i]; +} + +__attribute__((noipa)) void +f2ll3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + ll[i] = f[i]; +} + +__attribute__((noipa)) void +f2ull3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + ull[i] = 
f[i]; +} + +unsigned long long ullt[] = { + 13835058055282163712ULL, 9223653511831486464ULL, 9218868437227405312ULL, + 1ULL, 9305281255077576704ULL, 1191936ULL, 18446462598732840960ULL, 0ULL, + 9223372036854775808ULL, 4611686018427387904ULL, 2305843009213693952ULL, + 9ULL, 9223653511831486464ULL, 0ULL, 65536ULL, 131071ULL +}; +float uft[] = { + 13835058055282163712.0f, 9223653511831486464.0f, 9218868437227405312.0f, + 1.0f, 9305281255077576704.0f, 1191936.0f, 18446462598732840960.0f, 0.0f, + 9223372036854775808.0f, 4611686018427387904.0f, 2305843009213693952.0f, + 9.0f, 9223653511831486464.0f, 0.0f, 65536.0f, 131071.0f +}; +long long llt[] = { + 9223090561878065152LL, -9223372036854775807LL - 1, -9223090561878065152LL, + -4LL, -8074672656898588672LL, 8074672656898588672LL, 29LL, -15LL, + 7574773098260463616LL, -7579276697887834112LL, -8615667562136469504LL, + 148LL, -255LL, 9151595917793558528LL, -9218868437227405312LL, 9LL +}; +float ft[] = { + 9223090561878065152.0f, -9223372036854775808.0f, -9223090561878065152.0f, + -4.0f, -8074672656898588672.0f, 8074672656898588672.0f, 29.0f, -15.0f, + 7574773098260463616.0f, -7579276697887834112.0f, -8615667562136469504.0f, + 148.0f, -255.0f, 9151595917793558528.0f, -9218868437227405312.0f, 9.0f +}; + +static void +avx512dqvl_test (void) +{ + int i; + for (i = 0; i < 4; i++) + { + ll[i] = llt[i]; + ull[i] = ullt[i]; + } + ll2d1 (); + for (i = 0; i < 4; i++) + if (d[i] != ft[i]) + abort (); + ull2d1 (); + for (i = 0; i < 4; i++) + if (d[i] != uft[i]) + abort (); + else + d[i] = ft[i + 4]; + d2ll1 (); + for (i = 0; i < 4; i++) + if (ll[i] != llt[i + 4]) + abort (); + else + d[i] = uft[i + 4]; + d2ull1 (); + for (i = 0; i < 4; i++) + if (ull[i] != ullt[i + 4]) + abort (); + else + { + ll[i] = llt[i + 8]; + ull[i] = ullt[i + 8]; + } + ll2f1 (); + for (i = 0; i < 4; i++) + if (f[i] != ft[i + 8]) + abort (); + ull2f1 (); + for (i = 0; i < 4; i++) + if (f[i] != uft[i + 8]) + abort (); + else + f[i] = ft[i + 12]; + f2ll1 (); + for 
(i = 0; i < 4; i++) + if (ll[i] != llt[i + 12]) + abort (); + else + f[i] = uft[i + 12]; + f2ull1 (); + for (i = 0; i < 4; i++) + if (ull[i] != ullt[i + 12]) + abort (); + for (i = 0; i < 8; i++) + { + ll[i] = llt[i]; + ull[i] = ullt[i]; + } + ll2d2 (); + for (i = 0; i < 8; i++) + if (d[i] != ft[i]) + abort (); + ull2d2 (); + for (i = 0; i < 8; i++) + if (d[i] != uft[i]) + abort (); + else + { + d[i] = ft[i]; + ll[i] = 1234567LL; + ull[i] = 7654321ULL; + } + d2ll2 (); + for (i = 0; i < 8; i++) + if (ll[i] != llt[i]) + abort (); + else + d[i] = uft[i]; + d2ull2 (); + for (i = 0; i < 8; i++) + if (ull[i] != ullt[i]) + abort (); + else + { + ll[i] = llt[i + 8]; + ull[i] = ullt[i + 8]; + } + ll2f2 (); + for (i = 0; i < 8; i++) + if (f[i] != ft[i + 8]) + abort (); + ull2f2 (); + for (i = 0; i < 8; i++) + if (f[i] != uft[i + 8]) + abort (); + else + { + f[i] = ft[i + 8]; + ll[i] = 1234567LL; + ull[i] = 7654321ULL; + } + f2ll2 (); + for (i = 0; i < 8; i++) + if (ll[i] != llt[i + 8]) + abort (); + else + f[i] = uft[i + 8]; + f2ull2 (); + for (i = 0; i < 8; i++) + if (ull[i] != ullt[i + 8]) + abort (); + for (i = 0; i < 16; i++) + { + ll[i] = llt[i]; + ull[i] = ullt[i]; + } + ll2d3 (); + for (i = 0; i < 16; i++) + if (d[i] != ft[i]) + abort (); + ull2d3 (); + for (i = 0; i < 16; i++) + if (d[i] != uft[i]) + abort (); + else + { + d[i] = ft[i]; + ll[i] = 1234567LL; + ull[i] = 7654321ULL; + } + d2ll3 (); + for (i = 0; i < 16; i++) + if (ll[i] != llt[i]) + abort (); + else + d[i] = uft[i]; + d2ull3 (); + for (i = 0; i < 16; i++) + if (ull[i] != ullt[i]) + abort (); + else + { + ll[i] = llt[i]; + ull[i] = ullt[i]; + f[i] = 3.0f; + d[i] = 4.0; + } + ll2f3 (); + for (i = 0; i < 16; i++) + if (f[i] != ft[i]) + abort (); + ull2f3 (); + for (i = 0; i < 16; i++) + if (f[i] != uft[i]) + abort (); + else + { + f[i] = ft[i]; + ll[i] = 1234567LL; + ull[i] = 7654321ULL; + } + f2ll3 (); + for (i = 0; i < 16; i++) + if (ll[i] != llt[i]) + abort (); + else + f[i] = uft[i]; + f2ull3 (); + for 
(i = 0; i < 16; i++) + if (ull[i] != ullt[i]) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-pr85918.c b/gcc/testsuite/gcc.target/i386/avx512dq-pr85918.c index 3c69f93d5b7..79593f285b7 100644 --- a/gcc/testsuite/gcc.target/i386/avx512dq-pr85918.c +++ b/gcc/testsuite/gcc.target/i386/avx512dq-pr85918.c @@ -1,42 +1,203 @@ /* PR target/85918 */ /* { dg-do compile } */ -/* { dg-options "-O3 -mavx512dq -mavx512vl -fdump-tree-vect-details" } */ -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */ +/* { dg-options "-O3 -mavx512dq -mavx512vl -mprefer-vector-width=512 -fno-vect-cost-model -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 24 "vect" } } */ #define N 1024 -long long ll[N]; -unsigned long long ull[N]; -double d[N]; +long long ll[N] __attribute__((aligned (64))); +unsigned long long ull[N] __attribute__((aligned (64))); +float f[N] __attribute__((aligned (64))); +double d[N] __attribute__((aligned (64))); -void ll2d (void) +void ll2d1 (void) { int i; - for (i = 0; i < N; i++) + for (i = 0; i < 4; i++) d[i] = ll[i]; } -void ull2d (void) +void ull2d1 (void) { int i; - for (i = 0; i < N; i++) + for (i = 0; i < 4; i++) d[i] = ull[i]; } -void d2ll (void) +void d2ll1 (void) { int i; - for (i = 0; i < N; i++) + for (i = 0; i < 4; i++) ll[i] = d[i]; } -void d2ull (void) +void d2ull1 (void) { int i; - for (i = 0; i < N; i++) + for (i = 0; i < 4; i++) ull[i] = d[i]; } + +void ll2f1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + f[i] = ll[i]; +} + +void ull2f1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + f[i] = ull[i]; +} + +void f2ll1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + ll[i] = f[i]; +} + +void f2ull1 (void) +{ + int i; + + for (i = 0; i < 4; i++) + ull[i] = f[i]; +} + +void ll2d2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + d[i] = ll[i]; +} + +void ull2d2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + d[i] = ull[i]; +} + +void d2ll2 (void) +{ 
+ int i; + + for (i = 0; i < 8; i++) + ll[i] = d[i]; +} + +void d2ull2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + ull[i] = d[i]; +} + +void ll2f2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + f[i] = ll[i]; +} + +void ull2f2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + f[i] = ull[i]; +} + +void f2ll2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + ll[i] = f[i]; +} + +void f2ull2 (void) +{ + int i; + + for (i = 0; i < 8; i++) + ull[i] = f[i]; +} + +void ll2d3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + d[i] = ll[i]; +} + +void ull2d3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + d[i] = ull[i]; +} + +void d2ll3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + ll[i] = d[i]; +} + +void d2ull3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + ull[i] = d[i]; +} + +void ll2f3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + f[i] = ll[i]; +} + +void ull2f3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + f[i] = ull[i]; +} + +void f2ll3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + ll[i] = f[i]; +} + +void f2ull3 (void) +{ + int i; + + for (i = 0; i < 16; i++) + ull[i] = f[i]; +} diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 68f4fd3a7cc..ab2feed19d5 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -3676,6 +3676,8 @@ verify_gimple_assign_unary (gassign *stmt) case VEC_UNPACK_LO_EXPR: case VEC_UNPACK_FLOAT_HI_EXPR: case VEC_UNPACK_FLOAT_LO_EXPR: + case VEC_UNPACK_FIX_TRUNC_HI_EXPR: + case VEC_UNPACK_FIX_TRUNC_LO_EXPR: /* FIXME. 
*/ return false; @@ -4003,6 +4005,24 @@ verify_gimple_assign_binary (gassign *stmt) return false; } + case VEC_PACK_FLOAT_EXPR: + if (TREE_CODE (rhs1_type) != VECTOR_TYPE + || TREE_CODE (lhs_type) != VECTOR_TYPE + || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type)) + || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type)) + || !types_compatible_p (rhs1_type, rhs2_type) + || maybe_ne (GET_MODE_SIZE (element_mode (rhs1_type)), + 2 * GET_MODE_SIZE (element_mode (lhs_type)))) + { + error ("type mismatch in vector pack expression"); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + debug_generic_expr (rhs2_type); + return true; + } + + return false; + case MULT_EXPR: case MULT_HIGHPART_EXPR: case TRUNC_DIV_EXPR: diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 78811317e26..ae36cc710fd 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3924,9 +3924,12 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights, case VEC_UNPACK_LO_EXPR: case VEC_UNPACK_FLOAT_HI_EXPR: case VEC_UNPACK_FLOAT_LO_EXPR: + case VEC_UNPACK_FIX_TRUNC_HI_EXPR: + case VEC_UNPACK_FIX_TRUNC_LO_EXPR: case VEC_PACK_TRUNC_EXPR: case VEC_PACK_SAT_EXPR: case VEC_PACK_FIX_TRUNC_EXPR: + case VEC_PACK_FLOAT_EXPR: case VEC_WIDEN_LSHIFT_HI_EXPR: case VEC_WIDEN_LSHIFT_LO_EXPR: case VEC_DUPLICATE_EXPR: diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 5a8c8eb7946..125507ee348 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -3235,6 +3235,18 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags, pp_string (pp, " > "); break; + case VEC_UNPACK_FIX_TRUNC_HI_EXPR: + pp_string (pp, " VEC_UNPACK_FIX_TRUNC_HI_EXPR < "); + dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (pp, " > "); + break; + + case VEC_UNPACK_FIX_TRUNC_LO_EXPR: + pp_string (pp, " VEC_UNPACK_FIX_TRUNC_LO_EXPR < "); + dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (pp, " > "); + break; + case 
VEC_PACK_TRUNC_EXPR: pp_string (pp, " VEC_PACK_TRUNC_EXPR < "); dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false); @@ -3259,6 +3271,14 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags, pp_string (pp, " > "); break; + case VEC_PACK_FLOAT_EXPR: + pp_string (pp, " VEC_PACK_FLOAT_EXPR < "); + dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false); + pp_string (pp, ", "); + dump_generic_node (pp, TREE_OPERAND (node, 1), spc, flags, false); + pp_string (pp, " > "); + break; + case BLOCK: dump_block_node (pp, node, spc, flags); break; @@ -3575,6 +3595,8 @@ op_code_prio (enum tree_code code) case VEC_UNPACK_LO_EXPR: case VEC_UNPACK_FLOAT_HI_EXPR: case VEC_UNPACK_FLOAT_LO_EXPR: + case VEC_UNPACK_FIX_TRUNC_HI_EXPR: + case VEC_UNPACK_FIX_TRUNC_LO_EXPR: case VEC_PACK_TRUNC_EXPR: case VEC_PACK_SAT_EXPR: return 16; diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index 2ade60b3398..46502c42c74 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -1653,7 +1653,8 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi) /* The signedness is determined from input argument. */ if (code == VEC_UNPACK_FLOAT_HI_EXPR - || code == VEC_UNPACK_FLOAT_LO_EXPR) + || code == VEC_UNPACK_FLOAT_LO_EXPR + || code == VEC_PACK_FLOAT_EXPR) { type = TREE_TYPE (rhs1); /* We do not know how to scalarize those. 
*/ @@ -1670,6 +1671,8 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi) || code == VEC_WIDEN_MULT_ODD_EXPR || code == VEC_UNPACK_HI_EXPR || code == VEC_UNPACK_LO_EXPR + || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR + || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR || code == VEC_PACK_TRUNC_EXPR || code == VEC_PACK_SAT_EXPR || code == VEC_PACK_FIX_TRUNC_EXPR diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 759ea23b6f2..caa157fd3bd 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -10249,10 +10249,10 @@ vect_is_simple_use (tree operand, vec_info *vinfo, vector form (i.e., when operating on arguments of type VECTYPE_IN producing a result of type VECTYPE_OUT). - Widening operations we currently support are NOP (CONVERT), FLOAT - and WIDEN_MULT. This function checks if these operations are supported - by the target platform either directly (via vector tree-codes), or via - target builtins. + Widening operations we currently support are NOP (CONVERT), FLOAT, + FIX_TRUNC and WIDEN_MULT. This function checks if these operations + are supported by the target platform either directly (via vector + tree-codes), or via target builtins. Output: - CODE1 and CODE2 are codes of vector operations to be used when @@ -10382,10 +10382,9 @@ supportable_widening_operation (enum tree_code code, gimple *stmt, break; case FIX_TRUNC_EXPR: - /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/ - VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for - computing the operation. */ - return false; + c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR; + c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR; + break; default: gcc_unreachable (); @@ -10493,8 +10492,8 @@ supportable_widening_operation (enum tree_code code, gimple *stmt, vector form (i.e., when operating on arguments of type VECTYPE_IN and producing a result of type VECTYPE_OUT). - Narrowing operations we currently support are NOP (CONVERT) and - FIX_TRUNC. 
This function checks if these operations are supported by + Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC + and FLOAT. This function checks if these operations are supported by the target platform directly via vector tree-codes. Output: @@ -10535,9 +10534,8 @@ supportable_narrowing_operation (enum tree_code code, break; case FLOAT_EXPR: - /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR - tree code and optabs used for computing the operation. */ - return false; + c1 = VEC_PACK_FLOAT_EXPR; + break; default: gcc_unreachable (); @@ -10566,6 +10564,9 @@ supportable_narrowing_operation (enum tree_code code, || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2, TYPE_VECTOR_SUBPARTS (narrow_vectype))); + if (code == FLOAT_EXPR) + return false; + /* Check if it's a multi-step conversion that can be done using intermediate types. */ prev_mode = vec_mode; diff --git a/gcc/tree.def b/gcc/tree.def index c660b2c3f65..9696fee6813 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1371,6 +1371,15 @@ DEFTREECODE (VEC_UNPACK_LO_EXPR, "vec_unpack_lo_expr", tcc_unary, 1) DEFTREECODE (VEC_UNPACK_FLOAT_HI_EXPR, "vec_unpack_float_hi_expr", tcc_unary, 1) DEFTREECODE (VEC_UNPACK_FLOAT_LO_EXPR, "vec_unpack_float_lo_expr", tcc_unary, 1) +/* Unpack (extract) the high/low elements of the input vector, convert + floating point values to integer and widen elements into the output + vector. The input vector has twice as many elements as the output + vector, that are half the size of the elements of the output vector. */ +DEFTREECODE (VEC_UNPACK_FIX_TRUNC_HI_EXPR, "vec_unpack_fix_trunc_hi_expr", + tcc_unary, 1) +DEFTREECODE (VEC_UNPACK_FIX_TRUNC_LO_EXPR, "vec_unpack_fix_trunc_lo_expr", + tcc_unary, 1) + /* Pack (demote/narrow and merge) the elements of the two input vectors into the output vector using truncation/saturation. 
The elements of the input vectors are twice the size of the elements of the @@ -1384,6 +1393,12 @@ DEFTREECODE (VEC_PACK_SAT_EXPR, "vec_pack_sat_expr", tcc_binary, 2) the output vector. */ DEFTREECODE (VEC_PACK_FIX_TRUNC_EXPR, "vec_pack_fix_trunc_expr", tcc_binary, 2) +/* Convert fixed point values of the two input vectors to floating point + and pack (narrow and merge) the elements into the output vector. The + elements of the input vector are twice the size of the elements of + the output vector. */ +DEFTREECODE (VEC_PACK_FLOAT_EXPR, "vec_pack_float_expr", tcc_binary, 2) + /* Widening vector shift left in bits. Operand 0 is a vector to be shifted with N elements of size S. Operand 1 is an integer shift amount in bits. -- 2.30.2