+2018-05-29 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/85918
+ * tree.def (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR,
+ VEC_PACK_FLOAT_EXPR): New tree codes.
+ * tree-pretty-print.c (op_code_prio): Handle
+ VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR.
+ (dump_generic_node): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR,
+ VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR.
+ * tree-inline.c (estimate_operator_cost): Likewise.
+ * gimple-pretty-print.c (dump_binary_rhs): Handle VEC_PACK_FLOAT_EXPR.
+ * fold-const.c (const_binop): Likewise.
+ (const_unop): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR and
+ VEC_UNPACK_FIX_TRUNC_LO_EXPR.
+ * tree-cfg.c (verify_gimple_assign_unary): Likewise.
+ (verify_gimple_assign_binary): Handle VEC_PACK_FLOAT_EXPR.
+ * cfgexpand.c (expand_debug_expr): Handle VEC_UNPACK_FIX_TRUNC_HI_EXPR,
+ VEC_UNPACK_FIX_TRUNC_LO_EXPR and VEC_PACK_FLOAT_EXPR.
+ * expr.c (expand_expr_real_2): Likewise.
+ * optabs.def (vec_packs_float_optab, vec_packu_float_optab,
+ vec_unpack_sfix_trunc_hi_optab, vec_unpack_sfix_trunc_lo_optab,
+ vec_unpack_ufix_trunc_hi_optab, vec_unpack_ufix_trunc_lo_optab): New
+ optabs.
+ * optabs.c (expand_widen_pattern_expr): For
+ VEC_UNPACK_FIX_TRUNC_HI_EXPR and VEC_UNPACK_FIX_TRUNC_LO_EXPR use
+ sign from result type rather than operand's type.
+ (expand_binop_directly): For vec_packu_float_optab and
+ vec_packs_float_optab allow result type to be different from operand's
+ type.
+ * optabs-tree.c (optab_for_tree_code): Handle
+ VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and
+ VEC_PACK_FLOAT_EXPR. Formatting fixes.
+ * tree-vect-generic.c (expand_vector_operations_1): Handle
+ VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR and
+ VEC_PACK_FLOAT_EXPR.
+ * tree-vect-stmts.c (supportable_widening_operation): Handle
+ FIX_TRUNC_EXPR.
+ (supportable_narrowing_operation): Handle FLOAT_EXPR.
+ * config/i386/i386.md (fixprefix, floatprefix): New code attributes.
+ * config/i386/sse.md (*float<floatunssuffix>v2div2sf2): Rename to ...
+ (float<floatunssuffix>v2div2sf2): ... this. Formatting fix.
+ (vpckfloat_concat_mode, vpckfloat_temp_mode, vpckfloat_op_mode): New
+ mode attributes.
+ (vec_pack<floatprefix>_float_<mode>): New expander.
+ (vunpckfixt_mode, vunpckfixt_model, vunpckfixt_extract_mode): New mode
+ attributes.
+ (vec_unpack_<fixprefix>fix_trunc_lo_<mode>,
+ vec_unpack_<fixprefix>fix_trunc_hi_<mode>): New expanders.
+ * doc/md.texi (vec_packs_float_@var{m}, vec_packu_float_@var{m},
+ vec_unpack_sfix_trunc_hi_@var{m}, vec_unpack_sfix_trunc_lo_@var{m},
+ vec_unpack_ufix_trunc_hi_@var{m}, vec_unpack_ufix_trunc_lo_@var{m}):
+ Document.
+ * doc/generic.texi (VEC_UNPACK_FLOAT_HI_EXPR,
+ VEC_UNPACK_FLOAT_LO_EXPR): Fix pasto in description.
+ (VEC_UNPACK_FIX_TRUNC_HI_EXPR, VEC_UNPACK_FIX_TRUNC_LO_EXPR,
+ VEC_PACK_FLOAT_EXPR): Document.
+
2018-05-29 Richard Biener <rguenther@suse.de>
* tree-vectorizer.h (struct vec_info): Add stmt_vec_infos
case REALIGN_LOAD_EXPR:
case VEC_COND_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
+ case VEC_PACK_FLOAT_EXPR:
case VEC_PACK_SAT_EXPR:
case VEC_PACK_TRUNC_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
case VEC_UNPACK_HI_EXPR:
(define_code_iterator any_fix [fix unsigned_fix])
(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
(define_code_attr fixunssuffix [(fix "") (unsigned_fix "uns")])
+(define_code_attr fixprefix [(fix "s") (unsigned_fix "u")])
;; Used in signed and unsigned float.
(define_code_iterator any_float [float unsigned_float])
(define_code_attr floatsuffix [(float "") (unsigned_float "u")])
(define_code_attr floatunssuffix [(float "") (unsigned_float "uns")])
+(define_code_attr floatprefix [(float "s") (unsigned_float "u")])
;; All integer modes.
(define_mode_iterator SWI1248x [QI HI SI DI])
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "*float<floatunssuffix>v2div2sf2"
+(define_insn "float<floatunssuffix>v2div2sf2"
[(set (match_operand:V4SF 0 "register_operand" "=v")
- (vec_concat:V4SF
+ (vec_concat:V4SF
(any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
(const_vector:V2SF [(const_int 0) (const_int 0)])))]
"TARGET_AVX512DQ && TARGET_AVX512VL"
(set_attr "prefix" "evex")
(set_attr "mode" "V4SF")])
+;; Mode attributes for the vec_pack<floatprefix>_float_<mode> expander,
+;; keyed by the mode of its two DImode-element input vectors.
+;;
+;; vpckfloat_concat_mode: suffix of the gen_avx_vec_concat* generator that
+;; joins the two converted halves.  The V2DI entry is never executed (the
+;; expander uses movlhps for V2DI); it only has to name a generator so that
+;; the untaken else branch still compiles.
+(define_mode_attr vpckfloat_concat_mode
+ [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
+;; vpckfloat_temp_mode: mode of the temporaries that receive each converted
+;; input.
+(define_mode_attr vpckfloat_temp_mode
+ [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
+;; vpckfloat_op_mode: destination-mode part of the
+;; float<floatunssuffix><mode><vpckfloat_op_mode>2 generator name.
+(define_mode_attr vpckfloat_op_mode
+ [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
+
+;; Convert the integer vectors in operands 1 and 2 to SFmode elements, using
+;; the signed or unsigned conversion selected by any_float, and store both
+;; converted vectors into operand 0.
+(define_expand "vec_pack<floatprefix>_float_<mode>"
+ [(match_operand:<ssePSmode> 0 "register_operand")
+ (any_float:<ssePSmode>
+ (match_operand:VI8_AVX512VL 1 "register_operand"))
+ (match_operand:VI8_AVX512VL 2 "register_operand")]
+ "TARGET_AVX512DQ"
+{
+ /* Convert each input separately into its own temporary.  */
+ rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
+ rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
+ rtx (*gen) (rtx, rtx) = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
+ emit_insn (gen (r1, operands[1]));
+ emit_insn (gen (r2, operands[2]));
+ /* For V2DI the conversion pattern (float<floatunssuffix>v2div2sf2) yields
+ a V4SF whose upper V2SF half is zero, so the two temporaries are merged
+ with movlhps; the wider modes concatenate the two temporaries.  */
+ if (<MODE>mode == V2DImode)
+ emit_insn (gen_sse_movlhps (operands[0], r1, r2));
+ else
+ emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
+ r1, r2));
+ DONE;
+})
+
(define_insn "float<floatunssuffix>v2div2sf2_mask"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_concat:V4SF
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
+;; Mode attributes for the vec_unpack_<fixprefix>fix_trunc_{lo,hi}_<mode>
+;; expanders, keyed by the mode of the SFmode-element input vector.
+;;
+;; vunpckfixt_mode: mode of the integer result (operand 0).
+(define_mode_attr vunpckfixt_mode
+ [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
+;; vunpckfixt_model: the same result mode in lower case, for building
+;; generator names.
+(define_mode_attr vunpckfixt_model
+ [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
+;; vunpckfixt_extract_mode: suffix of the gen_vec_extract_{lo,hi}_*
+;; generator.  The V4SF entry is a placeholder: both expanders guard the
+;; extraction with <MODE>mode != V4SFmode, so it is never executed.
+(define_mode_attr vunpckfixt_extract_mode
+ [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
+
+;; Convert the low half of the float vector in operand 1 to integers with
+;; truncation (signed or unsigned according to any_fix) and store the
+;; result in operand 0.
+(define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
+ [(match_operand:<vunpckfixt_mode> 0 "register_operand")
+ (any_fix:<vunpckfixt_mode>
+ (match_operand:VF1_AVX512VL 1 "register_operand"))]
+ "TARGET_AVX512DQ"
+{
+ /* V4SF is passed to the conversion unchanged; presumably that pattern
+ reads only the low elements of its input -- confirm against the
+ fix_trunc pattern it resolves to.  Wider inputs have their low half
+ extracted into a half-width temporary first.  */
+ rtx tem = operands[1];
+ if (<MODE>mode != V4SFmode)
+ {
+ tem = gen_reg_rtx (<ssehalfvecmode>mode);
+ emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
+ operands[1]));
+ }
+ rtx (*gen) (rtx, rtx)
+ = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
+ emit_insn (gen (operands[0], tem));
+ DONE;
+})
+
+;; Convert the high half of the float vector in operand 1 to integers with
+;; truncation (signed or unsigned according to any_fix) and store the
+;; result in operand 0.
+(define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
+ [(match_operand:<vunpckfixt_mode> 0 "register_operand")
+ (any_fix:<vunpckfixt_mode>
+ (match_operand:VF1_AVX512VL 1 "register_operand"))]
+ "TARGET_AVX512DQ"
+{
+ rtx tem;
+ if (<MODE>mode != V4SFmode)
+ {
+ /* Wider inputs: extract the high half into a half-width temporary.  */
+ tem = gen_reg_rtx (<ssehalfvecmode>mode);
+ emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
+ operands[1]));
+ }
+ else
+ {
+ /* V4SF: permute into a fresh V4SF temporary.  The immediate 0x4e
+ (binary 01 00 11 10) selects source elements 2, 3, 0, 1, i.e. it
+ swaps the two element pairs so the high pair ends up in the low
+ position, where the conversion below presumably reads it.  */
+ tem = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
+ }
+ rtx (*gen) (rtx, rtx)
+ = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
+ emit_insn (gen (operands[0], tem));
+ DONE;
+})
+
(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unsigned_fix:<sseintvecmode>
@tindex VEC_UNPACK_LO_EXPR
@tindex VEC_UNPACK_FLOAT_HI_EXPR
@tindex VEC_UNPACK_FLOAT_LO_EXPR
+@tindex VEC_UNPACK_FIX_TRUNC_HI_EXPR
+@tindex VEC_UNPACK_FIX_TRUNC_LO_EXPR
@tindex VEC_PACK_TRUNC_EXPR
@tindex VEC_PACK_SAT_EXPR
@tindex VEC_PACK_FIX_TRUNC_EXPR
+@tindex VEC_PACK_FLOAT_EXPR
@tindex VEC_COND_EXPR
@tindex SAD_EXPR
single operand is a vector that contains @code{N} elements of the same
integral type. The result is a vector that contains half as many elements
of a floating point type whose size is twice as wide. In the case of
-@code{VEC_UNPACK_HI_EXPR} the high @code{N/2} elements of the vector are
-extracted, converted and widened. In the case of @code{VEC_UNPACK_LO_EXPR}
+@code{VEC_UNPACK_FLOAT_HI_EXPR} the high @code{N/2} elements of the vector are
+extracted, converted and widened. In the case of @code{VEC_UNPACK_FLOAT_LO_EXPR}
the low @code{N/2} elements of the vector are extracted, converted and widened.
+@item VEC_UNPACK_FIX_TRUNC_HI_EXPR
+@itemx VEC_UNPACK_FIX_TRUNC_LO_EXPR
+These nodes represent unpacking of the high and low parts of the input vector,
+where the values are truncated from floating point to fixed point. The
+single operand is a vector that contains @code{N} elements of the same
+floating point type. The result is a vector that contains half as many
+elements of an integral type whose size is twice as wide. In the case of
+@code{VEC_UNPACK_FIX_TRUNC_HI_EXPR} the high @code{N/2} elements of the
+vector are extracted and converted with truncation. In the case of
+@code{VEC_UNPACK_FIX_TRUNC_LO_EXPR} the low @code{N/2} elements of the
+vector are extracted and converted with truncation.
+
@item VEC_PACK_TRUNC_EXPR
This node represents packing of truncated elements of the two input vectors
into the output vector. Input operands are vectors that contain the same
elements of the two vectors are merged (concatenated) to form the output
vector.
+@item VEC_PACK_FLOAT_EXPR
+This node represents packing of elements of the two input vectors into the
+output vector, where the values are converted from fixed point to floating
+point. Input operands are vectors that contain the same number of elements
+of an integral type. The result is a vector that contains twice as many
+elements of a floating point type whose size is half as wide. The elements of
+the two vectors are merged (concatenated) to form the output vector.
+
@item VEC_COND_EXPR
These nodes represent @code{?:} expressions. The three operands must be
vectors of the same size and number of elements. The second and third
floating point elements of size S@. Operand 0 is the resulting vector
in which 2*N elements of size N/2 are concatenated.
+@cindex @code{vec_packs_float_@var{m}} instruction pattern
+@cindex @code{vec_packu_float_@var{m}} instruction pattern
+@item @samp{vec_packs_float_@var{m}}, @samp{vec_packu_float_@var{m}}
+Narrow, convert to floating point type and merge the elements
+of two vectors. Operands 1 and 2 are vectors of the same mode having N
+signed/unsigned integral elements of size S@. Operand 0 is the resulting vector
+in which 2*N elements of size S/2 are concatenated.
+
@cindex @code{vec_unpacks_hi_@var{m}} instruction pattern
@cindex @code{vec_unpacks_lo_@var{m}} instruction pattern
@item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}}
floating point conversion and place the resulting N/2 values of size 2*S in
the output vector (operand 0).
+@cindex @code{vec_unpack_sfix_trunc_hi_@var{m}} instruction pattern
+@cindex @code{vec_unpack_sfix_trunc_lo_@var{m}} instruction pattern
+@cindex @code{vec_unpack_ufix_trunc_hi_@var{m}} instruction pattern
+@cindex @code{vec_unpack_ufix_trunc_lo_@var{m}} instruction pattern
+@item @samp{vec_unpack_sfix_trunc_hi_@var{m}},
+@itemx @samp{vec_unpack_sfix_trunc_lo_@var{m}}
+@itemx @samp{vec_unpack_ufix_trunc_hi_@var{m}}
+@itemx @samp{vec_unpack_ufix_trunc_lo_@var{m}}
+Extract, convert to signed/unsigned integer type and widen the high/low part of a
+vector of floating point elements. The input vector (operand 1)
+has N elements of size S@. Convert the high/low elements of the vector
+to integers and place the resulting N/2 values of size 2*S in
+the output vector (operand 0).
+
@cindex @code{vec_widen_umult_hi_@var{m}} instruction pattern
@cindex @code{vec_widen_umult_lo_@var{m}} instruction pattern
@cindex @code{vec_widen_smult_hi_@var{m}} instruction pattern
case VEC_UNPACK_HI_EXPR:
case VEC_UNPACK_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
{
op0 = expand_normal (treeop0);
temp = expand_widen_pattern_expr (ops, op0, NULL_RTX, NULL_RTX,
mode = TYPE_MODE (TREE_TYPE (treeop0));
goto binop;
+ case VEC_PACK_FLOAT_EXPR:
+ mode = TYPE_MODE (TREE_TYPE (treeop0));
+ expand_operands (treeop0, treeop1,
+ subtarget, &op0, &op1, EXPAND_NORMAL);
+ this_optab = optab_for_tree_code (code, TREE_TYPE (treeop0),
+ optab_default);
+ target = expand_binop (mode, this_optab, op0, op1, target,
+ TYPE_UNSIGNED (TREE_TYPE (treeop0)),
+ OPTAB_LIB_WIDEN);
+ gcc_assert (target);
+ return target;
+
case VEC_PERM_EXPR:
{
expand_operands (treeop0, treeop1, target, &op0, &op1, EXPAND_NORMAL);
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
+ case VEC_PACK_FLOAT_EXPR:
{
unsigned int HOST_WIDE_INT out_nelts, in_nelts, i;
? VECTOR_CST_ELT (arg1, i)
: VECTOR_CST_ELT (arg2, i - in_nelts));
elt = fold_convert_const (code == VEC_PACK_TRUNC_EXPR
- ? NOP_EXPR : FIX_TRUNC_EXPR,
+ ? NOP_EXPR
+ : code == VEC_PACK_FLOAT_EXPR
+ ? FLOAT_EXPR : FIX_TRUNC_EXPR,
TREE_TYPE (type), elt);
if (elt == NULL_TREE || !CONSTANT_CLASS_P (elt))
return NULL_TREE;
case VEC_UNPACK_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
{
unsigned HOST_WIDE_INT out_nelts, in_nelts, i;
enum tree_code subcode;
unsigned int offset = 0;
if ((!BYTES_BIG_ENDIAN) ^ (code == VEC_UNPACK_LO_EXPR
- || code == VEC_UNPACK_FLOAT_LO_EXPR))
+ || code == VEC_UNPACK_FLOAT_LO_EXPR
+ || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR))
offset = out_nelts;
if (code == VEC_UNPACK_LO_EXPR || code == VEC_UNPACK_HI_EXPR)
subcode = NOP_EXPR;
- else
+ else if (code == VEC_UNPACK_FLOAT_LO_EXPR
+ || code == VEC_UNPACK_FLOAT_HI_EXPR)
subcode = FLOAT_EXPR;
+ else
+ subcode = FIX_TRUNC_EXPR;
tree_vector_builder elts (type, out_nelts, 1);
for (i = 0; i < out_nelts; i++)
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
+ case VEC_PACK_FLOAT_EXPR:
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
case VEC_SERIES_EXPR:
? ssmsub_widen_optab : smsub_widen_optab));
case VEC_WIDEN_MULT_HI_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab);
case VEC_WIDEN_MULT_LO_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab);
case VEC_WIDEN_MULT_EVEN_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_umult_even_optab : vec_widen_smult_even_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_umult_even_optab : vec_widen_smult_even_optab);
case VEC_WIDEN_MULT_ODD_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab);
case VEC_WIDEN_LSHIFT_HI_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab);
case VEC_WIDEN_LSHIFT_LO_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab);
case VEC_UNPACK_HI_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_unpacku_hi_optab : vec_unpacks_hi_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpacku_hi_optab : vec_unpacks_hi_optab);
case VEC_UNPACK_LO_EXPR:
- return TYPE_UNSIGNED (type) ?
- vec_unpacku_lo_optab : vec_unpacks_lo_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpacku_lo_optab : vec_unpacks_lo_optab);
case VEC_UNPACK_FLOAT_HI_EXPR:
/* The signedness is determined from input operand. */
- return TYPE_UNSIGNED (type) ?
- vec_unpacku_float_hi_optab : vec_unpacks_float_hi_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpacku_float_hi_optab : vec_unpacks_float_hi_optab);
case VEC_UNPACK_FLOAT_LO_EXPR:
/* The signedness is determined from input operand. */
- return TYPE_UNSIGNED (type) ?
- vec_unpacku_float_lo_optab : vec_unpacks_float_lo_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpacku_float_lo_optab : vec_unpacks_float_lo_optab);
+
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ /* The signedness is determined from output operand. */
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpack_ufix_trunc_hi_optab
+ : vec_unpack_sfix_trunc_hi_optab);
+
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
+ /* The signedness is determined from output operand. */
+ return (TYPE_UNSIGNED (type)
+ ? vec_unpack_ufix_trunc_lo_optab
+ : vec_unpack_sfix_trunc_lo_optab);
case VEC_PACK_TRUNC_EXPR:
return vec_pack_trunc_optab;
case VEC_PACK_FIX_TRUNC_EXPR:
/* The signedness is determined from output operand. */
- return TYPE_UNSIGNED (type) ?
- vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab;
+ return (TYPE_UNSIGNED (type)
+ ? vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab);
+
+ case VEC_PACK_FLOAT_EXPR:
+ /* The signedness is determined from input operand. */
+ return (TYPE_UNSIGNED (type)
+ ? vec_packu_float_optab : vec_packs_float_optab);
case VEC_DUPLICATE_EXPR:
return vec_duplicate_optab;
oprnd0 = ops->op0;
tmode0 = TYPE_MODE (TREE_TYPE (oprnd0));
- widen_pattern_optab =
- optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
+ if (ops->code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
+ || ops->code == VEC_UNPACK_FIX_TRUNC_LO_EXPR)
+ /* The sign is from the result type rather than operand's type
+ for these ops. */
+ widen_pattern_optab
+ = optab_for_tree_code (ops->code, ops->type, optab_default);
+ else
+ widen_pattern_optab
+ = optab_for_tree_code (ops->code, TREE_TYPE (oprnd0), optab_default);
if (ops->code == WIDEN_MULT_PLUS_EXPR
|| ops->code == WIDEN_MULT_MINUS_EXPR)
icode = find_widening_optab_handler (widen_pattern_optab,
|| binoptab == vec_pack_usat_optab
|| binoptab == vec_pack_ssat_optab
|| binoptab == vec_pack_ufix_trunc_optab
- || binoptab == vec_pack_sfix_trunc_optab)
+ || binoptab == vec_pack_sfix_trunc_optab
+ || binoptab == vec_packu_float_optab
+ || binoptab == vec_packs_float_optab)
{
/* The mode of the result is different then the mode of the
arguments. */
OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a")
OPTAB_D (vec_pack_ufix_trunc_optab, "vec_pack_ufix_trunc_$a")
OPTAB_D (vec_pack_usat_optab, "vec_pack_usat_$a")
+OPTAB_D (vec_packs_float_optab, "vec_packs_float_$a")
+OPTAB_D (vec_packu_float_optab, "vec_packu_float_$a")
OPTAB_D (vec_perm_optab, "vec_perm$a")
OPTAB_D (vec_realign_load_optab, "vec_realign_load_$a")
OPTAB_D (vec_set_optab, "vec_set$a")
OPTAB_D (vec_shr_optab, "vec_shr_$a")
+OPTAB_D (vec_unpack_sfix_trunc_hi_optab, "vec_unpack_sfix_trunc_hi_$a")
+OPTAB_D (vec_unpack_sfix_trunc_lo_optab, "vec_unpack_sfix_trunc_lo_$a")
+OPTAB_D (vec_unpack_ufix_trunc_hi_optab, "vec_unpack_ufix_trunc_hi_$a")
+OPTAB_D (vec_unpack_ufix_trunc_lo_optab, "vec_unpack_ufix_trunc_lo_$a")
OPTAB_D (vec_unpacks_float_hi_optab, "vec_unpacks_float_hi_$a")
OPTAB_D (vec_unpacks_float_lo_optab, "vec_unpacks_float_lo_$a")
OPTAB_D (vec_unpacks_hi_optab, "vec_unpacks_hi_$a")
+2018-05-29 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/85918
+ * gcc.target/i386/avx512dq-pr85918.c: Add -mprefer-vector-width=512
+ and -fno-vect-cost-model options. Add aligned(64) attribute to the
+ arrays. Add suffix 1 to all functions and use 4 iterations rather
+ than N. Add functions with conversions to and from float.
+ Add new set of functions with 8 iterations and another one
+ with 16 iterations, expect 24 vectorized loops instead of just 4.
+ * gcc.target/i386/avx512dq-pr85918-2.c: New test.
+
2018-05-29 Javier Miranda <miranda@adacore.com>
* gnat.dg/equal2.adb: New testcase.
--- /dev/null
+/* PR target/85918 */
+/* { dg-do run } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-options "-O3 -mavx512dq -mavx512vl -mprefer-vector-width=512 -fno-vect-cost-model" } */
+
+#define AVX512DQ
+#define AVX512VL
+#define DO_TEST avx512dqvl_test
+
+static void avx512dqvl_test (void);
+
+#include "avx512-check.h"
+
+#define N 16
+
+long long ll[N] __attribute__((aligned (64)));
+unsigned long long ull[N] __attribute__((aligned (64)));
+float f[N] __attribute__((aligned (64)));
+double d[N] __attribute__((aligned (64)));
+/* Conversion kernels, named <src>2<dst><k>.  ll, ull, f and d refer to
+   the global arrays of the same name declared above (long long, unsigned
+   long long, float and double); each kernel stores src[i] converted to
+   the element type of dst.  The suffix k selects the trip count: 1 means
+   4 iterations, 2 means 8 and 3 means 16.  Every kernel is marked noipa,
+   presumably so that each loop is compiled on its own rather than being
+   merged into the caller.  */
+__attribute__((noipa)) void
+ll2d1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ d[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2d1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ d[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+d2ll1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ll[i] = d[i];
+}
+
+__attribute__((noipa)) void
+d2ull1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ull[i] = d[i];
+}
+
+__attribute__((noipa)) void
+ll2f1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2f1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+f2ll1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ll[i] = f[i];
+}
+
+__attribute__((noipa)) void
+f2ull1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ull[i] = f[i];
+}
+
+__attribute__((noipa)) void
+ll2d2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ d[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2d2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ d[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+d2ll2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ll[i] = d[i];
+}
+
+__attribute__((noipa)) void
+d2ull2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ull[i] = d[i];
+}
+
+__attribute__((noipa)) void
+ll2f2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ f[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2f2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ f[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+f2ll2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ll[i] = f[i];
+}
+
+__attribute__((noipa)) void
+f2ull2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ull[i] = f[i];
+}
+
+__attribute__((noipa)) void
+ll2d3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ d[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2d3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ d[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+d2ll3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ll[i] = d[i];
+}
+
+__attribute__((noipa)) void
+d2ull3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ull[i] = d[i];
+}
+
+__attribute__((noipa)) void
+ll2f3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ f[i] = ll[i];
+}
+
+__attribute__((noipa)) void
+ull2f3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ f[i] = ull[i];
+}
+
+__attribute__((noipa)) void
+f2ll3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ll[i] = f[i];
+}
+
+__attribute__((noipa)) void
+f2ull3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ull[i] = f[i];
+}
+
+unsigned long long ullt[] = {
+ 13835058055282163712ULL, 9223653511831486464ULL, 9218868437227405312ULL,
+ 1ULL, 9305281255077576704ULL, 1191936ULL, 18446462598732840960ULL, 0ULL,
+ 9223372036854775808ULL, 4611686018427387904ULL, 2305843009213693952ULL,
+ 9ULL, 9223653511831486464ULL, 0ULL, 65536ULL, 131071ULL
+};
+float uft[] = {
+ 13835058055282163712.0f, 9223653511831486464.0f, 9218868437227405312.0f,
+ 1.0f, 9305281255077576704.0f, 1191936.0f, 18446462598732840960.0f, 0.0f,
+ 9223372036854775808.0f, 4611686018427387904.0f, 2305843009213693952.0f,
+ 9.0f, 9223653511831486464.0f, 0.0f, 65536.0f, 131071.0f
+};
+long long llt[] = {
+ 9223090561878065152LL, -9223372036854775807LL - 1, -9223090561878065152LL,
+ -4LL, -8074672656898588672LL, 8074672656898588672LL, 29LL, -15LL,
+ 7574773098260463616LL, -7579276697887834112LL, -8615667562136469504LL,
+ 148LL, -255LL, 9151595917793558528LL, -9218868437227405312LL, 9LL
+};
+float ft[] = {
+ 9223090561878065152.0f, -9223372036854775808.0f, -9223090561878065152.0f,
+ -4.0f, -8074672656898588672.0f, 8074672656898588672.0f, 29.0f, -15.0f,
+ 7574773098260463616.0f, -7579276697887834112.0f, -8615667562136469504.0f,
+ 148.0f, -255.0f, 9151595917793558528.0f, -9218868437227405312.0f, 9.0f
+};
+/* Test driver.  Runs every conversion kernel against the reference
+   tables (llt/ullt hold the integer values, ft/uft the same values
+   written as float constants) and aborts on the first mismatch.  The
+   else arm of each checking loop stages the input for the kernel that
+   is called next.  */
+static void
+avx512dqvl_test (void)
+{
+ int i;
+ for (i = 0; i < 4; i++) /* Round 1: the 4-iteration kernels.  */
+ {
+ ll[i] = llt[i];
+ ull[i] = ullt[i];
+ }
+ ll2d1 ();
+ for (i = 0; i < 4; i++)
+ if (d[i] != ft[i])
+ abort ();
+ ull2d1 ();
+ for (i = 0; i < 4; i++)
+ if (d[i] != uft[i])
+ abort ();
+ else
+ d[i] = ft[i + 4];
+ d2ll1 ();
+ for (i = 0; i < 4; i++)
+ if (ll[i] != llt[i + 4])
+ abort ();
+ else
+ d[i] = uft[i + 4];
+ d2ull1 ();
+ for (i = 0; i < 4; i++)
+ if (ull[i] != ullt[i + 4])
+ abort ();
+ else
+ {
+ ll[i] = llt[i + 8];
+ ull[i] = ullt[i + 8];
+ }
+ ll2f1 ();
+ for (i = 0; i < 4; i++)
+ if (f[i] != ft[i + 8])
+ abort ();
+ ull2f1 ();
+ for (i = 0; i < 4; i++)
+ if (f[i] != uft[i + 8])
+ abort ();
+ else
+ f[i] = ft[i + 12];
+ f2ll1 ();
+ for (i = 0; i < 4; i++)
+ if (ll[i] != llt[i + 12])
+ abort ();
+ else
+ f[i] = uft[i + 12];
+ f2ull1 ();
+ for (i = 0; i < 4; i++)
+ if (ull[i] != ullt[i + 12])
+ abort ();
+ for (i = 0; i < 8; i++) /* Round 2: the 8-iteration kernels.  */
+ {
+ ll[i] = llt[i];
+ ull[i] = ullt[i];
+ }
+ ll2d2 ();
+ for (i = 0; i < 8; i++)
+ if (d[i] != ft[i])
+ abort ();
+ ull2d2 ();
+ for (i = 0; i < 8; i++)
+ if (d[i] != uft[i])
+ abort ();
+ else
+ {
+ d[i] = ft[i];
+ ll[i] = 1234567LL; /* Sentinel: a stale value must not pass the next check.  */
+ ull[i] = 7654321ULL;
+ }
+ d2ll2 ();
+ for (i = 0; i < 8; i++)
+ if (ll[i] != llt[i])
+ abort ();
+ else
+ d[i] = uft[i];
+ d2ull2 ();
+ for (i = 0; i < 8; i++)
+ if (ull[i] != ullt[i])
+ abort ();
+ else
+ {
+ ll[i] = llt[i + 8];
+ ull[i] = ullt[i + 8];
+ }
+ ll2f2 ();
+ for (i = 0; i < 8; i++)
+ if (f[i] != ft[i + 8])
+ abort ();
+ ull2f2 ();
+ for (i = 0; i < 8; i++)
+ if (f[i] != uft[i + 8])
+ abort ();
+ else
+ {
+ f[i] = ft[i + 8];
+ ll[i] = 1234567LL; /* Sentinel, as above.  */
+ ull[i] = 7654321ULL;
+ }
+ f2ll2 ();
+ for (i = 0; i < 8; i++)
+ if (ll[i] != llt[i + 8])
+ abort ();
+ else
+ f[i] = uft[i + 8];
+ f2ull2 ();
+ for (i = 0; i < 8; i++)
+ if (ull[i] != ullt[i + 8])
+ abort ();
+ for (i = 0; i < 16; i++) /* Round 3: the 16-iteration kernels.  */
+ {
+ ll[i] = llt[i];
+ ull[i] = ullt[i];
+ }
+ ll2d3 ();
+ for (i = 0; i < 16; i++)
+ if (d[i] != ft[i])
+ abort ();
+ ull2d3 ();
+ for (i = 0; i < 16; i++)
+ if (d[i] != uft[i])
+ abort ();
+ else
+ {
+ d[i] = ft[i];
+ ll[i] = 1234567LL; /* Sentinel, as above.  */
+ ull[i] = 7654321ULL;
+ }
+ d2ll3 ();
+ for (i = 0; i < 16; i++)
+ if (ll[i] != llt[i])
+ abort ();
+ else
+ d[i] = uft[i];
+ d2ull3 ();
+ for (i = 0; i < 16; i++)
+ if (ull[i] != ullt[i])
+ abort ();
+ else
+ {
+ ll[i] = llt[i];
+ ull[i] = ullt[i];
+ f[i] = 3.0f; /* Sentinels for the floating point arrays.  */
+ d[i] = 4.0;
+ }
+ ll2f3 ();
+ for (i = 0; i < 16; i++)
+ if (f[i] != ft[i])
+ abort ();
+ ull2f3 ();
+ for (i = 0; i < 16; i++)
+ if (f[i] != uft[i])
+ abort ();
+ else
+ {
+ f[i] = ft[i];
+ ll[i] = 1234567LL; /* Sentinel, as above.  */
+ ull[i] = 7654321ULL;
+ }
+ f2ll3 ();
+ for (i = 0; i < 16; i++)
+ if (ll[i] != llt[i])
+ abort ();
+ else
+ f[i] = uft[i];
+ f2ull3 ();
+ for (i = 0; i < 16; i++)
+ if (ull[i] != ullt[i])
+ abort ();
+}
/* PR target/85918 */
/* { dg-do compile } */
-/* { dg-options "-O3 -mavx512dq -mavx512vl -fdump-tree-vect-details" } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
+/* { dg-options "-O3 -mavx512dq -mavx512vl -mprefer-vector-width=512 -fno-vect-cost-model -fdump-tree-vect-details" } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 24 "vect" } } */
#define N 1024
-long long ll[N];
-unsigned long long ull[N];
-double d[N];
+long long ll[N] __attribute__((aligned (64)));
+unsigned long long ull[N] __attribute__((aligned (64)));
+float f[N] __attribute__((aligned (64)));
+double d[N] __attribute__((aligned (64)));
-void ll2d (void)
+void ll2d1 (void)
{
int i;
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4; i++)
d[i] = ll[i];
}
-void ull2d (void)
+void ull2d1 (void)
{
int i;
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4; i++)
d[i] = ull[i];
}
-void d2ll (void)
+void d2ll1 (void)
{
int i;
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4; i++)
ll[i] = d[i];
}
-void d2ull (void)
+void d2ull1 (void)
{
int i;
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4; i++)
ull[i] = d[i];
}
+
+void ll2f1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = ll[i];
+}
+
+void ull2f1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ f[i] = ull[i];
+}
+
+void f2ll1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ll[i] = f[i];
+}
+
+void f2ull1 (void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ ull[i] = f[i];
+}
+
+void ll2d2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ d[i] = ll[i];
+}
+
+void ull2d2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ d[i] = ull[i];
+}
+
+void d2ll2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ll[i] = d[i];
+}
+
+void d2ull2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ull[i] = d[i];
+}
+
+void ll2f2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ f[i] = ll[i];
+}
+
+void ull2f2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ f[i] = ull[i];
+}
+
+void f2ll2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ll[i] = f[i];
+}
+
+void f2ull2 (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ ull[i] = f[i];
+}
+
+void ll2d3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ d[i] = ll[i];
+}
+
+void ull2d3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ d[i] = ull[i];
+}
+
+void d2ll3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ll[i] = d[i];
+}
+
+void d2ull3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ull[i] = d[i];
+}
+
+void ll2f3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ f[i] = ll[i];
+}
+
+void ull2f3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ f[i] = ull[i];
+}
+
+void f2ll3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ll[i] = f[i];
+}
+
+void f2ull3 (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ ull[i] = f[i];
+}
case VEC_UNPACK_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
/* FIXME. */
return false;
return false;
}
+ case VEC_PACK_FLOAT_EXPR:
+ if (TREE_CODE (rhs1_type) != VECTOR_TYPE
+ || TREE_CODE (lhs_type) != VECTOR_TYPE
+ || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1_type))
+ || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs_type))
+ || !types_compatible_p (rhs1_type, rhs2_type)
+ || maybe_ne (GET_MODE_SIZE (element_mode (rhs1_type)),
+ 2 * GET_MODE_SIZE (element_mode (lhs_type))))
+ {
+ error ("type mismatch in vector pack expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ return true;
+ }
+
+ return false;
+
case MULT_EXPR:
case MULT_HIGHPART_EXPR:
case TRUNC_DIV_EXPR:
case VEC_UNPACK_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
+ case VEC_PACK_FLOAT_EXPR:
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
case VEC_DUPLICATE_EXPR:
pp_string (pp, " > ");
break;
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ pp_string (pp, " VEC_UNPACK_FIX_TRUNC_HI_EXPR < ");
+ dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (pp, " > ");
+ break;
+
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
+ pp_string (pp, " VEC_UNPACK_FIX_TRUNC_LO_EXPR < ");
+ dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (pp, " > ");
+ break;
+
case VEC_PACK_TRUNC_EXPR:
pp_string (pp, " VEC_PACK_TRUNC_EXPR < ");
dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
pp_string (pp, " > ");
break;
+ case VEC_PACK_FLOAT_EXPR:
+ pp_string (pp, " VEC_PACK_FLOAT_EXPR < ");
+ dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (pp, ", ");
+ dump_generic_node (pp, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_string (pp, " > ");
+ break;
+
case BLOCK:
dump_block_node (pp, node, spc, flags);
break;
case VEC_UNPACK_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
+ case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
return 16;
/* The signedness is determined from input argument. */
if (code == VEC_UNPACK_FLOAT_HI_EXPR
- || code == VEC_UNPACK_FLOAT_LO_EXPR)
+ || code == VEC_UNPACK_FLOAT_LO_EXPR
+ || code == VEC_PACK_FLOAT_EXPR)
{
type = TREE_TYPE (rhs1);
/* We do not know how to scalarize those. */
|| code == VEC_WIDEN_MULT_ODD_EXPR
|| code == VEC_UNPACK_HI_EXPR
|| code == VEC_UNPACK_LO_EXPR
+ || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
+ || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
|| code == VEC_PACK_TRUNC_EXPR
|| code == VEC_PACK_SAT_EXPR
|| code == VEC_PACK_FIX_TRUNC_EXPR
vector form (i.e., when operating on arguments of type VECTYPE_IN
producing a result of type VECTYPE_OUT).
- Widening operations we currently support are NOP (CONVERT), FLOAT
- and WIDEN_MULT. This function checks if these operations are supported
- by the target platform either directly (via vector tree-codes), or via
- target builtins.
+ Widening operations we currently support are NOP (CONVERT), FLOAT,
+ FIX_TRUNC and WIDEN_MULT. This function checks if these operations
+ are supported by the target platform either directly (via vector
+ tree-codes), or via target builtins.
Output:
- CODE1 and CODE2 are codes of vector operations to be used when
break;
case FIX_TRUNC_EXPR:
- /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
- VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
- computing the operation. */
- return false;
+ c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
+ c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
+ break;
default:
gcc_unreachable ();
vector form (i.e., when operating on arguments of type VECTYPE_IN
and producing a result of type VECTYPE_OUT).
- Narrowing operations we currently support are NOP (CONVERT) and
- FIX_TRUNC. This function checks if these operations are supported by
+ Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
+ and FLOAT. This function checks if these operations are supported by
the target platform directly via vector tree-codes.
Output:
break;
case FLOAT_EXPR:
- /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
- tree code and optabs used for computing the operation. */
- return false;
+ c1 = VEC_PACK_FLOAT_EXPR;
+ break;
default:
gcc_unreachable ();
|| known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
TYPE_VECTOR_SUBPARTS (narrow_vectype)));
+ if (code == FLOAT_EXPR)
+ return false;
+
/* Check if it's a multi-step conversion that can be done using intermediate
types. */
prev_mode = vec_mode;
DEFTREECODE (VEC_UNPACK_FLOAT_HI_EXPR, "vec_unpack_float_hi_expr", tcc_unary, 1)
DEFTREECODE (VEC_UNPACK_FLOAT_LO_EXPR, "vec_unpack_float_lo_expr", tcc_unary, 1)
+/* Unpack (extract) the high/low elements of the input vector, convert
+ floating point values to integer and widen elements into the output
+ vector. The input vector has twice as many elements as the output
+ vector, that are half the size of the elements of the output vector. */
+DEFTREECODE (VEC_UNPACK_FIX_TRUNC_HI_EXPR, "vec_unpack_fix_trunc_hi_expr",
+ tcc_unary, 1)
+DEFTREECODE (VEC_UNPACK_FIX_TRUNC_LO_EXPR, "vec_unpack_fix_trunc_lo_expr",
+ tcc_unary, 1)
+
/* Pack (demote/narrow and merge) the elements of the two input vectors
into the output vector using truncation/saturation.
The elements of the input vectors are twice the size of the elements of the
the output vector. */
DEFTREECODE (VEC_PACK_FIX_TRUNC_EXPR, "vec_pack_fix_trunc_expr", tcc_binary, 2)
+/* Convert fixed point values of the two input vectors to floating point
+ and pack (narrow and merge) the elements into the output vector. The
+ elements of the input vectors are twice the size of the elements of
+ the output vector. */
+DEFTREECODE (VEC_PACK_FLOAT_EXPR, "vec_pack_float_expr", tcc_binary, 2)
+
/* Widening vector shift left in bits.
Operand 0 is a vector to be shifted with N elements of size S.
Operand 1 is an integer shift amount in bits.