+2015-11-10 Ilya Enkovich <enkovich.gnu@gmail.com>
+
+ * expr.c (do_store_flag): Use expand_vec_cmp_expr for mask results.
+ (const_vector_mask_from_tree): New.
+ (const_vector_from_tree): Use const_vector_mask_from_tree
+ for boolean vectors.
+ * optabs-query.h (get_vec_cmp_icode): New.
+ * optabs-tree.c (expand_vec_cmp_expr_p): New.
+ * optabs-tree.h (expand_vec_cmp_expr_p): New.
+ * optabs.c (vector_compare_rtx): Add OPNO arg.
+ (expand_vec_cond_expr): Adjust to vector_compare_rtx change.
+ (expand_vec_cmp_expr): New.
+ * optabs.def (vec_cmp_optab): New.
+ (vec_cmpu_optab): New.
+ * optabs.h (expand_vec_cmp_expr): New.
+ * tree-vect-generic.c (expand_vector_comparison): Add vector
+ comparison optabs check.
+ * tree-vect-loop.c (vect_determine_vectorization_factor): Ignore mask
+ operations for VF. Add mask type computation.
+ * tree-vect-stmts.c (get_mask_type_for_scalar_type): New.
+ (vectorizable_comparison): New.
+ (vect_analyze_stmt): Add vectorizable_comparison.
+ (vect_transform_stmt): Likewise.
+ (vect_init_vector): Support boolean vector invariants.
+ (vect_get_vec_def_for_operand): Add VECTYPE arg.
+ (vectorizable_condition): Directly provide vectype for invariants
+ used in comparison.
+ * tree-vectorizer.h (get_mask_type_for_scalar_type): New.
+ (enum vect_var_kind): Add vect_mask_var.
+ (enum stmt_vec_info_type): Add comparison_vec_info_type.
+ (vectorizable_comparison): New.
+ (vect_get_vec_def_for_operand): Add VECTYPE arg.
+ * tree-vect-data-refs.c (vect_get_new_vect_var): Support vect_mask_var.
+ (vect_create_destination_var): Likewise.
+ * tree-vect-patterns.c (check_bool_pattern): Fail the check
+ if the comparison can be vectorized directly.
+ (search_type_for_mask): New.
+ (vect_recog_bool_pattern): Support cases when bool pattern
+ check fails.
+ * tree-vect-slp.c (vect_build_slp_tree_1): Allow
+ comparison statements.
+ (vect_get_constant_vectors): Support boolean vector
+ constants.
+ * config/i386/i386-protos.h (ix86_expand_mask_vec_cmp): New.
+ (ix86_expand_int_vec_cmp): New.
+ (ix86_expand_fp_vec_cmp): New.
+ * config/i386/i386.c (ix86_expand_sse_cmp): Allow NULL for
+ op_true and op_false.
+ (ix86_int_cmp_code_to_pcmp_immediate): New.
+ (ix86_fp_cmp_code_to_pcmp_immediate): New.
+ (ix86_cmp_code_to_pcmp_immediate): New.
+ (ix86_expand_mask_vec_cmp): New.
+ (ix86_expand_fp_vec_cmp): New.
+ (ix86_expand_int_sse_cmp): New.
+ (ix86_expand_int_vcond): Use ix86_expand_int_sse_cmp.
+ (ix86_expand_int_vec_cmp): New.
+ (ix86_get_mask_mode): New.
+ (TARGET_VECTORIZE_GET_MASK_MODE): New.
+ * config/i386/sse.md (avx512fmaskmodelower): New.
+ (vec_cmp<mode><avx512fmaskmodelower>): New.
+ (vec_cmp<mode><sseintvecmodelower>): New.
+ (vec_cmpv2div2di): New.
+ (vec_cmpu<mode><avx512fmaskmodelower>): New.
+ (vec_cmpu<mode><sseintvecmodelower>): New.
+ (vec_cmpuv2div2di): New.
+
2015-11-10 Richard Biener <rguenther@suse.de>
PR tree-optimization/68240
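For orientation, here is a minimal user-level sketch (not part of the patch or its testsuite) of the kind of loop this series targets: the comparison feeding the conditional can now be expanded through the new vec_cmp/vec_cmpu optabs, so on AVX-512 targets it can become a k-mask compare instead of a full-width vector of 0/-1 values.

/* Hypothetical example; with -O3 -mavx512f the comparison below can be
   vectorized into a mask-producing compare followed by a masked blend.  */
void
vcmp_example (int *restrict r, int *restrict a, int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    r[i] = a[i] > b[i] ? a[i] : b[i];
}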
extern bool ix86_expand_int_vcond (rtx[]);
extern void ix86_expand_vec_perm (rtx[]);
extern bool ix86_expand_vec_perm_const (rtx[]);
+extern bool ix86_expand_mask_vec_cmp (rtx[]);
+extern bool ix86_expand_int_vec_cmp (rtx[]);
+extern bool ix86_expand_fp_vec_cmp (rtx[]);
extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool);
extern bool ix86_expand_int_addcc (rtx[]);
extern rtx ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
if (optimize
- || reg_overlap_mentioned_p (dest, op_true)
- || reg_overlap_mentioned_p (dest, op_false))
+ || (op_true && reg_overlap_mentioned_p (dest, op_true))
+ || (op_false && reg_overlap_mentioned_p (dest, op_false)))
dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
/* Compare patterns for int modes are unspec in AVX512F only. */
rtx t2, t3, x;
+ /* If we have an integer mask and FP value then we need
+ to cast mask to FP mode. */
+ if (mode != cmpmode && VECTOR_MODE_P (cmpmode))
+ {
+ cmp = force_reg (cmpmode, cmp);
+ cmp = gen_rtx_SUBREG (mode, cmp, 0);
+ }
+
if (vector_all_ones_operand (op_true, mode)
&& rtx_equal_p (op_false, CONST0_RTX (mode))
&& !maskcmp)
return true;
}
-/* Expand a floating-point vector conditional move; a vcond operation
- rather than a movcc operation. */
+/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */
+
+static int
+ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
+{
+ switch (code)
+ {
+ case EQ:
+ return 0;
+ case LT:
+ case LTU:
+ return 1;
+ case LE:
+ case LEU:
+ return 2;
+ case NE:
+ return 4;
+ case GE:
+ case GEU:
+ return 5;
+ case GT:
+ case GTU:
+ return 6;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */
+
+static int
+ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
+{
+ switch (code)
+ {
+ case EQ:
+ return 0x08;
+ case NE:
+ return 0x04;
+ case GT:
+ return 0x16;
+ case LE:
+ return 0x1a;
+ case GE:
+ return 0x15;
+ case LT:
+ return 0x19;
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* Return immediate value to be used in UNSPEC_PCMP
+ for comparison CODE in MODE. */
+
+static int
+ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
+{
+ if (FLOAT_MODE_P (mode))
+ return ix86_fp_cmp_code_to_pcmp_immediate (code);
+ return ix86_int_cmp_code_to_pcmp_immediate (code);
+}
+
+/* Expand AVX-512 vector comparison. */
bool
-ix86_expand_fp_vcond (rtx operands[])
+ix86_expand_mask_vec_cmp (rtx operands[])
{
- enum rtx_code code = GET_CODE (operands[3]);
+ machine_mode mask_mode = GET_MODE (operands[0]);
+ machine_mode cmp_mode = GET_MODE (operands[2]);
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
+ int unspec_code;
+ rtx unspec;
+
+ switch (code)
+ {
+ case LEU:
+ case GTU:
+ case GEU:
+ case LTU:
+ unspec_code = UNSPEC_UNSIGNED_PCMP;
+ break;
+ default:
+ unspec_code = UNSPEC_PCMP;
+ }
+
+ unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
+ operands[3], imm),
+ unspec_code);
+ emit_insn (gen_rtx_SET (operands[0], unspec));
+
+ return true;
+}
+
+/* Expand fp vector comparison. */
+
+bool
+ix86_expand_fp_vec_cmp (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[1]);
rtx cmp;
code = ix86_prepare_sse_fp_compare_args (operands[0], code,
- &operands[4], &operands[5]);
+ &operands[2], &operands[3]);
if (code == UNKNOWN)
{
rtx temp;
- switch (GET_CODE (operands[3]))
+ switch (GET_CODE (operands[1]))
{
case LTGT:
- temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
- operands[5], operands[0], operands[0]);
- cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
- operands[5], operands[1], operands[2]);
+ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
+ operands[3], NULL, NULL);
+ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
+ operands[3], NULL, NULL);
code = AND;
break;
case UNEQ:
- temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
- operands[5], operands[0], operands[0]);
- cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
- operands[5], operands[1], operands[2]);
+ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
+ operands[3], NULL, NULL);
+ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
+ operands[3], NULL, NULL);
code = IOR;
break;
default:
gcc_unreachable ();
}
cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
OPTAB_DIRECT);
- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
- return true;
}
+ else
+ cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
+ operands[1], operands[2]);
- if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
- operands[5], operands[1], operands[2]))
- return true;
+ if (operands[0] != cmp)
+ emit_move_insn (operands[0], cmp);
- cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
- operands[1], operands[2]);
- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
return true;
}
-/* Expand a signed/unsigned integral vector conditional move. */
-
-bool
-ix86_expand_int_vcond (rtx operands[])
+static rtx
+ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
+ rtx op_true, rtx op_false, bool *negate)
{
- machine_mode data_mode = GET_MODE (operands[0]);
- machine_mode mode = GET_MODE (operands[4]);
- enum rtx_code code = GET_CODE (operands[3]);
- bool negate = false;
- rtx x, cop0, cop1;
+ machine_mode data_mode = GET_MODE (dest);
+ machine_mode mode = GET_MODE (cop0);
+ rtx x;
- cop0 = operands[4];
- cop1 = operands[5];
-
- /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
- and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
- if ((code == LT || code == GE)
- && data_mode == mode
- && cop1 == CONST0_RTX (mode)
- && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
- && GET_MODE_UNIT_SIZE (data_mode) > 1
- && GET_MODE_UNIT_SIZE (data_mode) <= 8
- && (GET_MODE_SIZE (data_mode) == 16
- || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
- {
- rtx negop = operands[2 - (code == LT)];
- int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
- if (negop == CONST1_RTX (data_mode))
- {
- rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
- operands[0], 1, OPTAB_DIRECT);
- if (res != operands[0])
- emit_move_insn (operands[0], res);
- return true;
- }
- else if (GET_MODE_INNER (data_mode) != DImode
- && vector_all_ones_operand (negop, data_mode))
- {
- rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
- operands[0], 0, OPTAB_DIRECT);
- if (res != operands[0])
- emit_move_insn (operands[0], res);
- return true;
- }
- }
-
- if (!nonimmediate_operand (cop1, mode))
- cop1 = force_reg (mode, cop1);
- if (!general_operand (operands[1], data_mode))
- operands[1] = force_reg (data_mode, operands[1]);
- if (!general_operand (operands[2], data_mode))
- operands[2] = force_reg (data_mode, operands[2]);
+ *negate = false;
/* XOP supports all of the comparisons on all 128-bit vector int types. */
if (TARGET_XOP
case LE:
case LEU:
code = reverse_condition (code);
- negate = true;
+ *negate = true;
break;
case GE:
case GEU:
code = reverse_condition (code);
- negate = true;
+ *negate = true;
/* FALLTHRU */
case LT:
case EQ:
/* SSE4.1 supports EQ. */
if (!TARGET_SSE4_1)
- return false;
+ return NULL;
break;
case GT:
case GTU:
/* SSE4.2 supports GT/GTU. */
if (!TARGET_SSE4_2)
- return false;
+ return NULL;
break;
default:
case V8HImode:
/* Perform a parallel unsigned saturating subtraction. */
x = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
+ emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
+ cop1)));
cop0 = x;
cop1 = CONST0_RTX (mode);
code = EQ;
- negate = !negate;
+ *negate = !*negate;
break;
default:
gcc_unreachable ();
}
}
+ if (*negate)
+ std::swap (op_true, op_false);
+
/* Allow the comparison to be done in one mode, but the movcc to
happen in another mode. */
if (data_mode == mode)
{
- x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
- operands[1+negate], operands[2-negate]);
+ x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
+ op_true, op_false);
}
else
{
gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
- operands[1+negate], operands[2-negate]);
+ op_true, op_false);
if (GET_MODE (x) == mode)
x = gen_lowpart (data_mode, x);
}
+ return x;
+}
+
+/* Expand integer vector comparison. */
+
+bool
+ix86_expand_int_vec_cmp (rtx operands[])
+{
+ rtx_code code = GET_CODE (operands[1]);
+ bool negate = false;
+ rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
+ operands[3], NULL, NULL, &negate);
+
+ if (!cmp)
+ return false;
+
+ if (negate)
+ cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
+ CONST0_RTX (GET_MODE (cmp)),
+ NULL, NULL, &negate);
+
+ gcc_assert (!negate);
+
+ if (operands[0] != cmp)
+ emit_move_insn (operands[0], cmp);
+
+ return true;
+}
+
+/* Expand a floating-point vector conditional move; a vcond operation
+ rather than a movcc operation. */
+
+bool
+ix86_expand_fp_vcond (rtx operands[])
+{
+ enum rtx_code code = GET_CODE (operands[3]);
+ rtx cmp;
+
+ code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+ &operands[4], &operands[5]);
+ if (code == UNKNOWN)
+ {
+ rtx temp;
+ switch (GET_CODE (operands[3]))
+ {
+ case LTGT:
+ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = AND;
+ break;
+ case UNEQ:
+ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = IOR;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
+ OPTAB_DIRECT);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+ }
+
+ if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
+ operands[5], operands[1], operands[2]))
+ return true;
+
+ cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
+ operands[1], operands[2]);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+}
+
+/* Expand a signed/unsigned integral vector conditional move. */
+
+bool
+ix86_expand_int_vcond (rtx operands[])
+{
+ machine_mode data_mode = GET_MODE (operands[0]);
+ machine_mode mode = GET_MODE (operands[4]);
+ enum rtx_code code = GET_CODE (operands[3]);
+ bool negate = false;
+ rtx x, cop0, cop1;
+
+ cop0 = operands[4];
+ cop1 = operands[5];
+
+ /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
+ and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
+ if ((code == LT || code == GE)
+ && data_mode == mode
+ && cop1 == CONST0_RTX (mode)
+ && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
+ && GET_MODE_UNIT_SIZE (data_mode) > 1
+ && GET_MODE_UNIT_SIZE (data_mode) <= 8
+ && (GET_MODE_SIZE (data_mode) == 16
+ || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
+ {
+ rtx negop = operands[2 - (code == LT)];
+ int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
+ if (negop == CONST1_RTX (data_mode))
+ {
+ rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
+ operands[0], 1, OPTAB_DIRECT);
+ if (res != operands[0])
+ emit_move_insn (operands[0], res);
+ return true;
+ }
+ else if (GET_MODE_INNER (data_mode) != DImode
+ && vector_all_ones_operand (negop, data_mode))
+ {
+ rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
+ operands[0], 0, OPTAB_DIRECT);
+ if (res != operands[0])
+ emit_move_insn (operands[0], res);
+ return true;
+ }
+ }
+
+ if (!nonimmediate_operand (cop1, mode))
+ cop1 = force_reg (mode, cop1);
+ if (!general_operand (operands[1], data_mode))
+ operands[1] = force_reg (data_mode, operands[1]);
+ if (!general_operand (operands[2], data_mode))
+ operands[2] = force_reg (data_mode, operands[2]);
+
+ x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
+ operands[1], operands[2], &negate);
+
+ if (!x)
+ return false;
+
ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
operands[2-negate]);
return true;
(TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
+/* Implementation of targetm.vectorize.get_mask_mode. */
+
+static machine_mode
+ix86_get_mask_mode (unsigned nunits, unsigned vector_size)
+{
+ unsigned elem_size = vector_size / nunits;
+
+ /* Scalar mask case. */
+ if (TARGET_AVX512F && vector_size == 64)
+ {
+ if (elem_size == 4 || elem_size == 8 || TARGET_AVX512BW)
+ return smallest_mode_for_size (nunits, MODE_INT);
+ }
+
+ machine_mode elem_mode
+ = smallest_mode_for_size (elem_size * BITS_PER_UNIT, MODE_INT);
+
+ gcc_assert (elem_size * nunits == vector_size);
+
+ return mode_for_vector (elem_mode, nunits);
+}
+
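A hedged illustration (not in the patch) of how the hook above resolves a few representative requests; the values simply follow the code, assuming an AVX-512F target:

/* Illustration only, assuming TARGET_AVX512F:
     ix86_get_mask_mode (16, 64) -> HImode    (e.g. V16SI/V16SF compare)
     ix86_get_mask_mode (8, 64)  -> QImode    (e.g. V8DI/V8DF compare)
     ix86_get_mask_mode (64, 64) -> DImode    with TARGET_AVX512BW,
                                    V64QImode otherwise
     ix86_get_mask_mode (8, 32)  -> V8SImode  (AVX2-style vector mask)  */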
\f
/* Return class of registers which could be used for pseudo of MODE
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
ix86_autovectorize_vector_sizes
+#undef TARGET_VECTORIZE_GET_MASK_MODE
+#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
(V16SF "HI") (V8SF "QI") (V4SF "QI")
(V8DF "QI") (V4DF "QI") (V2DF "QI")])
+;; Mapping of vector modes to corresponding mask size
+(define_mode_attr avx512fmaskmodelower
+ [(V64QI "di") (V32QI "si") (V16QI "hi")
+ (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
+ (V16SI "hi") (V8SI "qi") (V4SI "qi")
+ (V8DI "qi") (V4DI "qi") (V2DI "qi")
+ (V16SF "hi") (V8SF "qi") (V4SF "qi")
+ (V8DF "qi") (V4DF "qi") (V2DF "qi")])
+
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V16SF "V16SI") (V8DF "V8DI")
(const_string "0")))
(set_attr "mode" "<MODE>")])
+(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+ (match_operator:<avx512fmaskmode> 1 ""
+ [(match_operand:V48_AVX512VL 2 "register_operand")
+ (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
+ "TARGET_AVX512F"
+{
+ bool ok = ix86_expand_mask_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+ (match_operator:<avx512fmaskmode> 1 ""
+ [(match_operand:VI12_AVX512VL 2 "register_operand")
+ (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
+ "TARGET_AVX512BW"
+{
+ bool ok = ix86_expand_mask_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmp<mode><sseintvecmodelower>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand")
+ (match_operator:<sseintvecmode> 1 ""
+ [(match_operand:VI_256 2 "register_operand")
+ (match_operand:VI_256 3 "nonimmediate_operand")]))]
+ "TARGET_AVX2"
+{
+ bool ok = ix86_expand_int_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmp<mode><sseintvecmodelower>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand")
+ (match_operator:<sseintvecmode> 1 ""
+ [(match_operand:VI124_128 2 "register_operand")
+ (match_operand:VI124_128 3 "nonimmediate_operand")]))]
+ "TARGET_SSE2"
+{
+ bool ok = ix86_expand_int_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmpv2div2di"
+ [(set (match_operand:V2DI 0 "register_operand")
+ (match_operator:V2DI 1 ""
+ [(match_operand:V2DI 2 "register_operand")
+ (match_operand:V2DI 3 "nonimmediate_operand")]))]
+ "TARGET_SSE4_2"
+{
+ bool ok = ix86_expand_int_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmp<mode><sseintvecmodelower>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand")
+ (match_operator:<sseintvecmode> 1 ""
+ [(match_operand:VF_256 2 "register_operand")
+ (match_operand:VF_256 3 "nonimmediate_operand")]))]
+ "TARGET_AVX"
+{
+ bool ok = ix86_expand_fp_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmp<mode><sseintvecmodelower>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand")
+ (match_operator:<sseintvecmode> 1 ""
+ [(match_operand:VF_128 2 "register_operand")
+ (match_operand:VF_128 3 "nonimmediate_operand")]))]
+ "TARGET_SSE"
+{
+ bool ok = ix86_expand_fp_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+ (match_operator:<avx512fmaskmode> 1 ""
+ [(match_operand:VI48_AVX512VL 2 "register_operand")
+ (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
+ "TARGET_AVX512F"
+{
+ bool ok = ix86_expand_mask_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+ (match_operator:<avx512fmaskmode> 1 ""
+ [(match_operand:VI12_AVX512VL 2 "register_operand")
+ (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
+ "TARGET_AVX512BW"
+{
+ bool ok = ix86_expand_mask_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmpu<mode><sseintvecmodelower>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand")
+ (match_operator:<sseintvecmode> 1 ""
+ [(match_operand:VI_256 2 "register_operand")
+ (match_operand:VI_256 3 "nonimmediate_operand")]))]
+ "TARGET_AVX2"
+{
+ bool ok = ix86_expand_int_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmpu<mode><sseintvecmodelower>"
+ [(set (match_operand:<sseintvecmode> 0 "register_operand")
+ (match_operator:<sseintvecmode> 1 ""
+ [(match_operand:VI124_128 2 "register_operand")
+ (match_operand:VI124_128 3 "nonimmediate_operand")]))]
+ "TARGET_SSE2"
+{
+ bool ok = ix86_expand_int_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vec_cmpuv2div2di"
+ [(set (match_operand:V2DI 0 "register_operand")
+ (match_operator:V2DI 1 ""
+ [(match_operand:V2DI 2 "register_operand")
+ (match_operand:V2DI 3 "nonimmediate_operand")]))]
+ "TARGET_SSE4_2"
+{
+ bool ok = ix86_expand_int_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
(define_expand "vcond<V_512:mode><VF_512:mode>"
[(set (match_operand:V_512 0 "register_operand")
(if_then_else:V_512
if (TREE_CODE (ops->type) == VECTOR_TYPE)
{
tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
- tree if_true = constant_boolean_node (true, ops->type);
- tree if_false = constant_boolean_node (false, ops->type);
- return expand_vec_cond_expr (ops->type, ifexp, if_true, if_false, target);
+ if (VECTOR_BOOLEAN_TYPE_P (ops->type))
+ return expand_vec_cmp_expr (ops->type, ifexp, target);
+ else
+ {
+ tree if_true = constant_boolean_node (true, ops->type);
+ tree if_false = constant_boolean_node (false, ops->type);
+ return expand_vec_cond_expr (ops->type, ifexp, if_true,
+ if_false, target);
+ }
}
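For context, a hedged user-level sketch (GNU C vector extensions; not part of the patch) of a vector comparison that reaches do_store_flag. When the result has a boolean vector type, the new branch expands it via expand_vec_cmp_expr; otherwise it still goes through the VEC_COND_EXPR path with all-ones/all-zeros arms.

/* Hypothetical example: a vector comparison expanded via do_store_flag.
   Here the result type is an integer vector, so it takes the
   expand_vec_cond_expr path; a boolean-vector result (e.g. an AVX-512
   mask type) would take the new expand_vec_cmp_expr path.  */
typedef int v8si __attribute__ ((vector_size (32)));

v8si
vec_lt (v8si a, v8si b)
{
  return a < b;
}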
/* Get the rtx comparison code to use. We know that EXP is a comparison
return 1;
}
+/* Return a CONST_VECTOR rtx representing vector mask for
+ a VECTOR_CST of booleans. */
+static rtx
+const_vector_mask_from_tree (tree exp)
+{
+ rtvec v;
+ unsigned i;
+ int units;
+ tree elt;
+ machine_mode inner, mode;
+
+ mode = TYPE_MODE (TREE_TYPE (exp));
+ units = GET_MODE_NUNITS (mode);
+ inner = GET_MODE_INNER (mode);
+
+ v = rtvec_alloc (units);
+
+ for (i = 0; i < VECTOR_CST_NELTS (exp); ++i)
+ {
+ elt = VECTOR_CST_ELT (exp, i);
+
+ gcc_assert (TREE_CODE (elt) == INTEGER_CST);
+ if (integer_zerop (elt))
+ RTVEC_ELT (v, i) = CONST0_RTX (inner);
+ else if (integer_onep (elt)
+ || integer_minus_onep (elt))
+ RTVEC_ELT (v, i) = CONSTM1_RTX (inner);
+ else
+ gcc_unreachable ();
+ }
+
+ return gen_rtx_CONST_VECTOR (mode, v);
+}
+
/* Return a CONST_VECTOR rtx for a VECTOR_CST tree. */
static rtx
const_vector_from_tree (tree exp)
if (initializer_zerop (exp))
return CONST0_RTX (mode);
+ if (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (exp)))
+ return const_vector_mask_from_tree (exp);
+
units = GET_MODE_NUNITS (mode);
inner = GET_MODE_INNER (mode);
|| binoptab == smulv_optab);
}
+/* Return insn code for a comparison operator with VMODE
+ resulting in MASK_MODE, unsigned if UNS is true. */
+
+static inline enum insn_code
+get_vec_cmp_icode (machine_mode vmode, machine_mode mask_mode, bool uns)
+{
+ optab tab = uns ? vec_cmpu_optab : vec_cmp_optab;
+ return convert_optab_handler (tab, vmode, mask_mode);
+}
+
/* Return insn code for a conditional operator with a comparison in
mode CMODE, unsigned if UNS is true, resulting in a value of mode VMODE. */
return false;
}
+/* Return TRUE if an appropriate vector insn is available
+ for a vector comparison expr with vector type VALUE_TYPE
+ and resulting mask type MASK_TYPE. */
+
+bool
+expand_vec_cmp_expr_p (tree value_type, tree mask_type)
+{
+ enum insn_code icode = get_vec_cmp_icode (TYPE_MODE (value_type),
+ TYPE_MODE (mask_type),
+ TYPE_UNSIGNED (value_type));
+ return (icode != CODE_FOR_nothing);
+}
+
/* Return TRUE iff, appropriate vector insns are available
for vector cond expr with vector type VALUE_TYPE and a comparison
with operand vector types in CMP_OP_TYPE. */
optab scalar_reduc_to_vector (optab, const_tree);
bool supportable_convert_operation (enum tree_code, tree, tree, tree *,
enum tree_code *);
+bool expand_vec_cmp_expr_p (tree, tree);
bool expand_vec_cond_expr_p (tree, tree);
void init_tree_optimization_optabs (tree);
}
/* Return comparison rtx for COND. Use UNSIGNEDP to select signed or
- unsigned operators. Do not generate compare instruction. */
+ unsigned operators. OPNO holds an index of the first comparison
+ operand in insn with code ICODE. Do not generate compare instruction. */
static rtx
vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1,
- bool unsignedp, enum insn_code icode)
+ bool unsignedp, enum insn_code icode,
+ unsigned int opno)
{
struct expand_operand ops[2];
rtx rtx_op0, rtx_op1;
create_input_operand (&ops[0], rtx_op0, m0);
create_input_operand (&ops[1], rtx_op1, m1);
- if (!maybe_legitimize_operands (icode, 4, 2, ops))
+ if (!maybe_legitimize_operands (icode, opno, 2, ops))
gcc_unreachable ();
return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
}
if (icode == CODE_FOR_nothing)
return 0;
- comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode);
+ comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode, 4);
rtx_op1 = expand_normal (op1);
rtx_op2 = expand_normal (op2);
return ops[0].value;
}
+/* Generate insns for a vector comparison into a mask. */
+
+rtx
+expand_vec_cmp_expr (tree type, tree exp, rtx target)
+{
+ struct expand_operand ops[4];
+ enum insn_code icode;
+ rtx comparison;
+ machine_mode mask_mode = TYPE_MODE (type);
+ machine_mode vmode;
+ bool unsignedp;
+ tree op0a, op0b;
+ enum tree_code tcode;
+
+ op0a = TREE_OPERAND (exp, 0);
+ op0b = TREE_OPERAND (exp, 1);
+ tcode = TREE_CODE (exp);
+
+ unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
+ vmode = TYPE_MODE (TREE_TYPE (op0a));
+
+ icode = get_vec_cmp_icode (vmode, mask_mode, unsignedp);
+ if (icode == CODE_FOR_nothing)
+ return 0;
+
+ comparison = vector_compare_rtx (tcode, op0a, op0b, unsignedp, icode, 2);
+ create_output_operand (&ops[0], target, mask_mode);
+ create_fixed_operand (&ops[1], comparison);
+ create_fixed_operand (&ops[2], XEXP (comparison, 0));
+ create_fixed_operand (&ops[3], XEXP (comparison, 1));
+ expand_insn (icode, 4, ops);
+ return ops[0].value;
+}
+
/* Expand a highpart multiply. */
rtx
OPTAB_CD(vec_store_lanes_optab, "vec_store_lanes$a$b")
OPTAB_CD(vcond_optab, "vcond$a$b")
OPTAB_CD(vcondu_optab, "vcondu$a$b")
+OPTAB_CD(vec_cmp_optab, "vec_cmp$a$b")
+OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
OPTAB_NX(add_optab, "add$F$a3")
/* Generate code for VEC_PERM_EXPR. */
extern rtx expand_vec_perm (machine_mode, rtx, rtx, rtx, rtx);
+/* Generate code for vector comparison. */
+extern rtx expand_vec_cmp_expr (tree, tree, rtx);
+
/* Generate code for VEC_COND_EXPR. */
extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
+2015-11-10 Ilya Enkovich <enkovich.gnu@gmail.com>
+
+ * gcc.dg/vect/slp-cond-5.c: New test.
+
2015-11-10 Richard Biener <rguenther@suse.de>
PR tree-optimization/68240
--- /dev/null
+/* { dg-require-effective-target vect_condition } */
+
+#include "tree-vect.h"
+
+#define N 128
+
+static inline int
+foo (int x, int y, int a, int b)
+{
+ if (x >= y && a > b)
+ return a;
+ else
+ return b;
+}
+
+__attribute__((noinline, noclone)) void
+bar (int * __restrict__ a, int * __restrict__ b,
+ int * __restrict__ c, int * __restrict__ d,
+ int * __restrict__ e, int w)
+{
+ int i;
+ for (i = 0; i < N/16; i++, a += 16, b += 16, c += 16, d += 16, e += 16)
+ {
+ e[0] = foo (c[0], d[0], a[0] * w, b[0] * w);
+ e[1] = foo (c[1], d[1], a[1] * w, b[1] * w);
+ e[2] = foo (c[2], d[2], a[2] * w, b[2] * w);
+ e[3] = foo (c[3], d[3], a[3] * w, b[3] * w);
+ e[4] = foo (c[4], d[4], a[4] * w, b[4] * w);
+ e[5] = foo (c[5], d[5], a[5] * w, b[5] * w);
+ e[6] = foo (c[6], d[6], a[6] * w, b[6] * w);
+ e[7] = foo (c[7], d[7], a[7] * w, b[7] * w);
+ e[8] = foo (c[8], d[8], a[8] * w, b[8] * w);
+ e[9] = foo (c[9], d[9], a[9] * w, b[9] * w);
+ e[10] = foo (c[10], d[10], a[10] * w, b[10] * w);
+ e[11] = foo (c[11], d[11], a[11] * w, b[11] * w);
+ e[12] = foo (c[12], d[12], a[12] * w, b[12] * w);
+ e[13] = foo (c[13], d[13], a[13] * w, b[13] * w);
+ e[14] = foo (c[14], d[14], a[14] * w, b[14] * w);
+ e[15] = foo (c[15], d[15], a[15] * w, b[15] * w);
+ }
+}
+
+
+int a[N], b[N], c[N], d[N], e[N];
+
+int main ()
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = i;
+ b[i] = 5;
+ e[i] = 0;
+
+ switch (i % 9)
+ {
+ case 0: asm (""); c[i] = i; d[i] = i + 1; break;
+ case 1: c[i] = 0; d[i] = 0; break;
+ case 2: c[i] = i + 1; d[i] = i - 1; break;
+ case 3: c[i] = i; d[i] = i + 7; break;
+ case 4: c[i] = i; d[i] = i; break;
+ case 5: c[i] = i + 16; d[i] = i + 3; break;
+ case 6: c[i] = i - 5; d[i] = i; break;
+ case 7: c[i] = i; d[i] = i; break;
+ case 8: c[i] = i; d[i] = i - 7; break;
+ }
+ }
+
+ bar (a, b, c, d, e, 2);
+ for (i = 0; i < N; i++)
+ if (e[i] != ((i % 3) == 0 || i <= 5 ? 10 : 2 * i))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
+
case vect_scalar_var:
prefix = "stmp";
break;
+ case vect_mask_var:
+ prefix = "mask";
+ break;
case vect_pointer_var:
prefix = "vectp";
break;
tree type;
enum vect_var_kind kind;
- kind = vectype ? vect_simple_var : vect_scalar_var;
+ kind = vectype
+ ? VECTOR_BOOLEAN_TYPE_P (vectype)
+ ? vect_mask_var
+ : vect_simple_var
+ : vect_scalar_var;
type = vectype ? vectype : TREE_TYPE (scalar_dest);
gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
tree op1, enum tree_code code)
{
tree t;
- if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
+ if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type)
+ && !expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
t = expand_vector_piecewise (gsi, do_compare, type,
TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
else
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
- int nbbs = loop->num_nodes;
+ unsigned nbbs = loop->num_nodes;
unsigned int vectorization_factor = 0;
tree scalar_type;
gphi *phi;
tree vectype;
unsigned int nunits;
stmt_vec_info stmt_info;
- int i;
+ unsigned i;
HOST_WIDE_INT dummy;
gimple *stmt, *pattern_stmt = NULL;
gimple_seq pattern_def_seq = NULL;
gimple_stmt_iterator pattern_def_si = gsi_none ();
bool analyze_pattern_stmt = false;
+ bool bool_result;
+ auto_vec<stmt_vec_info> mask_producers;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
return false;
}
+ bool_result = false;
+
if (STMT_VINFO_VECTYPE (stmt_info))
{
/* The only case when a vectype had been already set is for stmts
scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
else
scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
+
+ /* Bool ops don't participate in vectorization factor
+ computation. For comparisons use the compared types to
+ compute a factor. */
+ if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
+ {
+ mask_producers.safe_push (stmt_info);
+ bool_result = true;
+
+ if (gimple_code (stmt) == GIMPLE_ASSIGN
+ && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
+ == tcc_comparison
+ && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt)))
+ != BOOLEAN_TYPE)
+ scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
+ else
+ {
+ if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
+ {
+ pattern_def_seq = NULL;
+ gsi_next (&si);
+ }
+ continue;
+ }
+ }
+
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
return false;
}
- STMT_VINFO_VECTYPE (stmt_info) = vectype;
+ if (!bool_result)
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
if (dump_enabled_p ())
{
/* The vectorization factor is according to the smallest
scalar type (or the largest vector size, but we only
support one vector size per loop). */
- scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
- &dummy);
+ if (!bool_result)
+ scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
+ &dummy);
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
}
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+ for (i = 0; i < mask_producers.length (); i++)
+ {
+ tree mask_type = NULL;
+
+ stmt = STMT_VINFO_STMT (mask_producers[i]);
+
+ if (gimple_code (stmt) == GIMPLE_ASSIGN
+ && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
+ && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) != BOOLEAN_TYPE)
+ {
+ scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
+ mask_type = get_mask_type_for_scalar_type (scalar_type);
+
+ if (!mask_type)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: unsupported mask\n");
+ return false;
+ }
+ }
+ else
+ {
+ tree rhs;
+ ssa_op_iter iter;
+ gimple *def_stmt;
+ enum vect_def_type dt;
+
+ FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
+ {
+ if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
+ &def_stmt, &dt, &vectype))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: can't compute mask type "
+ "for statement, ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
+ 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+ return false;
+ }
+
+ /* No vectype probably means an external definition.
+ Allow it in case there is another operand which
+ allows us to determine the mask type. */
+ if (!vectype)
+ continue;
+
+ if (!mask_type)
+ mask_type = vectype;
+ else if (TYPE_VECTOR_SUBPARTS (mask_type)
+ != TYPE_VECTOR_SUBPARTS (vectype))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: different sized masks "
+ "types in statement, ");
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
+ mask_type);
+ dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
+ dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
+ vectype);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+ return false;
+ }
+ }
+ }
+
+ /* No mask_type should mean loop invariant predicate.
+ This is probably a subject for optimization in
+ if-conversion. */
+ if (!mask_type)
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: can't compute mask type "
+ "for statement, ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
+ 0);
+ dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
+ }
+ return false;
+ }
+
+ STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
+ }
+
return true;
}
/* Helper function of vect_recog_bool_pattern. Called recursively, return
- true if bool VAR can be optimized that way. */
+ true if bool VAR can and should be optimized that way. Assume it shouldn't
+ in case it's a result of a comparison which can be directly vectorized into
+ a vector comparison. */
static bool
check_bool_pattern (tree var, vec_info *vinfo)
default:
if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
{
- tree vecitype, comp_vectype;
+ tree vecitype, comp_vectype, mask_type;
/* If the comparison can throw, then is_gimple_condexpr will be
false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
if (comp_vectype == NULL_TREE)
return false;
+ mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
+ if (mask_type
+ && expand_vec_cmp_expr_p (comp_vectype, mask_type))
+ return false;
+
if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
{
machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
}
+/* Return the proper type for converting bool VAR into
+ an integer value or NULL_TREE if no such type exists.
+ The type is chosen so that the converted value has the
+ same number of elements as VAR's vector type. */
+
+static tree
+search_type_for_mask (tree var, vec_info *vinfo)
+{
+ gimple *def_stmt;
+ enum vect_def_type dt;
+ tree rhs1;
+ enum tree_code rhs_code;
+ tree res = NULL_TREE;
+
+ if (TREE_CODE (var) != SSA_NAME)
+ return NULL_TREE;
+
+ if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
+ || !TYPE_UNSIGNED (TREE_TYPE (var)))
+ && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
+ return NULL_TREE;
+
+ if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
+ return NULL_TREE;
+
+ if (dt != vect_internal_def)
+ return NULL_TREE;
+
+ if (!is_gimple_assign (def_stmt))
+ return NULL_TREE;
+
+ rhs_code = gimple_assign_rhs_code (def_stmt);
+ rhs1 = gimple_assign_rhs1 (def_stmt);
+
+ switch (rhs_code)
+ {
+ case SSA_NAME:
+ case BIT_NOT_EXPR:
+ CASE_CONVERT:
+ res = search_type_for_mask (rhs1, vinfo);
+ break;
+
+ case BIT_AND_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ if (!(res = search_type_for_mask (rhs1, vinfo)))
+ res = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
+ break;
+
+ default:
+ if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
+ {
+ if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
+ || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+ {
+ machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
+ res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
+ }
+ else
+ res = TREE_TYPE (rhs1);
+ }
+ }
+
+ return res;
+}
+
+
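As a hedged illustration (hypothetical source, not from the patch's testsuite), consider a bool built from comparisons; search_type_for_mask follows the defining statements of the bool and returns the (unsigned) integer type of the compared operands, so the mask vector gets the element count of a vector of that type:

/* Hypothetical example: the mask is a combination of int comparisons,
   so search_type_for_mask resolves to a 32-bit unsigned type and the
   pattern can use a mask with one element per int.  */
void
mask_example (int *restrict a, int *restrict b, int *restrict c,
              int *restrict r, int n)
{
  for (int i = 0; i < n; i++)
    {
      _Bool m = (a[i] > b[i]) & (c[i] != 0);
      r[i] = m ? a[i] : c[i];
    }
}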
/* Function vect_recog_bool_pattern
Try to find pattern like following:
enum tree_code rhs_code;
tree var, lhs, rhs, vectype;
stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+ stmt_vec_info new_stmt_info;
vec_info *vinfo = stmt_vinfo->vinfo;
gimple *pattern_stmt;
if (vectype == NULL_TREE)
return NULL;
- if (!check_bool_pattern (var, vinfo))
- return NULL;
-
- rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
- lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
- if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
- pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
+ if (check_bool_pattern (var, vinfo))
+ {
+ rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
+ lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
+ if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
+ pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
+ else
+ pattern_stmt
+ = gimple_build_assign (lhs, NOP_EXPR, rhs);
+ }
else
- pattern_stmt
- = gimple_build_assign (lhs, NOP_EXPR, rhs);
+ {
+ tree type = search_type_for_mask (var, vinfo);
+ tree cst0, cst1, cmp, tmp;
+
+ if (!type)
+ return NULL;
+
+ /* We may directly use the cond with a narrowed type to avoid
+ multiple cond exprs with subsequent result packing and
+ perform a single cond with a packed mask instead. In case
+ of widening it is better to emit the cond first and then
+ extract the results. */
+ if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
+ type = TREE_TYPE (lhs);
+
+ cst0 = build_int_cst (type, 0);
+ cst1 = build_int_cst (type, 1);
+ tmp = vect_recog_temp_ssa_var (type, NULL);
+ cmp = build2 (NE_EXPR, boolean_type_node,
+ var, build_int_cst (TREE_TYPE (var), 0));
+ pattern_stmt = gimple_build_assign (tmp, COND_EXPR, cmp, cst1, cst0);
+
+ if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
+ {
+ tree new_vectype = get_vectype_for_scalar_type (type);
+ new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
+ set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
+ STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
+ new_pattern_def_seq (stmt_vinfo, pattern_stmt);
+
+ lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
+ pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
+ }
+ }
+
*type_out = vectype;
*type_in = vectype;
stmts->safe_push (last_stmt);
if (get_vectype_for_scalar_type (type) == NULL_TREE)
return NULL;
- if (!check_bool_pattern (var, vinfo))
- return NULL;
+ if (check_bool_pattern (var, vinfo))
+ {
+ rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
+ rhs = build2 (NE_EXPR, boolean_type_node,
+ rhs, build_int_cst (type, 0));
+ }
+ else
+ rhs = build2 (NE_EXPR, boolean_type_node,
+ var, build_int_cst (TREE_TYPE (var), 0));
- rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
pattern_stmt
- = gimple_build_assign (lhs, COND_EXPR,
- build2 (NE_EXPR, boolean_type_node,
- rhs, build_int_cst (type, 0)),
+ = gimple_build_assign (lhs, COND_EXPR, rhs,
gimple_assign_rhs2 (last_stmt),
gimple_assign_rhs3 (last_stmt));
*type_out = vectype;
gcc_assert (vectype != NULL_TREE);
if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
return NULL;
- if (!check_bool_pattern (var, vinfo))
- return NULL;
- rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
+ if (check_bool_pattern (var, vinfo))
+ rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
+ NULL_TREE, stmts);
+ else
+ {
+ tree type = search_type_for_mask (var, vinfo);
+ tree cst0, cst1, cmp, new_vectype;
+
+ if (!type)
+ return NULL;
+
+ if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
+ type = TREE_TYPE (vectype);
+
+ cst0 = build_int_cst (type, 0);
+ cst1 = build_int_cst (type, 1);
+ new_vectype = get_vectype_for_scalar_type (type);
+
+ rhs = vect_recog_temp_ssa_var (type, NULL);
+ cmp = build2 (NE_EXPR, boolean_type_node,
+ var, build_int_cst (TREE_TYPE (var), 0));
+ pattern_stmt = gimple_build_assign (rhs, COND_EXPR,
+ cmp, cst1, cst0);
+
+ pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
+ set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
+ STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
+ append_pattern_def_seq (stmt_vinfo, pattern_stmt);
+ }
+
lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
{
tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
- new_pattern_def_seq (stmt_vinfo, cast_stmt);
+ append_pattern_def_seq (stmt_vinfo, cast_stmt);
rhs = rhs2;
}
pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
if (TREE_CODE_CLASS (rhs_code) != tcc_binary
&& TREE_CODE_CLASS (rhs_code) != tcc_unary
&& TREE_CODE_CLASS (rhs_code) != tcc_expression
+ && TREE_CODE_CLASS (rhs_code) != tcc_comparison
&& rhs_code != CALL_EXPR)
{
if (dump_enabled_p ())
struct loop *loop;
gimple_seq ctor_seq = NULL;
- vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
+ /* Check if vector type is a boolean vector. */
+ if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
+ && (VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo))
+ || (code == COND_EXPR && op_num < 2)))
+ vector_type
+ = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
+ else
+ vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
nunits = TYPE_VECTOR_SUBPARTS (vector_type);
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
{
if (CONSTANT_CLASS_P (op))
{
- op = fold_unary (VIEW_CONVERT_EXPR,
- TREE_TYPE (vector_type), op);
+ if (VECTOR_BOOLEAN_TYPE_P (vector_type))
+ {
+ /* Can't use VIEW_CONVERT_EXPR for booleans because
+ of possibly different sizes of scalar value and
+ vector element. */
+ if (integer_zerop (op))
+ op = build_int_cst (TREE_TYPE (vector_type), 0);
+ else if (integer_onep (op))
+ op = build_int_cst (TREE_TYPE (vector_type), 1);
+ else
+ gcc_unreachable ();
+ }
+ else
+ op = fold_unary (VIEW_CONVERT_EXPR,
+ TREE_TYPE (vector_type), op);
gcc_assert (op && CONSTANT_CLASS_P (op));
}
else
if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
{
if (CONSTANT_CLASS_P (val))
- val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
+ val = fold_convert (TREE_TYPE (type), val);
else
{
new_temp = make_ssa_name (TREE_TYPE (type));
STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
In case OP is an invariant or constant, a new stmt that creates a vector def
- needs to be introduced. */
+ needs to be introduced. VECTYPE may be used to specify a required type for
+ a vector invariant. */
tree
-vect_get_vec_def_for_operand (tree op, gimple *stmt)
+vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
{
tree vec_oprnd;
gimple *vec_stmt;
gimple *def_stmt;
stmt_vec_info def_stmt_info = NULL;
stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+ tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
enum vect_def_type dt;
bool is_simple_use;
case vect_constant_def:
case vect_external_def:
{
- vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
+ if (vectype)
+ vector_type = vectype;
+ else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
+ && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
+ vector_type = build_same_sized_truth_vector_type (stmt_vectype);
+ else
+ vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
+
gcc_assert (vector_type);
return vect_init_vector (stmt, op, vector_type, NULL);
}
{
gimple *gtemp;
vec_cond_lhs =
- vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt);
+ vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
+ stmt, comp_vectype);
vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
loop_vinfo, >emp, &dts[0]);
vec_cond_rhs =
vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
- stmt);
+ stmt, comp_vectype);
vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
loop_vinfo, >emp, &dts[1]);
if (reduc_index == 1)
return true;
}
+/* vectorizable_comparison.
+
+ Check if STMT is a comparison expression that can be vectorized.
+ If VEC_STMT is also passed, vectorize the STMT: create a vectorized
+ comparison, put it in VEC_STMT, and insert it at GSI.
+
+ Return FALSE if not a vectorizable STMT, TRUE otherwise. */
+
+bool
+vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
+ gimple **vec_stmt, tree reduc_def,
+ slp_tree slp_node)
+{
+ tree lhs, rhs1, rhs2;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
+ tree new_temp;
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
+ unsigned nunits;
+ int ncopies;
+ enum tree_code code;
+ stmt_vec_info prev_stmt_info = NULL;
+ int i, j;
+ bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ vec<tree> vec_oprnds0 = vNULL;
+ vec<tree> vec_oprnds1 = vNULL;
+ gimple *def_stmt;
+ tree mask_type;
+ tree mask;
+
+ if (!VECTOR_BOOLEAN_TYPE_P (vectype))
+ return false;
+
+ mask_type = vectype;
+ nunits = TYPE_VECTOR_SUBPARTS (vectype);
+
+ if (slp_node || PURE_SLP_STMT (stmt_info))
+ ncopies = 1;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
+
+ gcc_assert (ncopies >= 1);
+ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
+ return false;
+
+ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
+ && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+ && reduc_def))
+ return false;
+
+ if (STMT_VINFO_LIVE_P (stmt_info))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "value used after loop.\n");
+ return false;
+ }
+
+ if (!is_gimple_assign (stmt))
+ return false;
+
+ code = gimple_assign_rhs_code (stmt);
+
+ if (TREE_CODE_CLASS (code) != tcc_comparison)
+ return false;
+
+ rhs1 = gimple_assign_rhs1 (stmt);
+ rhs2 = gimple_assign_rhs2 (stmt);
+
+ if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
+ &dts[0], &vectype1))
+ return false;
+
+ if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
+ &dts[1], &vectype2))
+ return false;
+
+ if (vectype1 && vectype2
+ && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
+ return false;
+
+ vectype = vectype1 ? vectype1 : vectype2;
+
+ /* Invariant comparison. */
+ if (!vectype)
+ {
+ vectype = build_vector_type (TREE_TYPE (rhs1), nunits);
+ if (tree_to_shwi (TYPE_SIZE_UNIT (vectype)) != current_vector_size)
+ return false;
+ }
+ else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
+ return false;
+
+ if (!vec_stmt)
+ {
+ STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
+ vect_model_simple_cost (stmt_info, ncopies, dts, NULL, NULL);
+ return expand_vec_cmp_expr_p (vectype, mask_type);
+ }
+
+ /* Transform. */
+ if (!slp_node)
+ {
+ vec_oprnds0.create (1);
+ vec_oprnds1.create (1);
+ }
+
+ /* Handle def. */
+ lhs = gimple_assign_lhs (stmt);
+ mask = vect_create_destination_var (lhs, mask_type);
+
+ /* Handle cmp expr. */
+ for (j = 0; j < ncopies; j++)
+ {
+ gassign *new_stmt = NULL;
+ if (j == 0)
+ {
+ if (slp_node)
+ {
+ auto_vec<tree, 2> ops;
+ auto_vec<vec<tree>, 2> vec_defs;
+
+ ops.safe_push (rhs1);
+ ops.safe_push (rhs2);
+ vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
+ vec_oprnds1 = vec_defs.pop ();
+ vec_oprnds0 = vec_defs.pop ();
+ }
+ else
+ {
+ vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, NULL);
+ vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, NULL);
+ }
+ }
+ else
+ {
+ vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
+ vec_oprnds0.pop ());
+ vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
+ vec_oprnds1.pop ());
+ }
+
+ if (!slp_node)
+ {
+ vec_oprnds0.quick_push (vec_rhs1);
+ vec_oprnds1.quick_push (vec_rhs2);
+ }
+
+ /* Arguments are ready. Create the new vector stmt. */
+ FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
+ {
+ vec_rhs2 = vec_oprnds1[i];
+
+ new_temp = make_ssa_name (mask);
+ new_stmt = gimple_build_assign (new_temp, code, vec_rhs1, vec_rhs2);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ if (slp_node)
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ }
+
+ if (slp_node)
+ continue;
+
+ if (j == 0)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
+ }
+
+ vec_oprnds0.release ();
+ vec_oprnds1.release ();
+
+ return true;
+}
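A minimal user-level sketch (assumed, not taken from the patch's testsuite) of a statement this function handles: the comparison result itself is the value being computed, so the vectorizer emits a mask-producing vector comparison rather than a VEC_COND_EXPR:

/* Hypothetical example: the stored value is the comparison result, so the
   vectorized loop contains a vector comparison statement.  */
void
store_flags (int *restrict a, int *restrict b, char *restrict out, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = a[i] < b[i];
}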
/* Make sure the statement is vectorizable. */
|| vectorizable_call (stmt, NULL, NULL, node)
|| vectorizable_store (stmt, NULL, NULL, node)
|| vectorizable_reduction (stmt, NULL, NULL, node)
- || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
+ || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
else
{
if (bb_vinfo)
|| vectorizable_load (stmt, NULL, NULL, node, NULL)
|| vectorizable_call (stmt, NULL, NULL, node)
|| vectorizable_store (stmt, NULL, NULL, node)
- || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
+ || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
+ || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
}
if (!ok)
gcc_assert (done);
break;
+ case comparison_vec_info_type:
+ done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
+ gcc_assert (done);
+ break;
+
case call_vec_info_type:
done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
stmt = gsi_stmt (*gsi);
return vectype;
}
+/* Function get_mask_type_for_scalar_type.
+
+ Returns the mask type corresponding to the result of a comparison
+ of vectors of the specified SCALAR_TYPE, as supported by the target. */
+
+tree
+get_mask_type_for_scalar_type (tree scalar_type)
+{
+ tree vectype = get_vectype_for_scalar_type (scalar_type);
+
+ if (!vectype)
+ return NULL;
+
+ return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
+ current_vector_size);
+}
+
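A short usage sketch (hypothetical caller, mirroring the check added to check_bool_pattern earlier in this patch): obtain the comparison vectype and its mask type, then ask the target whether a direct vector comparison is supported.

/* Hypothetical usage sketch, not part of the patch.  */
tree scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
tree comp_vectype = get_vectype_for_scalar_type (scalar_type);
tree mask_type = get_mask_type_for_scalar_type (scalar_type);
if (comp_vectype && mask_type
    && expand_vec_cmp_expr_p (comp_vectype, mask_type))
  {
    /* The comparison can be vectorized directly into a mask.  */
  }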
/* Function get_same_sized_vectype
Returns a vector type corresponding to SCALAR_TYPE of size
enum vect_var_kind {
vect_simple_var,
vect_pointer_var,
- vect_scalar_var
+ vect_scalar_var,
+ vect_mask_var
};
/* Defines type of operation. */
call_simd_clone_vec_info_type,
assignment_vec_info_type,
condition_vec_info_type,
+ comparison_vec_info_type,
reduc_vec_info_type,
induc_vec_info_type,
type_promotion_vec_info_type,
/* In tree-vect-stmts.c. */
extern unsigned int current_vector_size;
extern tree get_vectype_for_scalar_type (tree);
+extern tree get_mask_type_for_scalar_type (tree);
extern tree get_same_sized_vectype (tree, tree);
extern bool vect_is_simple_use (tree, vec_info *, gimple **,
enum vect_def_type *);
extern void vect_finish_stmt_generation (gimple *, gimple *,
gimple_stmt_iterator *);
extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
-extern tree vect_get_vec_def_for_operand (tree, gimple *);
+extern tree vect_get_vec_def_for_operand (tree, gimple *, tree = NULL);
extern tree vect_init_vector (gimple *, tree, tree,
gimple_stmt_iterator *);
extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree);
extern bool vect_analyze_stmt (gimple *, bool *, slp_tree);
extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *,
gimple **, tree, int, slp_tree);
+extern bool vectorizable_comparison (gimple *, gimple_stmt_iterator *,
+ gimple **, tree, slp_tree);
extern void vect_get_load_cost (struct data_reference *, int, bool,
unsigned int *, unsigned int *,
stmt_vector_for_cost *,