2016-03-22 Jakub Jelinek <jakub@redhat.com>
+ PR target/70300
+ * config/i386/i386.md (cvtsd2ss splitter): Unpack in destination
+ instead of source if operands[1] is xmm16 and above and
+ !TARGET_AVX512VL. Use avx512f_vec_dupv16sf_1 instead of
+ vec_interleave_lowv4sf if we need to unpack xmm16 and above.
+
PR c++/70295
* gimplify.c (gimplify_modify_expr): Call gimple_set_no_warning
on assign if (*from_p) is a comparison, set it to
{
/* If it is unsafe to overwrite upper half of source, we need
to move to destination and unpack there. */
- if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
- || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
- && true_regnum (operands[0]) != true_regnum (operands[1]))
+ if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+ || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+ && true_regnum (operands[0]) != true_regnum (operands[1]))
+ || (EXT_REX_SSE_REG_P (operands[1])
+ && !TARGET_AVX512VL))
{
rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
emit_move_insn (tmp, operands[1]);
}
else
operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
- emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
- operands[3]));
+ /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
+ =v, v; then vbroadcastss will only be needed for AVX512F without
+ AVX512VL. */
+ if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3])))
+ emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
+ operands[3]));
+ else
+ {
+ rtx tmp = simplify_gen_subreg (V16SFmode, operands[3], V4SFmode, 0);
+ emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
+ }
}
else
emit_insn (gen_vec_setv4sf_0 (operands[3],
--- /dev/null
+/* PR target/70300 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=amdfam10 -mavx512f" } */
+
+typedef _Complex A __attribute__ ((mode (SC))); /* Complex type forced to machine mode SC.  */
+typedef _Complex B __attribute__ ((mode (DC))); /* Complex type forced to machine mode DC.  */
+typedef _Complex C __attribute__ ((mode (TC))); /* Complex type forced to machine mode TC.  NOTE(review): SC/DC/TC are presumably GCC's single/double/128-bit complex float modes -- confirm against the machine-mode docs.  */
+
+C /* Reproducer built with only the file-level dg-options (no per-function target attribute).  */
+foo (A a, B b, C c, A d, B e, C f) /* Parameter c is never read in the body.  */
+{
+ b -= a; /* SC-mode operand combined into the DC-mode variable.  */
+ d += a; /* Both operands are SC-mode.  */
+ a += f; /* TC-mode operand accumulated into the SC-mode variable.  */
+ return a + b + d + e; /* Mixed SC/DC operands; result returned as the TC-mode type C.  */
+}
+
+__attribute__((target ("avx512vl"))) C /* Same statements as foo, but this function additionally carries target ("avx512vl").  */
+bar (A a, B b, C c, A d, B e, C f) /* Parameter c is never read in the body.  */
+{
+ b -= a; /* SC-mode operand combined into the DC-mode variable.  */
+ d += a; /* Both operands are SC-mode.  */
+ a += f; /* TC-mode operand accumulated into the SC-mode variable.  */
+ return a + b + d + e; /* Mixed SC/DC operands; result returned as the TC-mode type C.  */
+}