+2019-05-14 Richard Biener <rguenther@suse.de>
+ H.J. Lu <hongjiu.lu@intel.com>
+
+ PR tree-optimization/88828
+ * tree-ssa-forwprop.c (simplify_vector_constructor): Handle
+ permuting in a single non-constant element not extracted
+ from a vector.
+
2019-05-14 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
* internal-fn.def (SIGNBIT): New.
+2019-05-14 Richard Biener <rguenther@suse.de>
+ H.J. Lu <hongjiu.lu@intel.com>
+
+ PR tree-optimization/88828
+ * gcc.target/i386/pr88828-1.c: New test.
+ * gcc.target/i386/pr88828-1a.c: Likewise.
+ * gcc.target/i386/pr88828-1b.c: Likewise.
+ * gcc.target/i386/pr88828-1c.c: Likewise.
+ * gcc.target/i386/pr88828-4a.c: Likewise.
+ * gcc.target/i386/pr88828-4b.c: Likewise.
+ * gcc.target/i386/pr88828-5a.c: Likewise.
+ * gcc.target/i386/pr88828-5b.c: Likewise.
+ * gcc.target/i386/pr88828-7.c: Likewise.
+ * gcc.target/i386/pr88828-7a.c: Likewise.
+ * gcc.target/i386/pr88828-7b.c: Likewise.
+ * gcc.target/i386/pr88828-8.c: Likewise.
+ * gcc.target/i386/pr88828-8a.c: Likewise.
+ * gcc.target/i386/pr88828-8b.c: Likewise.
+ * gcc.target/i386/pr88828-9.c: Likewise.
+ * gcc.target/i386/pr88828-9a.c: Likewise.
+ * gcc.target/i386/pr88828-9b.c: Likewise.
+
2019-05-14 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
* gcc.target/aarch64/signbitv4sf.c: New test.
--- /dev/null
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "pr88828-1a.c"
+#include "pr88828-1b.c"
+#include "pr88828-1c.c"
+
+extern void abort ();
+
+void
+do_check (__v4sf y, float f[4], float z)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (i == 0)
+ {
+ if (y[i] != z)
+ abort ();
+ }
+ else
+ {
+ if (y[i] != f[i])
+ abort ();
+ }
+}
+
+int
+main (void)
+{
+ float f[4] = { -11, 2, 55553, -4 };
+ float z = 134567;
+ __v4sf x = { f[0], f[1], f[2], f[3] };
+ __v4sf y;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (x[i] != f[i])
+ abort ();
+
+ y = foo1 (x, z);
+ do_check (y, f, z);
+ y = foo2 (x, z);
+ do_check (y, f, z);
+ y = foo3 (x, z);
+ do_check (y, f, z);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo1 (__v4sf x, float f)
+{
+ __v4sf y = { f, x[1], x[2], x[3] };
+ return y;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+static __v4sf
+vector_init (float f0,float f1, float f2,float f3)
+{
+ __v4sf y = { f0, f1, f2, f3 };
+ return y;
+}
+
+__attribute__((noinline, noclone))
+__v4sf
+foo2 (__v4sf x, float f)
+{
+ return vector_init (f, x[1], x[2], x[3]) ;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo3 (__v4sf x, float f)
+{
+ __v4sf y = x;
+ y[0] = f;
+ return y;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
+/* { dg-final { scan-assembler-not "movaps" } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], x[1] };
+ y[0] = f;
+ return y;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], x[1] };
+ y[0] = f;
+ return y;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler "movss" } } */
+/* { dg-final { scan-assembler-times "shufps" 2 } } */
+/* { dg-final { scan-assembler-times "movaps" 1 } } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], x[0] };
+ y[3] = f;
+ return y;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
+/* { dg-final { scan-assembler-times "vinsertps" 1 } } */
+/* { dg-final { scan-assembler-not "vshufps" } } */
+/* { dg-final { scan-assembler-not "vmovss" } } */
+/* { dg-final { scan-assembler-not "vmovaps" } } */
+/* { dg-final { scan-assembler-not "vmovlhps" } } */
+/* { dg-final { scan-assembler-not "vunpcklps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__attribute__((noinline, noclone))
+__v4sf
+foo (__v4sf x, float f)
+{
+ __v4sf y = { x[0], x[2], x[3], x[0] };
+ y[3] = f;
+ return y;
+}
--- /dev/null
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2 -fexcess-precision=standard" } */
+
+#include "pr88828-7a.c"
+#include "pr88828-7b.c"
+
+extern void abort ();
+
+float
+bar (float x, float y)
+{
+ return x / y - y * x;
+}
+
+void
+do_check (__v4sf x, float f1[4], float f2[4])
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (i == 0)
+ {
+ if (x[i] != bar (f1[i], f2[i]))
+ abort ();
+ }
+ else
+ {
+ if (x[i] != f1[i])
+ abort ();
+ }
+}
+
+int
+main (void)
+{
+ float f1[4] = { -11, 2, 55553, -4 };
+ float f2[4] = { 111, 3.3, -55.553, 4.8 };
+ __v4sf x = { f1[0], f1[1], f1[2], f1[3] };
+ __v4sf y = { f2[0], f2[1], f2[2], f2[3] };
+ __v4sf z;
+ int i;
+
+ /* Sanity-check that the vector initializers hold the expected values. */
+ for (i = 0; i < 4; i++)
+ if (x[i] != f1[i] || y[i] != f2[i] )
+ abort ();
+
+ z = foo1 (x, y);
+ do_check (z, f1, f2);
+ /* Store the foo2 result in z so that it, rather than the stale foo1
+ result, is what do_check verifies; x stays untouched as the input. */
+ z = foo2 (x, y);
+ do_check (z, f1, f2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+extern float bar (float, float);
+
+__v4sf
+foo1 (__v4sf x, __v4sf y)
+{
+ __v4sf z = { bar (x[0], y[0]), x[1], x[2], x[3] };
+ return z;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+extern float bar (float, float);
+
+static __v4sf
+vector_init (float f0,float f1, float f2,float f3)
+{
+ __v4sf y = { f0, f1, f2, f3 };
+ return y;
+}
+
+__v4sf
+foo2 (__v4sf x, __v4sf y)
+{
+ return vector_init (bar (x[0], y[0]), x[1], x[2], x[3]) ;
+}
--- /dev/null
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "pr88828-8a.c"
+#include "pr88828-8b.c"
+
+extern void abort ();
+
+void
+do_check (__v4sf y, float f[4], float z)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (i == 0)
+ {
+ if (y[i] != z)
+ abort ();
+ }
+ else
+ {
+ if (y[i] != f[i])
+ abort ();
+ }
+}
+
+int
+main (void)
+{
+ float f[4] = { -11, 2, 55553, -4 };
+ float z = 11.4;
+ __v4sf x = { f[0], f[1], f[2], f[3] };
+ __v4sf y;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (x[i] != f[i])
+ abort ();
+
+ y = foo1 (x);
+ do_check (y, f, z);
+ y = foo2 (x);
+ do_check (y, f, z);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+__v4sf
+foo1 (__v4sf x)
+{
+ __v4sf z = { 11.4, x[1], x[2], x[3] };
+ return z;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+
+static __v4sf
+vector_init (float f0,float f1, float f2,float f3)
+{
+ __v4sf y = { f0, f1, f2, f3 };
+ return y;
+}
+
+__v4sf
+foo2 (__v4sf x)
+{
+ return vector_init (11.4, x[1], x[2], x[3]) ;
+}
--- /dev/null
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+#include "pr88828-9a.c"
+#include "pr88828-9b.c"
+
+extern void abort ();
+
+void
+do_check (__v4sf y, float f[4], float z)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (i == 0)
+ {
+ if (y[i] != z)
+ abort ();
+ }
+ else
+ {
+ if (y[i] != f[i])
+ abort ();
+ }
+}
+
+int
+main (void)
+{
+ float f[4] = { -11, 2, 55553, -4 };
+ float z = 11.4;
+ __m128 x = (__m128) (__v4sf) { f[0], f[1], f[2], f[3] };
+ __m128 y;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (x[i] != f[i])
+ abort ();
+
+ y = foo1 (x);
+ do_check (y, f, z);
+ y = foo2 (x);
+ do_check (y, f, z);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+__m128
+foo1 (__m128 x)
+{
+ __v4sf z = { 11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], ((__v4sf) x) [3] };
+ return (__m128) z;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse -mno-sse4" } */
+/* { dg-final { scan-assembler-not "movlhps" } } */
+/* { dg-final { scan-assembler-not "unpckhps" } } */
+/* { dg-final { scan-assembler-not "unpcklps" } } */
+/* { dg-final { scan-assembler-not "shufps" } } */
+
+typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+
+static __m128
+vector_init (float f0,float f1, float f2,float f3)
+{
+ __v4sf y = { f0, f1, f2, f3 };
+ return (__m128) y;
+}
+
+__m128
+foo2 (__m128 x)
+{
+ return vector_init (11.4, ((__v4sf) x)[1], ((__v4sf) x)[2],
+ ((__v4sf) x) [3]);
+}
conv_code = ERROR_MARK;
maybe_ident = true;
tree one_constant = NULL_TREE;
+ tree one_nonconstant = NULL_TREE;
auto_vec<tree> constants;
constants.safe_grow_cleared (nelts);
FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
{
tree ref, op1;
+ unsigned int elem;
if (i >= nelts)
return false;
+ /* Look for elements extracted and possibly converted from
+ another vector. */
op1 = get_bit_field_ref_def (elt->value, conv_code);
- if (op1)
+ if (op1
+ && TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME
+ && VECTOR_TYPE_P (TREE_TYPE (ref))
+ && useless_type_conversion_p (TREE_TYPE (op1),
+ TREE_TYPE (TREE_TYPE (ref)))
+ && known_eq (bit_field_size (op1), elem_size)
+ && constant_multiple_p (bit_field_offset (op1),
+ elem_size, &elem))
{
- ref = TREE_OPERAND (op1, 0);
unsigned int j;
for (j = 0; j < 2; ++j)
{
if (!orig[j])
{
- if (TREE_CODE (ref) != SSA_NAME)
- return false;
- if (! VECTOR_TYPE_P (TREE_TYPE (ref))
- || ! useless_type_conversion_p (TREE_TYPE (op1),
- TREE_TYPE (TREE_TYPE (ref))))
- return false;
- if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]),
- TREE_TYPE (ref)))
- return false;
- orig[j] = ref;
- break;
+ if (j == 0
+ || useless_type_conversion_p (TREE_TYPE (orig[0]),
+ TREE_TYPE (ref)))
+ break;
}
else if (ref == orig[j])
break;
}
- if (j == 2)
- return false;
-
- unsigned int elt;
- if (maybe_ne (bit_field_size (op1), elem_size)
- || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt))
- return false;
- if (j)
- elt += nelts;
- if (elt != i)
- maybe_ident = false;
- sel.quick_push (elt);
+ /* Found a suitable vector element. The search loop above only
+ visits slots 0 and 1 of ORIG; J == 2 means REF matched neither
+ slot, so that case must not index ORIG and instead falls through
+ to the handling of elements not extracted from a vector. */
+ if (j < 2)
+ {
+ orig[j] = ref;
+ if (j)
+ elem += nelts;
+ if (elem != i)
+ maybe_ident = false;
+ sel.quick_push (elem);
+ continue;
+ }
+ /* Else fallthru. */
}
- else if (CONSTANT_CLASS_P (elt->value))
+ /* Handle elements not extracted from a vector.
+ 1. constants by permuting with constant vector
+ 2. a unique non-constant element by permuting with a splat vector */
+ if (orig[1]
+ && orig[1] != error_mark_node)
+ return false;
+ orig[1] = error_mark_node;
+ if (CONSTANT_CLASS_P (elt->value))
{
- if (orig[1]
- && orig[1] != error_mark_node)
+ if (one_nonconstant)
return false;
- orig[1] = error_mark_node;
if (!one_constant)
one_constant = elt->value;
constants[i] = elt->value;
- sel.quick_push (i + nelts);
- maybe_ident = false;
}
else
- return false;
+ {
+ if (one_constant)
+ return false;
+ if (!one_nonconstant)
+ one_nonconstant = elt->value;
+ else if (!operand_equal_p (one_nonconstant, elt->value, 0))
+ return false;
+ }
+ sel.quick_push (i + nelts);
+ maybe_ident = false;
}
if (i < nelts)
return false;
- if (! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
+ if (! orig[0]
+ || ! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
|| maybe_ne (TYPE_VECTOR_SUBPARTS (type),
TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0]))))
return false;
GET_MODE_SIZE (TYPE_MODE (type))))
return false;
op2 = vec_perm_indices_to_tree (mask_type, indices);
+ bool convert_orig0 = false;
if (!orig[1])
orig[1] = orig[0];
- if (orig[1] == error_mark_node)
+ else if (orig[1] == error_mark_node
+ && one_nonconstant)
+ {
+ gimple_seq seq = NULL;
+ orig[1] = gimple_build_vector_from_val (&seq, UNKNOWN_LOCATION,
+ type, one_nonconstant);
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+ convert_orig0 = true;
+ }
+ else if (orig[1] == error_mark_node)
{
tree_vector_builder vec (type, nelts, 1);
for (unsigned i = 0; i < nelts; ++i)
/* ??? Push a don't-care value. */
vec.quick_push (one_constant);
orig[1] = vec.build ();
+ convert_orig0 = true;
}
if (conv_code == ERROR_MARK)
gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0],
orig[1], op2);
- else if (TREE_CODE (orig[1]) == VECTOR_CST)
+ else if (convert_orig0)
{
gimple *conv
= gimple_build_assign (make_ssa_name (type), conv_code, orig[0]);