+2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
+
+ * tree-cfg.c (verify_gimple_assign_unary): Handle conversions
+ between vector types.
+ * tree-vect-stmts.c (vectorizable_conversion): Extend the
+ non-widening and non-narrowing path to handle standard
+ conversion codes, if the target supports them.
+ * expr.c (convert_move): Try using the extend and truncate optabs
+ for vectors.
+ * optabs-tree.c (supportable_convert_operation): Likewise.
+	* config/aarch64/iterators.md (Vnarrowq): New mode attribute.
+ * config/aarch64/aarch64-simd.md (<optab><Vnarrowq><mode>2)
+ (trunc<mode><Vnarrowq>2): New patterns.
+
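For illustration, this is the kind of loop the change enables on AArch64 (a minimal sketch modelled on the new tests added below; the function name and the particular type pair are illustrative):

#include <stdint.h>

void
widen_and_accumulate (int64_t *x, int64_t *y, int32_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      /* int32_t -> int64_t is a V2SI -> V2DI conversion with the same
         lane count; it can now be emitted as one "sxtl v?.2d, v?.2s".  */
      x[i] = z[i];
      /* The 64-bit dependence fixes the vectorization factor at two
         lanes, so the conversion must keep the lane count rather than
         unpack into two full vectors.  */
      y[i] += y[i - 2];
    }
}
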
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* tree-vect-stmts.c (vect_get_vector_types_for_stmt): Don't
"pmull2\\t%0.1q, %1.2d, %2.2d"
[(set_attr "type" "crypto_pmull")]
)
+
+;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
+(define_insn "<optab><Vnarrowq><mode>2"
+ [(set (match_operand:VQN 0 "register_operand" "=w")
+ (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
+ [(set_attr "type" "neon_shift_imm_long")]
+)
+
+;; Truncate a 128-bit integer vector to a 64-bit vector.
+(define_insn "trunc<mode><Vnarrowq>2"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+ (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "xtn\t%0.<Vntype>, %1.<Vtype>"
+ [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
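With VQN ranging over V8HI, V4SI and V2DI, these two templates provide the standard conversion patterns extendv8qiv8hi2, extendv4hiv4si2 and extendv2siv2di2 (plus the corresponding zero_extend variants) and truncv8hiv8qi2, truncv4siv4hi2 and truncv2div2si2 — exactly the names that the convert_move and supportable_convert_operation changes below look up via convert_optab_handler.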
(V2DI "V2SI")
(DI "SI") (SI "HI")
(HI "QI")])
+(define_mode_attr Vnarrowq [(V8HI "v8qi") (V4SI "v4hi")
+ (V2DI "v2si")])
;; Narrowed quad-modes for VQN (Used for XTN2).
(define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI")
if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode))
{
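+      /* Widening element conversion: see whether the target provides a
+	 vector sign- or zero-extension instruction directly.  */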
+ if (GET_MODE_UNIT_PRECISION (to_mode)
+ > GET_MODE_UNIT_PRECISION (from_mode))
+ {
+ optab op = unsignedp ? zext_optab : sext_optab;
+ insn_code icode = convert_optab_handler (op, to_mode, from_mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ emit_unop_insn (icode, to, from,
+ unsignedp ? ZERO_EXTEND : SIGN_EXTEND);
+ return;
+ }
+ }
+
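+      /* Narrowing element conversion: try the truncation optab.  */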
+ if (GET_MODE_UNIT_PRECISION (to_mode)
+ < GET_MODE_UNIT_PRECISION (from_mode))
+ {
+ insn_code icode = convert_optab_handler (trunc_optab,
+ to_mode, from_mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ emit_unop_insn (icode, to, from, TRUNCATE);
+ return;
+ }
+ }
+
gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode),
GET_MODE_BITSIZE (to_mode)));
return true;
}
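+  /* Widening and narrowing element conversions between vectors with the
+     same number of lanes can also be handled directly through the
+     extension and truncation optabs.  */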
+ if (GET_MODE_UNIT_PRECISION (m1) > GET_MODE_UNIT_PRECISION (m2)
+ && can_extend_p (m1, m2, TYPE_UNSIGNED (vectype_in)))
+ {
+ *code1 = code;
+ return true;
+ }
+
+ if (GET_MODE_UNIT_PRECISION (m1) < GET_MODE_UNIT_PRECISION (m2)
+ && convert_optab_handler (trunc_optab, m1, m2) != CODE_FOR_nothing)
+ {
+ *code1 = code;
+ return true;
+ }
+
/* Now check for builtin. */
if (targetm.vectorize.builtin_conversion
&& targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
+2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.dg/vect/bb-slp-pr69907.c: Do not expect BB vectorization
+ to fail for aarch64 targets.
+ * gcc.dg/vect/no-scevccp-outer-12.c: Expect the test to pass
+ on aarch64 targets.
+ * gcc.dg/vect/vect-double-reduc-5.c: Likewise.
+ * gcc.dg/vect/vect-outer-4e.c: Likewise.
+ * gcc.target/aarch64/vect_mixed_sizes_5.c: New test.
+ * gcc.target/aarch64/vect_mixed_sizes_6.c: Likewise.
+ * gcc.target/aarch64/vect_mixed_sizes_7.c: Likewise.
+ * gcc.target/aarch64/vect_mixed_sizes_8.c: Likewise.
+ * gcc.target/aarch64/vect_mixed_sizes_9.c: Likewise.
+ * gcc.target/aarch64/vect_mixed_sizes_10.c: Likewise.
+ * gcc.target/aarch64/vect_mixed_sizes_11.c: Likewise.
+ * gcc.target/aarch64/vect_mixed_sizes_12.c: Likewise.
+ * gcc.target/aarch64/vect_mixed_sizes_13.c: Likewise.
+
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/vect-outer-4f.c: Expect the test to pass on aarch64
}
/* Disable for SVE because for long or variable-length vectors we don't
- get an unrolled epilogue loop. */
-/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64_sve } } } } */
+ get an unrolled epilogue loop. Also disable for AArch64 Advanced SIMD,
+ because there we can vectorize the epilogue using mixed vector sizes. */
+/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64*-*-* } } } } */
}
/* Until we support multiple types in the inner loop */
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail { ! aarch64*-*-* } } } } */
/* Vectorization of loops with multiple types and double reduction is not
supported yet. */
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
return;
}
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int16_t *x, int16_t *y, uint8_t *z, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i] = z[i];
+ y[i] += y[i - 8];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int32_t *x, int64_t *y, int64_t *z, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i] = z[i];
+ y[i] += y[i - 2];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.2s, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int16_t *x, int32_t *y, int32_t *z, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i] = z[i];
+ y[i] += y[i - 4];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.4h, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int8_t *x, int16_t *y, int16_t *z, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i] = z[i];
+ y[i] += y[i - 8];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.8b, v[0-9]+\.8h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int64_t *x, int64_t *y, int32_t *z, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i] = z[i];
+ y[i] += y[i - 2];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int32_t *x, int32_t *y, int16_t *z, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i] = z[i];
+ y[i] += y[i - 4];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int16_t *x, int16_t *y, int8_t *z, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i] = z[i];
+ y[i] += y[i - 8];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int64_t *x, int64_t *y, uint32_t *z, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i] = z[i];
+ y[i] += y[i - 2];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int32_t *x, int32_t *y, uint16_t *z, int n)
+{
+ for (int i = 0; i < n; ++i)
+ {
+ x[i] = z[i];
+ y[i] += y[i - 4];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
{
CASE_CONVERT:
{
+ /* Allow conversions between vectors with the same number of elements,
+ provided that the conversion is OK for the element types too. */
+ if (VECTOR_TYPE_P (lhs_type)
+ && VECTOR_TYPE_P (rhs1_type)
+ && known_eq (TYPE_VECTOR_SUBPARTS (lhs_type),
+ TYPE_VECTOR_SUBPARTS (rhs1_type)))
+ {
+ lhs_type = TREE_TYPE (lhs_type);
+ rhs1_type = TREE_TYPE (rhs1_type);
+ }
+ else if (VECTOR_TYPE_P (lhs_type) || VECTOR_TYPE_P (rhs1_type))
+ {
+ error ("invalid vector types in nop conversion");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ return true;
+ }
+
/* Allow conversions from pointer type to integral type only if
there is no sign or zero extension involved.
	 For targets where the precision of ptrofftype doesn't match that
switch (modifier)
{
case NONE:
- if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
+ if (code != FIX_TRUNC_EXPR
+ && code != FLOAT_EXPR
+ && !CONVERT_EXPR_CODE_P (code))
return false;
if (supportable_convert_operation (code, vectype_out, vectype_in,
&decl1, &code1))
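Taken together, a conversion such as "_2 = (vector(8) short int) _1" with _1 of type vector(8) unsigned char now flows through the NONE path above: supportable_convert_operation accepts it via can_extend_p (V8HImode, V8QImode, 1), and expansion picks up the new uxtl pattern. (The GIMPLE statement shown is an illustrative sketch, not output produced by the patch.)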