This patch enables MVE vsub instructions for auto-vectorization.
The sub<mode>3 in vec-common.md is modified to use new mode macros
to include MVE extension for vectorization. MVE vsub insns in mve.md are
modified to use 'minus' instead of unspec expression to support
sub<mode>3. Use VDQ instead fo VALL to cover all supported modes. The
redundant sub<mode>3 insns in neon.md are then removed.
gcc/ChangeLog:
2020-10-23 Dennis Zhang <dennis.zhang@arm.com>
* config/arm/mve.md (mve_vsubq<mode>): New entry for vsub instruction
using expression 'minus'.
(mve_vsubq_f<mode>): Use minus instead of VSUBQ_F unspec.
* config/arm/neon.md (sub<mode>3, sub<mode>3_fp16): Removed.
(neon_vsub<mode>): Use gen_sub<mode>3 instead of gen_sub<mode>3_fp16.
* config/arm/vec-common.md (sub<mode>3): Use the new mode macros
ARM_HAVE_<MODE>_ARITH. Use iterator VDQ instead of VALL.
gcc/testsuite/ChangeLog:
* gcc.target/arm/simd/mve-vsub_1.c: New test.
+2020-10-23 Dennis Zhang <dennis.zhang@arm.com>
+
+ * config/arm/mve.md (mve_vsubq<mode>): New entry for vsub instruction
+ using expression 'minus'.
+ (mve_vsubq_f<mode>): Use minus instead of VSUBQ_F unspec.
+ * config/arm/neon.md (sub<mode>3, sub<mode>3_fp16): Removed.
+ (neon_vsub<mode>): Use gen_sub<mode>3 instead of gen_sub<mode>3_fp16.
+ * config/arm/vec-common.md (sub<mode>3): Use the new mode macros
+ ARM_HAVE_<MODE>_ARITH. Use iterator VDQ instead of VALL.
+
2020-10-22 Alan Modra <amodra@gmail.com>
* config/rs6000/rs6000.c (rs6000_emit_xxspltidp_v2df): Delete
[(set_attr "type" "mve_move")
])
+(define_insn "mve_vsubq<mode>"
+ [
+ (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+ (minus:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")
+ (match_operand:MVE_2 2 "s_register_operand" "w")))
+ ]
+ "TARGET_HAVE_MVE"
+ "vsub.i%#<V_sz_elem>\t%q0, %q1, %q2"
+ [(set_attr "type" "mve_move")
+])
+
;;
;; [vabdq_f])
;;
(define_insn "mve_vsubq_f<mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
- (match_operand:MVE_0 2 "s_register_operand" "w")]
- VSUBQ_F))
+ (minus:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
+ (match_operand:MVE_0 2 "s_register_operand" "w")))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vsub.f%#<V_sz_elem>\t%q0, %q1, %q2"
(const_string "neon_sub<q>")))]
)
-(define_insn "sub<mode>3"
- [(set
- (match_operand:VH 0 "s_register_operand" "=w")
- (minus:VH
- (match_operand:VH 1 "s_register_operand" "w")
- (match_operand:VH 2 "s_register_operand" "w")))]
- "ARM_HAVE_NEON_<MODE>_ARITH"
- "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_sub<q>")]
-)
-
-(define_insn "sub<mode>3_fp16"
- [(set
- (match_operand:VH 0 "s_register_operand" "=w")
- (minus:VH
- (match_operand:VH 1 "s_register_operand" "w")
- (match_operand:VH 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST"
- "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_sub<q>")]
-)
-
(define_insn "*mul<mode>3_neon"
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
(mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
(match_operand:VH 2 "s_register_operand")]
"TARGET_NEON_FP16INST"
{
- emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
+ emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
DONE;
})
"ARM_HAVE_<MODE>_ARITH"
)
-;; Vector arithmetic. Expanders are blank, then unnamed insns implement
-;; patterns separately for IWMMXT and Neon.
-
(define_expand "sub<mode>3"
- [(set (match_operand:VALL 0 "s_register_operand")
- (minus:VALL (match_operand:VALL 1 "s_register_operand")
- (match_operand:VALL 2 "s_register_operand")))]
- "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode)
- || flag_unsafe_math_optimizations))
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
-{
-})
+ [(set (match_operand:VDQ 0 "s_register_operand")
+ (minus:VDQ (match_operand:VDQ 1 "s_register_operand")
+ (match_operand:VDQ 2 "s_register_operand")))]
+ "ARM_HAVE_<MODE>_ARITH"
+)
(define_expand "mul<mode>3"
[(set (match_operand:VDQWH 0 "s_register_operand")
+2020-10-23 Dennis Zhang <dennis.zhang@arm.com>
+
+ * gcc.target/arm/simd/mve-vsub_1.c: New test.
+
2020-10-22 Alan Modra <amodra@gmail.com>
* gcc.target/powerpc/vec-splati-runnable.c: Don't abort on
--- /dev/null
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg -additional-options "-O3 -funsafe-math-optimizations" } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+void test_vsub_i32 (int32_t * dest, int32_t * a, int32_t * b) {
+ int i;
+ for (i=0; i<4; i++) {
+ dest[i] = a[i] - b[i];
+ }
+}
+
+void test_vsub_i32_u (uint32_t * dest, uint32_t * a, uint32_t * b) {
+ int i;
+ for (i=0; i<4; i++) {
+ dest[i] = a[i] - b[i];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsub\.i32\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */
+
+void test_vsub_i16 (int16_t * dest, int16_t * a, int16_t * b) {
+ int i;
+ for (i=0; i<8; i++) {
+ dest[i] = a[i] - b[i];
+ }
+}
+
+void test_vsub_i16_u (uint16_t * dest, uint16_t * a, uint16_t * b) {
+ int i;
+ for (i=0; i<8; i++) {
+ dest[i] = a[i] - b[i];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsub\.i16\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */
+
+void test_vsub_i8 (int8_t * dest, int8_t * a, int8_t * b) {
+ int i;
+ for (i=0; i<16; i++) {
+ dest[i] = a[i] - b[i];
+ }
+}
+
+void test_vsub_i8_u (uint8_t * dest, uint8_t * a, uint8_t * b) {
+ int i;
+ for (i=0; i<16; i++) {
+ dest[i] = a[i] - b[i];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsub\.i8\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */
+
+void test_vsub_f32 (float * dest, float * a, float * b) {
+ int i;
+ for (i=0; i<4; i++) {
+ dest[i] = a[i] - b[i];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vsub\.f32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */
+