[Patch AArch64] Fix up BSL expander for floating point types
author: James Greenhalgh <james.greenhalgh@arm.com>
Tue, 11 Nov 2014 17:37:35 +0000 (17:37 +0000)
committer: James Greenhalgh <jgreenhalgh@gcc.gnu.org>
Tue, 11 Nov 2014 17:37:35 +0000 (17:37 +0000)
gcc/

* config/aarch64/aarch64-simd.md
(aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize.
(aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we
are punning between float vectors and integer vectors.

gcc/testsuite/

* gcc.target/aarch64/vbslq_f64_1.c: New.
* gcc.target/aarch64/vbslq_f64_2.c: Likewise.
* gcc.target/aarch64/vbslq_u64_1.c: Likewise.
* gcc.target/aarch64/vbslq_u64_2.c: Likewise.

From-SVN: r217362

gcc/ChangeLog
gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c [new file with mode: 0644]

index 3e2dec3223344dca129818f4061a244d939316b6..302958455a1ec113ddc36743e0aca0999fa21b82 100644 (file)
@@ -1,3 +1,10 @@
+2014-11-11  James Greenhalgh  <james.greenhalgh@arm.com>
+
+       * config/aarch64/aarch64-simd.md
+       (aarch64_simd_bsl<mode>_internal): Remove float cases, canonicalize.
+       (aarch64_simd_bsl<mode>): Add gen_lowpart expressions where we
+       are punning between float vectors and integer vectors.
+
 2014-11-11  Uros Bizjak  <ubizjak@gmail.com>
 
        * config/alpha/alpha.c (alpha_emit_conditional_branch): Replace
index ef196e4b6fb39c0d2fd9ebfee76abab8369b1e92..f7012ecab07c1b38836e949c2f4e5bd0c7939b5c 100644 (file)
 ;;     bif op0, op1, mask
 
 (define_insn "aarch64_simd_bsl<mode>_internal"
-  [(set (match_operand:VALLDIF 0 "register_operand"            "=w,w,w")
-       (ior:VALLDIF
-          (and:VALLDIF
-            (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
-            (match_operand:VALLDIF 2 "register_operand"        " w,w,0"))
-          (and:VALLDIF
+  [(set (match_operand:VSDQ_I_DI 0 "register_operand"          "=w,w,w")
+       (ior:VSDQ_I_DI
+          (and:VSDQ_I_DI
             (not:<V_cmp_result>
-               (match_dup:<V_cmp_result> 1))
-            (match_operand:VALLDIF 3 "register_operand"        " w,0,w"))
+              (match_operand:<V_cmp_result> 1 "register_operand"       " 0,w,w"))
+            (match_operand:VSDQ_I_DI 3 "register_operand"      " w,0,w"))
+          (and:VSDQ_I_DI
+            (match_dup:<V_cmp_result> 1)
+            (match_operand:VSDQ_I_DI 2 "register_operand"      " w,w,0"))
        ))]
   "TARGET_SIMD"
   "@
  "TARGET_SIMD"
 {
   /* We can't alias operands together if they have different modes.  */
+  rtx tmp = operands[0];
+  if (FLOAT_MODE_P (<MODE>mode))
+    {
+      operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
+      operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
+      tmp = gen_reg_rtx (<V_cmp_result>mode);
+    }
   operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
-  emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
-                                                 operands[2], operands[3]));
+  emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
+                                                         operands[1],
+                                                         operands[2],
+                                                         operands[3]));
+  if (tmp != operands[0])
+    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+
   DONE;
 })
 
index b301e055908c8a97114d91ce8fb70e2f045eef65..ae73a1013d199a31168d619c52a451a99c396477 100644 (file)
@@ -1,3 +1,10 @@
+2014-11-11  James Greenhalgh  <james.greenhalgh@arm.com>
+
+       * gcc.target/aarch64/vbslq_f64_1.c: New.
+       * gcc.target/aarch64/vbslq_f64_2.c: Likewise.
+       * gcc.target/aarch64/vbslq_u64_1.c: Likewise.
+       * gcc.target/aarch64/vbslq_u64_2.c: Likewise.
+
 2014-11-11  Paolo Carlini  <paolo.carlini@oracle.com>
 
        PR c++/63265
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c
new file mode 100644 (file)
index 0000000..128a1db
--- /dev/null
@@ -0,0 +1,21 @@
+/* Test vbslq_f32 can be folded.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Folds to ret.  */
+
+float32x4_t
+fold_me (float32x4_t a, float32x4_t b)
+{
+  uint32x4_t mask = {-1, -1, -1, -1};
+  return vbslq_f32 (mask, a, b);
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c
new file mode 100644 (file)
index 0000000..62358bf
--- /dev/null
@@ -0,0 +1,24 @@
+/* Test vbslq_f32 can be folded.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Should fold out one half of the BSL, leaving just a BIC.  */
+
+float32x4_t
+half_fold_me (uint32x4_t mask)
+{
+  float32x4_t a = {0.0, 0.0, 0.0, 0.0};
+  float32x4_t b = {2.0, 4.0, 8.0, 16.0};
+  return vbslq_f32 (mask, a, b);
+
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+/* { dg-final { scan-assembler "bic\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c
new file mode 100644 (file)
index 0000000..7a4892e
--- /dev/null
@@ -0,0 +1,17 @@
+/* Test if a BSL-like instruction can be generated from a C idiom.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+
+#include <arm_neon.h>
+
+/* Folds to BIF.  */
+
+uint32x4_t
+vbslq_dummy_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t mask)
+{
+  return (mask & a) | (~mask & b);
+}
+
+/* { dg-final { scan-assembler-times "bif\\tv" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c
new file mode 100644 (file)
index 0000000..5b70168
--- /dev/null
@@ -0,0 +1,22 @@
+/* Test vbslq_s32 can be folded.  */
+/* { dg-do assemble } */
+/* { dg-options "--save-temps -O3" } */
+#include <arm_neon.h>
+
+/* Folds to BIC.  */
+
+int32x4_t
+half_fold_int (uint32x4_t mask)
+{
+  int32x4_t a = {0, 0, 0, 0};
+  int32x4_t b = {2, 4, 8, 16};
+  return vbslq_s32 (mask, a, b);
+}
+
+/* { dg-final { scan-assembler-not "bsl\\tv" } } */
+/* { dg-final { scan-assembler-not "bit\\tv" } } */
+/* { dg-final { scan-assembler-not "bif\\tv" } } */
+/* { dg-final { scan-assembler "bic\\tv" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
+