From c8824f2ccb4bc1687a86c87a090c3589bce2053e Mon Sep 17 00:00:00 2001 From: James Greenhalgh Date: Tue, 11 Nov 2014 17:37:35 +0000 Subject: [PATCH] [Patch AArch64] Fix up BSL expander for floating point types gcc/ * config/aarch64/aarch64-simd.md (aarch64_simd_bsl_internal): Remove float cases, canonicalize. (aarch64_simd_bsl): Add gen_lowpart expressions where we are punning between float vectors and integer vectors. gcc/testsuite/ * gcc.target/aarch64/vbslq_f64_1.c: New. * gcc.target/aarch64/vbslq_f64_2.c: Likewise. * gcc.target/aarch64/vbslq_u64_1.c: Likewise. * gcc.target/aarch64/vbslq_u64_2.c: Likewise. From-SVN: r217362 --- gcc/ChangeLog | 7 ++++ gcc/config/aarch64/aarch64-simd.md | 32 +++++++++++++------ gcc/testsuite/ChangeLog | 7 ++++ .../gcc.target/aarch64/vbslq_f64_1.c | 21 ++++++++++++ .../gcc.target/aarch64/vbslq_f64_2.c | 24 ++++++++++++++ .../gcc.target/aarch64/vbslq_u64_1.c | 17 ++++++++++ .../gcc.target/aarch64/vbslq_u64_2.c | 22 +++++++++++++ 7 files changed, 120 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3e2dec32233..302958455a1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2014-11-11 James Greenhalgh + + * config/aarch64/aarch64-simd.md + (aarch64_simd_bsl_internal): Remove float cases, canonicalize. + (aarch64_simd_bsl): Add gen_lowpart expressions where we + are punning between float vectors and integer vectors. 
+ 2014-11-11 Uros Bizjak * config/alpha/alpha.c (alpha_emit_conditional_branch): Replace diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index ef196e4b6fb..f7012ecab07 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1924,15 +1924,15 @@ ;; bif op0, op1, mask (define_insn "aarch64_simd_bsl<mode>_internal" - [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w") - (ior:VALLDIF - (and:VALLDIF - (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w") - (match_operand:VALLDIF 2 "register_operand" " w,w,0")) - (and:VALLDIF + [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") + (ior:VSDQ_I_DI + (and:VSDQ_I_DI (not:<V_cmp_result> - (match_dup:<V_cmp_result> 1)) - (match_operand:VALLDIF 3 "register_operand" " w,0,w")) + (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")) + (match_operand:VSDQ_I_DI 3 "register_operand" " w,0,w")) + (and:VSDQ_I_DI + (match_dup:<V_cmp_result> 1) + (match_operand:VSDQ_I_DI 2 "register_operand" " w,w,0")) ))] "TARGET_SIMD" "@ @@ -1950,9 +1950,21 @@ "TARGET_SIMD" { /* We can't alias operands together if they have different modes. */ + rtx tmp = operands[0]; + if (FLOAT_MODE_P (<MODE>mode)) + { + operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]); + operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]); + tmp = gen_reg_rtx (<V_cmp_result>mode); + } operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]); - emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1], - operands[2], operands[3])); + emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp, + operands[1], + operands[2], + operands[3])); + if (tmp != operands[0]) + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp)); + DONE; }) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b301e055908..ae73a1013d1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2014-11-11 James Greenhalgh + + * gcc.target/aarch64/vbslq_f64_1.c: New. + * gcc.target/aarch64/vbslq_f64_2.c: Likewise. + * gcc.target/aarch64/vbslq_u64_1.c: Likewise. 
+ * gcc.target/aarch64/vbslq_u64_2.c: Likewise. + 2014-11-11 Paolo Carlini PR c++/63265 diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c new file mode 100644 index 00000000000..128a1db2a66 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_1.c @@ -0,0 +1,21 @@ +/* Test vbslq_f64 can be folded. */ +/* { dg-do assemble } */ +/* { dg-options "--save-temps -O3" } */ + +#include <arm_neon.h> + +/* Folds to ret. */ + +float32x4_t +fold_me (float32x4_t a, float32x4_t b) +{ + uint32x4_t mask = {-1, -1, -1, -1}; + return vbslq_f32 (mask, a, b); +} + +/* { dg-final { scan-assembler-not "bsl\\tv" } } */ +/* { dg-final { scan-assembler-not "bit\\tv" } } */ +/* { dg-final { scan-assembler-not "bif\\tv" } } */ + +/* { dg-final { cleanup-saved-temps } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c new file mode 100644 index 00000000000..62358bf5932 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vbslq_f64_2.c @@ -0,0 +1,24 @@ +/* Test vbslq_f64 can be folded. */ +/* { dg-do assemble } */ +/* { dg-options "--save-temps -O3" } */ + +#include <arm_neon.h> + +/* Should fold out one half of the BSL, leaving just a BIC. */ + +float32x4_t +half_fold_me (uint32x4_t mask) +{ + float32x4_t a = {0.0, 0.0, 0.0, 0.0}; + float32x4_t b = {2.0, 4.0, 8.0, 16.0}; + return vbslq_f32 (mask, a, b); + +} + +/* { dg-final { scan-assembler-not "bsl\\tv" } } */ +/* { dg-final { scan-assembler-not "bit\\tv" } } */ +/* { dg-final { scan-assembler-not "bif\\tv" } } */ +/* { dg-final { scan-assembler "bic\\tv" } } */ + +/* { dg-final { cleanup-saved-temps } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c new file mode 100644 index 00000000000..7a4892e9577 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_1.c @@ -0,0 +1,17 @@ +/* Test if a BSL-like instruction can be generated from a C idiom. 
*/ +/* { dg-do assemble } */ +/* { dg-options "--save-temps -O3" } */ + +#include <arm_neon.h> + +/* Folds to BIF. */ + +uint32x4_t +vbslq_dummy_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t mask) +{ + return (mask & a) | (~mask & b); +} + +/* { dg-final { scan-assembler-times "bif\\tv" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c new file mode 100644 index 00000000000..5b70168e391 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vbslq_u64_2.c @@ -0,0 +1,22 @@ +/* Test vbslq_u64 can be folded. */ +/* { dg-do assemble } */ +/* { dg-options "--save-temps -O3" } */ +#include <arm_neon.h> + +/* Folds to BIC. */ + +int32x4_t +half_fold_int (uint32x4_t mask) +{ + int32x4_t a = {0, 0, 0, 0}; + int32x4_t b = {2, 4, 8, 16}; + return vbslq_s32 (mask, a, b); +} + +/* { dg-final { scan-assembler-not "bsl\\tv" } } */ +/* { dg-final { scan-assembler-not "bit\\tv" } } */ +/* { dg-final { scan-assembler-not "bif\\tv" } } */ +/* { dg-final { scan-assembler "bic\\tv" } } */ + +/* { dg-final { cleanup-saved-temps } } */ + -- 2.30.2