From 907ea3795591b85e2f4cf1c45a4cd8c1bd783892 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 11 Jan 2021 18:03:23 +0000 Subject: [PATCH] aarch64: Add support for unpacked SVE ABD This patch adds support for unpacked SVE SABD and UABD. It also rewrites the patterns so that they match as combine patterns without the need for REG_EQUAL notes. Finally, there was no pattern for merging with the second input, which can be handled by reversing the operands. The type suffix needs to be taken from the element size rather than the container size. gcc/ * config/aarch64/aarch64-sve.md (<su>abd<mode>_3): Extend from SVE_FULL_I to SVE_I. (*aarch64_cond_<su>abd<mode>_2): Likewise. (*aarch64_cond_<su>abd<mode>_any): Likewise. (@aarch64_pred_<su>abd<mode>): Likewise. Use UNSPEC_PRED_X for the max and min but not for the minus. (*aarch64_cond_<su>abd<mode>_3): New pattern. gcc/testsuite/ * g++.target/aarch64/sve/abd_1.C: New test. * g++.target/aarch64/sve/cond_abd_1.C: Likewise. * g++.target/aarch64/sve/cond_abd_2.C: Likewise. * g++.target/aarch64/sve/cond_abd_3.C: Likewise. * g++.target/aarch64/sve/cond_abd_4.C: Likewise. 
--- gcc/config/aarch64/aarch64-sve.md | 100 ++++++++++++------ gcc/testsuite/g++.target/aarch64/sve/abd_1.C | 38 +++++++ .../g++.target/aarch64/sve/cond_abd_1.C | 60 +++++++++++ .../g++.target/aarch64/sve/cond_abd_2.C | 60 +++++++++++ .../g++.target/aarch64/sve/cond_abd_3.C | 49 +++++++++ .../g++.target/aarch64/sve/cond_abd_4.C | 43 ++++++++ 6 files changed, 317 insertions(+), 33 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/sve/abd_1.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_abd_1.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_abd_2.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_abd_3.C create mode 100644 gcc/testsuite/g++.target/aarch64/sve/cond_abd_4.C diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 27b7fd0fccd..8083749a07e 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3973,10 +3973,10 @@ ;; Unpredicated integer absolute difference. (define_expand "abd_3" - [(use (match_operand:SVE_FULL_I 0 "register_operand")) - (USMAX:SVE_FULL_I - (match_operand:SVE_FULL_I 1 "register_operand") - (match_operand:SVE_FULL_I 2 "register_operand"))] + [(use (match_operand:SVE_I 0 "register_operand")) + (USMAX:SVE_I + (match_operand:SVE_I 1 "register_operand") + (match_operand:SVE_I 2 "register_operand"))] "TARGET_SVE" { rtx pred = aarch64_ptrue_reg (mode); @@ -3988,17 +3988,20 @@ ;; Predicated integer absolute difference. 
(define_insn "@aarch64_pred_abd" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I - [(match_operand: 1 "register_operand" "Upl, Upl") - (minus:SVE_FULL_I - (USMAX:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "%0, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) - (:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (minus:SVE_I + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (USMAX:SVE_I + (match_operand:SVE_I 2 "register_operand" "%0, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))] + UNSPEC_PRED_X) + (unspec:SVE_I + [(match_dup 1) + (:SVE_I (match_dup 2) - (match_dup 3)))] - UNSPEC_PRED_X))] + (match_dup 3))] + UNSPEC_PRED_X)))] "TARGET_SVE" "@ abd\t%0., %1/m, %0., %3. @@ -4033,19 +4036,19 @@ ;; Predicated integer absolute difference, merging with the first input. (define_insn_and_rewrite "*aarch64_cond_abd_2" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand: 1 "register_operand" "Upl, Upl") - (minus:SVE_FULL_I - (unspec:SVE_FULL_I + (minus:SVE_I + (unspec:SVE_I [(match_operand 4) - (USMAX:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "0, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] + (USMAX:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "register_operand" "w, w"))] UNSPEC_PRED_X) - (unspec:SVE_FULL_I + (unspec:SVE_I [(match_operand 5) - (:SVE_FULL_I + (:SVE_I (match_dup 2) (match_dup 3))] UNSPEC_PRED_X)) @@ -4062,25 +4065,56 @@ [(set_attr "movprfx" "*,yes")] ) +;; Predicated integer absolute difference, merging with the second input. 
+(define_insn_and_rewrite "*aarch64_cond_abd_3" + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I + [(match_operand: 1 "register_operand" "Upl, Upl") + (minus:SVE_I + (unspec:SVE_I + [(match_operand 4) + (USMAX:SVE_I + (match_operand:SVE_I 2 "register_operand" "w, w") + (match_operand:SVE_I 3 "register_operand" "0, w"))] + UNSPEC_PRED_X) + (unspec:SVE_I + [(match_operand 5) + (:SVE_I + (match_dup 2) + (match_dup 3))] + UNSPEC_PRED_X)) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + abd\t%0., %1/m, %0., %2. + movprfx\t%0, %3\;abd\t%0., %1/m, %0., %2." + "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" + { + operands[4] = operands[5] = CONSTM1_RTX (mode); + } + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated integer absolute difference, merging with an independent value. (define_insn_and_rewrite "*aarch64_cond_abd_any" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_I [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") - (minus:SVE_FULL_I - (unspec:SVE_FULL_I + (minus:SVE_I + (unspec:SVE_I [(match_operand 5) - (USMAX:SVE_FULL_I - (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") - (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))] + (USMAX:SVE_I + (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))] UNSPEC_PRED_X) - (unspec:SVE_FULL_I + (unspec:SVE_I [(match_operand 6) - (:SVE_FULL_I + (:SVE_I (match_dup 2) (match_dup 3))] UNSPEC_PRED_X)) - (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4]) diff --git a/gcc/testsuite/g++.target/aarch64/sve/abd_1.C b/gcc/testsuite/g++.target/aarch64/sve/abd_1.C new file mode 
100644 index 00000000000..0facf58ad10 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/abd_1.C @@ -0,0 +1,38 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b) \ + { \ + return (a > b ? a : b) - (a < b ? a : b); \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (int8_t, 32) +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (int8_t, 64) +TEST_TYPE (uint8_t, 64) +TEST_TYPE (int16_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (int8_t, 128) +TEST_TYPE (uint8_t, 128) +TEST_TYPE (int16_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (int32_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_abd_1.C b/gcc/testsuite/g++.target/aarch64/sve/cond_abd_1.C new file mode 100644 index 00000000000..a93d1d1fd99 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_abd_1.C @@ -0,0 +1,60 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \ + { \ + return c == 0 ? (a > b ? a : b) - (a < b ? 
a : b) : a; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (int8_t, 32) +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (int8_t, 64) +TEST_TYPE (uint8_t, 64) +TEST_TYPE (int16_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (int8_t, 128) +TEST_TYPE (uint8_t, 128) +TEST_TYPE (int16_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (int32_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x0\]\n[^L]*\tsabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x0\]\n[^L]*\tsabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tsabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x0\]\n[^L]*\tsabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tsabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tsabd\t\1\.s, p[0-7]/m, \1\.s, z[0-9]+\.s\n} } } */ + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x1\]\n[^L]*\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x1\]\n[^L]*\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x1\]\n[^L]*\tsabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, \1\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tsabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, \1\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, 
\[x1\]\n[^L]*\tsabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, \1\.s\n} } } */ + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x0\]\n[^L]*\tuabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x0\]\n[^L]*\tuabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tuabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x0\]\n[^L]*\tuabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tuabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tuabd\t\1\.s, p[0-7]/m, \1\.s, z[0-9]+\.s\n} } } */ + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x1\]\n[^L]*\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x1\]\n[^L]*\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x1\]\n[^L]*\tuabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, \1\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tuabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, \1\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tuabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, \1\.s\n} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_abd_2.C b/gcc/testsuite/g++.target/aarch64/sve/cond_abd_2.C new file mode 100644 index 00000000000..ad6304aaaf8 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_abd_2.C @@ -0,0 +1,60 @@ +/* { dg-do 
assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \ + { \ + return c == 0 ? (a > b ? a : b) - (a < b ? a : b) : b; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (int8_t, 32) +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (int8_t, 64) +TEST_TYPE (uint8_t, 64) +TEST_TYPE (int16_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (int8_t, 128) +TEST_TYPE (uint8_t, 128) +TEST_TYPE (int16_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (int32_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x1\]\n[^L]*\tsabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x1\]\n[^L]*\tsabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tsabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x1\]\n[^L]*\tsabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tsabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tsabd\t\1\.s, p[0-7]/m, \1\.s, z[0-9]+\.s\n} } } */ + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x0\]\n[^L]*\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x0\]\n[^L]*\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, 
\[x0\]\n[^L]*\tsabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, \1\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tsabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, \1\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tsabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, \1\.s\n} } } */ + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x1\]\n[^L]*\tuabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x1\]\n[^L]*\tuabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tuabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x1\]\n[^L]*\tuabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tuabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x1\]\n[^L]*\tuabd\t\1\.s, p[0-7]/m, \1\.s, z[0-9]+\.s\n} } } */ + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x0\]\n[^L]*\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x0\]\n[^L]*\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, \1\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x0\]\n[^L]*\tuabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, \1\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tuabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, \1\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x0\]\n[^L]*\tuabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, \1\.s\n} } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } 
} */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_abd_3.C b/gcc/testsuite/g++.target/aarch64/sve/cond_abd_3.C new file mode 100644 index 00000000000..29a8d2f4f77 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_abd_3.C @@ -0,0 +1,49 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \ + { \ + return c == 0 ? (a > b ? a : b) - (a < b ? a : b) : c; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (int8_t, 32) +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (int8_t, 64) +TEST_TYPE (uint8_t, 64) +TEST_TYPE (int16_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (int8_t, 128) +TEST_TYPE (uint8_t, 128) +TEST_TYPE (int16_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (int32_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x2\]\n[^L]*\tsabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x2\]\n[^L]*\tsabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x2\]\n[^L]*\tsabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x2\]\n[^L]*\tsabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x2\]\n[^L]*\tsabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x2\]\n[^L]*\tsabd\t\1\.s, p[0-7]/m, \1\.s, z[0-9]+\.s\n} } } */ + +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.h, p[0-7]/z, \[x2\]\n[^L]*\tuabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.s, p[0-7]/z, \[x2\]\n[^L]*\tuabd\t\1\.b, 
p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[x2\]\n[^L]*\tuabd\t\1\.b, p[0-7]/m, \1\.b, z[0-9]+\.b\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.s, p[0-7]/z, \[x2\]\n[^L]*\tuabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+)\.d, p[0-7]/z, \[x2\]\n[^L]*\tuabd\t\1\.h, p[0-7]/m, \1\.h, z[0-9]+\.h\n} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+)\.d, p[0-7]/z, \[x2\]\n[^L]*\tuabd\t\1\.s, p[0-7]/m, \1\.s, z[0-9]+\.s\n} } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/cond_abd_4.C b/gcc/testsuite/g++.target/aarch64/sve/cond_abd_4.C new file mode 100644 index 00000000000..b4157315652 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/cond_abd_4.C @@ -0,0 +1,43 @@ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */ + +#include + +#define TEST_OP(TYPE) \ + TYPE \ + test##_##TYPE##_reg (TYPE a, TYPE b, TYPE c) \ + { \ + return c == 1 ? (a > b ? a : b) - (a < b ? 
a : b) : 0; \ + } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + TEST_OP (TYPE##SIZE) + +TEST_TYPE (int8_t, 32) +TEST_TYPE (uint8_t, 32) + +TEST_TYPE (int8_t, 64) +TEST_TYPE (uint8_t, 64) +TEST_TYPE (int16_t, 64) +TEST_TYPE (uint16_t, 64) + +TEST_TYPE (int8_t, 128) +TEST_TYPE (uint8_t, 128) +TEST_TYPE (int16_t, 128) +TEST_TYPE (uint16_t, 128) +TEST_TYPE (int32_t, 128) +TEST_TYPE (uint32_t, 128) + +/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ -- 2.30.2