From: Richard Sandiford Date: Fri, 8 Jan 2021 10:49:38 +0000 (+0000) Subject: aarch64: Support unpacked CNOT on SVE X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5fe3e6bf061da8d9b0e759927c340fe8e0f44725;p=gcc.git aarch64: Support unpacked CNOT on SVE This patch adds unpacked support for unconditional and conditional CNOT. The type suffix has to be taken from the element size rather than the container size. gcc/ * config/aarch64/aarch64-sve.md (*cnot<mode>): Extend from SVE_FULL_I to SVE_I. (*cond_cnot<mode>_2, *cond_cnot<mode>_any): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve/cnot_2.c: New test. * gcc.target/aarch64/sve/cond_cnot_4.c: Likewise. * gcc.target/aarch64/sve/cond_cnot_4_run.c: Likewise. * gcc.target/aarch64/sve/cond_cnot_5.c: Likewise. * gcc.target/aarch64/sve/cond_cnot_5_run.c: Likewise. * gcc.target/aarch64/sve/cond_cnot_6.c: Likewise. * gcc.target/aarch64/sve/cond_cnot_6_run.c: Likewise. --- diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index b83f9912cb6..2f5a5e3c914 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3227,16 +3227,16 @@ ) (define_insn "*cnot<mode>" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(unspec:<VPRED> [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (match_operand:SI 5 "aarch64_sve_ptrue_flag") (eq:<VPRED> - (match_operand:SVE_FULL_I 2 "register_operand" "0, w") - (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] UNSPEC_PRED_Z) - (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") + (match_operand:SVE_I 4 "aarch64_simd_imm_one") (match_dup 3)] UNSPEC_SEL))] "TARGET_SVE" @@ -3274,19 +3274,19 @@ ;; Predicated logical inverse, merging with the first input. 
(define_insn_and_rewrite "*cond_cnot<mode>_2" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") ;; Logical inverse of operand 2 (as above). - (unspec:SVE_FULL_I + (unspec:SVE_I [(unspec:<VPRED> [(match_operand 5) (const_int SVE_KNOWN_PTRUE) (eq:<VPRED> - (match_operand:SVE_FULL_I 2 "register_operand" "0, w") - (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] + (match_operand:SVE_I 2 "register_operand" "0, w") + (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] UNSPEC_PRED_Z) - (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") + (match_operand:SVE_I 4 "aarch64_simd_imm_one") (match_dup 3)] UNSPEC_SEL) (match_dup 2)] @@ -3310,22 +3310,22 @@ ;; as earlyclobber helps to make the instruction more regular to the ;; register allocator. (define_insn_and_rewrite "*cond_cnot<mode>_any" - [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") - (unspec:SVE_FULL_I + [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_I [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") ;; Logical inverse of operand 2 (as above). 
- (unspec:SVE_FULL_I + (unspec:SVE_I [(unspec:<VPRED> [(match_operand 5) (const_int SVE_KNOWN_PTRUE) (eq:<VPRED> - (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") - (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] + (match_operand:SVE_I 2 "register_operand" "w, w, w") + (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] UNSPEC_PRED_Z) - (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") + (match_operand:SVE_I 4 "aarch64_simd_imm_one") (match_dup 3)] UNSPEC_SEL) - (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] + (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])" "@ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c new file mode 100644 index 00000000000..fe778234424 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cnot_2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE1, TYPE2, COUNT) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2##_##TYPE3 (TYPE2 *restrict r, \ + TYPE1 *restrict pred, \ + TYPE2 *restrict a) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + if (pred[i]) \ + r[i] = !a[i]; \ + } + +#define TEST_ALL(T) \ + T (int16_t, int8_t, 7) \ + T (int32_t, int8_t, 3) \ + T (int32_t, int16_t, 3) \ + T (int64_t, int8_t, 5) \ + T (int64_t, int16_t, 5) \ + T (int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c new file mode 100644 index 00000000000..729d3f4f2ac --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* 
{ dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE1, TYPE2, COUNT) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, \ + TYPE2 *__restrict a, \ + TYPE1 *__restrict pred) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + r[i] = pred[i] ? !a[i] : a[i]; \ + } + +#define TEST_ALL(T) \ + T (int16_t, int8_t, 7) \ + T (int32_t, int8_t, 3) \ + T (int32_t, int16_t, 3) \ + T (int64_t, int8_t, 5) \ + T (int64_t, int16_t, 5) \ + T (int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c new file mode 100644 index 00000000000..de9c0a502e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_4_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_cnot_4.c" + +#define TEST_LOOP(TYPE1, TYPE2, N) \ + { \ + TYPE1 pred[N]; \ + TYPE2 r[N], a[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = i & 1 ? 0 : 3 * (i + 1); \ + pred[i] = (i % 3 < 2); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2 (r, a, pred); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE2) (pred[i] ? 
!a[i] : a[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c new file mode 100644 index 00000000000..7318e108591 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE1, TYPE2, COUNT) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, \ + TYPE1 *__restrict a, \ + TYPE2 *__restrict b) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + r[i] = a[i] == 0 ? !b[i] : a[i]; \ + } + +#define TEST_ALL(T) \ + T (int16_t, int8_t, 7) \ + T (int32_t, int8_t, 3) \ + T (int32_t, int16_t, 3) \ + T (int64_t, int8_t, 5) \ + T (int64_t, int16_t, 5) \ + T (int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c new file mode 100644 index 00000000000..f8f277c32c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_5_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_cnot_5.c" + +#define TEST_LOOP(TYPE1, TYPE2, N) \ + { \ + TYPE1 a[N]; \ + TYPE2 r[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = i % 3 < 2 ? 0 : i * 42; \ + b[i] = i & 1 ? 
0 : 3 * (i + 1); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2 (r, a, b); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : a[i])) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c new file mode 100644 index 00000000000..d44e357f44a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +#define DEF_LOOP(TYPE1, TYPE2, COUNT) \ + void __attribute__ ((noipa)) \ + test_##TYPE1##_##TYPE2 (TYPE2 *__restrict r, \ + TYPE1 *__restrict a, \ + TYPE2 *__restrict b) \ + { \ + for (int i = 0; i < COUNT; ++i) \ + r[i] = a[i] == 0 ? !b[i] : 127; \ + } + +#define TEST_ALL(T) \ + T (int16_t, int8_t, 7) \ + T (int32_t, int8_t, 3) \ + T (int32_t, int16_t, 3) \ + T (int64_t, int8_t, 5) \ + T (int64_t, int16_t, 5) \ + T (int64_t, int32_t, 5) + +TEST_ALL (DEF_LOOP) + +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 3 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ +/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ + +/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c new file mode 100644 index 00000000000..9e33616dc8f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_6_run.c @@ -0,0 +1,26 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "cond_cnot_6.c" + +#define TEST_LOOP(TYPE1, TYPE2, N) \ + { \ + TYPE1 a[N]; \ + TYPE2 r[N], b[N]; \ + for (int i = 0; i < N; ++i) \ + { \ + a[i] = i % 3 < 2 ? 0 : i * 42; \ + b[i] = i & 1 ? 
0 : 3 * (i + 1); \ + asm volatile ("" ::: "memory"); \ + } \ + test_##TYPE1##_##TYPE2 (r, a, b); \ + for (int i = 0; i < N; ++i) \ + if (r[i] != (TYPE2) (a[i] == 0 ? !b[i] : 127)) \ + __builtin_abort (); \ + } + +int main () +{ + TEST_ALL (TEST_LOOP) + return 0; +}