From 5169fa77322e36dd4783bc5126185159c35a3584 Mon Sep 17 00:00:00 2001 From: Sylvia Taylor Date: Tue, 9 Jul 2019 12:51:55 +0000 Subject: [PATCH] [aarch64]: redefine aes patterns This first patch removes aarch64 usage of the aese/aesmc and aesd/aesimc fusions (i.e. aes fusion) implemented in the scheduler due to unpredictable behaviour observed in cases such as: - when register allocation goes bad (e.g. extra movs) - aes operations with xor and zeroed keys among interleaved operations A more stable version should be provided by instead doing the aes fusion during the combine pass. Since the aese and aesd patterns have been rewritten as encapsulating a xor operation, the existing combine fusion patterns have also been updated. The purpose is to simplify the need of having additional combine patterns for cases like the ones below: For AESE (though it also applies to AESD as both have a xor operation): data = data ^ key; data = vaeseq_u8(data, zero); --- eor v1.16b, v0.16b, v1.16b aese v1.16b, v2.16b Should mean and generate the same as: data = vaeseq_u8(data, key); --- aese v1.16b, v0.16b 2019-07-09 Sylvia Taylor * config/aarch64/aarch64-simd.md (aarch64_crypto_aesv16qi): Redefine pattern with xor. (aarch64_crypto_aesv16qi): Remove attribute enabled. (*aarch64_crypto_aesv16qi_xor_combine): Remove both. (*aarch64_crypto_aese_fused, *aarch64_crypto_aesd_fused): Update to new definition. * config/aarch64/aarch64.c (aarch_macro_fusion_pair_p): Remove aese/aesmc fusion check. * gcc.target/aarch64/crypto-fuse-1.c: Remove. * gcc.target/aarch64/crypto-fuse-2.c: Remove. * gcc.target/aarch64/aes-fuse-1.c: New testcase. * gcc.target/aarch64/aes-fuse-2.c: New testcase. From-SVN: r273304 --- gcc/ChangeLog | 11 +++ gcc/config/aarch64/aarch64-simd.md | 67 ++++++------------- gcc/config/aarch64/aarch64.c | 4 -- gcc/testsuite/ChangeLog | 7 ++ .../aarch64/{crypto-fuse-1.c => aes-fuse-1.c} | 45 +++++++++---- gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c | 65 ++++++++++++++++++ .../gcc.target/aarch64/crypto-fuse-2.c | 45 ------------- 7 files changed, 135 insertions(+), 109 deletions(-) rename gcc/testsuite/gcc.target/aarch64/{crypto-fuse-1.c => aes-fuse-1.c} (51%) create mode 100644 gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c delete mode 100644 gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 271a27abfc6..25959b9453c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2019-07-09 Sylvia Taylor + + * config/aarch64/aarch64-simd.md + (aarch64_crypto_aesv16qi): Redefine pattern with xor. + (aarch64_crypto_aesv16qi): Remove attribute enabled. + (*aarch64_crypto_aesv16qi_xor_combine): Remove both. + (*aarch64_crypto_aese_fused, + *aarch64_crypto_aesd_fused): Update to new definition. + * config/aarch64/aarch64.c + (aarch_macro_fusion_pair_p): Remove aese/aesmc fusion check. + 2019-07-09 Richard Biener * gimple-match.h (gimple_match_op::resimplify): New. diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 837242c7e56..0c2600f1fc6 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -6053,56 +6053,23 @@ (define_insn "aarch64_crypto_aesv16qi" [(set (match_operand:V16QI 0 "register_operand" "=w") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "register_operand" "w")] + (unspec:V16QI + [(xor:V16QI + (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "register_operand" "w"))] CRYPTO_AES))] "TARGET_SIMD && TARGET_AES" "aes\\t%0.16b, %2.16b" [(set_attr "type" "crypto_aese")] ) -(define_insn "*aarch64_crypto_aesv16qi_xor_combine" - [(set (match_operand:V16QI 0 "register_operand" "=w") - (unspec:V16QI [(xor:V16QI - (match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "register_operand" "w")) - (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")] - CRYPTO_AES))] - "TARGET_SIMD && TARGET_AES" - "aes\\t%0.16b, %2.16b" - [(set_attr "type" "crypto_aese")] -) - -(define_insn "*aarch64_crypto_aesv16qi_xor_combine" - [(set (match_operand:V16QI 0 "register_operand" "=w") - (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "") - (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "register_operand" "w"))] - CRYPTO_AES))] - "TARGET_SIMD && TARGET_AES" - "aes\\t%0.16b, %2.16b" - [(set_attr "type" "crypto_aese")] -) - -;; When AES/AESMC fusion is enabled we want the register allocation to -;; look like: -;; AESE Vn, _ -;; AESMC Vn, Vn -;; So prefer to tie operand 1 to operand 0 when fusing. - (define_insn "aarch64_crypto_aesv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=w,w") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")] + [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] CRYPTO_AESMC))] "TARGET_SIMD && TARGET_AES" "aes\\t%0.16b, %1.16b" - [(set_attr "type" "crypto_aesmc") - (set_attr_alternative "enabled" - [(if_then_else (match_test - "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)") - (const_string "yes" ) - (const_string "no")) - (const_string "yes")])] + [(set_attr "type" "crypto_aesmc")] ) ;; When AESE/AESMC fusion is enabled we really want to keep the two together @@ -6111,12 +6078,14 @@ ;; Mash the two together during combine. (define_insn "*aarch64_crypto_aese_fused" - [(set (match_operand:V16QI 0 "register_operand" "=&w") + [(set (match_operand:V16QI 0 "register_operand" "=w") (unspec:V16QI [(unspec:V16QI - [(match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE) - ] UNSPEC_AESMC))] + [(xor:V16QI + (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "register_operand" "w"))] + UNSPEC_AESE)] + UNSPEC_AESMC))] "TARGET_SIMD && TARGET_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" @@ -6130,12 +6099,14 @@ ;; Mash the two together during combine. (define_insn "*aarch64_crypto_aesd_fused" - [(set (match_operand:V16QI 0 "register_operand" "=&w") + [(set (match_operand:V16QI 0 "register_operand" "=w") (unspec:V16QI [(unspec:V16QI - [(match_operand:V16QI 1 "register_operand" "0") - (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD) - ] UNSPEC_AESIMC))] + [(xor:V16QI + (match_operand:V16QI 1 "register_operand" "%0") + (match_operand:V16QI 2 "register_operand" "w"))] + UNSPEC_AESD)] + UNSPEC_AESIMC))] "TARGET_SIMD && TARGET_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a18fbd0f0aa..e4e9e3fb02c 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -17965,10 +17965,6 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) } } - if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC) - && aarch_crypto_can_dual_issue (prev, curr)) - return true; - if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH) && any_condjump_p (curr)) { diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6f334ef888e..3e36c0a2f4d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2019-07-09 Sylvia Taylor + + * gcc.target/aarch64/crypto-fuse-1.c: Remove. + * gcc.target/aarch64/crypto-fuse-2.c: Remove. + * gcc.target/aarch64/aes-fuse-1.c: New testcase. + * gcc.target/aarch64/aes-fuse-2.c: New testcase. + 2019-07-09 Christophe Lyon * gcc.target/arm/cmse/bitfield-1.c: Fix address of .gnu.sgstubs diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c similarity index 51% rename from gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c rename to gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c index d8adc89466c..d7b4f89919d 100644 --- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c +++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c @@ -1,45 +1,66 @@ /* { dg-do compile } */ /* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ +/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ #include #define AESE(r, v, key) (r = vaeseq_u8 ((v), (key))); #define AESMC(r, i) (r = vaesmcq_u8 (i)) +const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + uint8x16_t dummy; uint8x16_t a; uint8x16_t b; uint8x16_t c; uint8x16_t d; -uint8x16_t e; +uint8x16_t x; +uint8x16_t y; +uint8x16_t k; + +void foo (void) -void -foo (void) { - AESE (a, a, e); + AESE (a, a, k); dummy = vaddq_u8 (dummy, dummy); dummy = vaddq_u8 (dummy, dummy); - AESE (b, b, e); + AESE (b, b, k); dummy = vaddq_u8 (dummy, dummy); dummy = vaddq_u8 (dummy, dummy); - AESE (c, c, e); + AESE (c, c, k); dummy = vaddq_u8 (dummy, dummy); dummy = vaddq_u8 (dummy, dummy); - AESE (d, d, e); + AESE (d, d, k); dummy = vaddq_u8 (dummy, dummy); dummy = vaddq_u8 (dummy, dummy); - AESMC (a, a); + x = x ^ k; + AESE (x, x, zero); dummy = vaddq_u8 (dummy, dummy); dummy = vaddq_u8 (dummy, dummy); - AESMC (b, b); + y = y ^ k; + AESE (y, y, zero); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + + AESMC (d, d); dummy = vaddq_u8 (dummy, dummy); dummy = vaddq_u8 (dummy, dummy); AESMC (c, c); dummy = vaddq_u8 (dummy, dummy); dummy = vaddq_u8 (dummy, dummy); - AESMC (d, d); -} + AESMC (b, b); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESMC (a, a); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); -/* { dg-final { scan-assembler-times "crypto_aese_fused" 4 } } */ + AESMC (y, y); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESMC (x, x); +} +/* { dg-final { scan-assembler-times "crypto_aese_fused" 6 } } */ +/* { dg-final { scan-assembler-not "veor" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c new file mode 100644 index 00000000000..dfe01b03a36 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ +/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ + +#include + +#define AESD(r, v, key) (r = vaesdq_u8 ((v), (key))); +#define AESIMC(r, i) (r = vaesimcq_u8 (i)) + +const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +uint8x16_t dummy; +uint8x16_t a; +uint8x16_t b; +uint8x16_t c; +uint8x16_t d; +uint8x16_t x; +uint8x16_t y; +uint8x16_t k; + +void foo (void) +{ + AESD (a, a, k); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESD (b, b, k); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESD (c, c, k); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESD (d, d, k); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + + x = x ^ k; + AESD (x, x, zero); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + y = y ^ k; + AESD (y, y, zero); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + + AESIMC (d, d); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESIMC (c, c); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESIMC (b, b); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESIMC (a, a); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + + AESIMC (y, y); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESIMC (x, x); +} + +/* { dg-final { scan-assembler-times "crypto_aesd_fused" 6 } } */ +/* { dg-final { scan-assembler-not "veor" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c deleted file mode 100644 index b12df2d3ee4..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c +++ /dev/null @@ -1,45 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ - -#include - -#define AESE(r, v, key) (r = vaesdq_u8 ((v), (key))); -#define AESMC(r, i) (r = vaesimcq_u8 (i)) - -uint8x16_t dummy; -uint8x16_t a; -uint8x16_t b; -uint8x16_t c; -uint8x16_t d; -uint8x16_t e; - -void -foo (void) -{ - AESE (a, a, e); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); - AESE (b, b, e); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); - AESE (c, c, e); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); - AESE (d, d, e); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); - - AESMC (a, a); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); - AESMC (b, b); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); - AESMC (c, c); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); - AESMC (d, d); -} - -/* { dg-final { scan-assembler-times "crypto_aesd_fused" 4 } } */ - -- 2.30.2