[patch1/2][arm][PR90317]: fix sha1 patterns
authorSylvia Taylor <sylvia.taylor@arm.com>
Thu, 18 Jul 2019 15:42:13 +0000 (15:42 +0000)
committerKyrylo Tkachov <ktkachov@gcc.gnu.org>
Thu, 18 Jul 2019 15:42:13 +0000 (15:42 +0000)
This patch fixes:

1) Ice message thrown when using the crypto_sha1h intrinsic due to
incompatible mode used for zero_extend. Removed zero extend as it is
not a good choice for vector modes and using an equivalent single
mode like TI (128bits) instead of V4SI produces extra instructions
making it inefficient.

This affects gcc version 8 and above.

2) Incorrect combine optimizations made due to vec_select usage
in the sha1 patterns on arm. The patterns should only combine
a vec select within a sha1h<op> instruction when the lane is 0.

This affects gcc version 5 and above.

- Fixed by explicitly declaring the valid const int for such
optimizations. For cases when the lane is not 0, the vector
lane selection now occurs in a e.g. vmov instruction prior
to sha1h<op>.

- Updated the sha1h testcases on arm to check for additional
cases with custom vector lane selection.

The intrinsic functions for the sha1 patterns have also been
simplified which seems to eliminate extra vmovs like:
- vmov.i32 q8, #0.

2019-07-18  Sylvia Taylor  <sylvia.taylor@arm.com>

        PR target/90317
        * config/arm/arm_neon.h
        (vsha1h_u32): Refactor.
        (vsha1cq_u32): Likewise.
        (vsha1pq_u32): Likewise.
        (vsha1mq_u32): Likewise.
        * config/arm/crypto.md:
        (crypto_sha1h): Remove zero extend, correct vec select.
        (crypto_sha1c): Correct vec select.
        (crypto_sha1m): Likewise.
        (crypto_sha1p): Likewise.

        * gcc.target/arm/crypto-vsha1cq_u32.c (foo): Change return type to
        uint32_t.
        (GET_LANE, TEST_SHA1C_VEC_SELECT): New.
        * gcc.target/arm/crypto-vsha1h_u32.c (foo): Change return type to
        uint32_t.
        (GET_LANE, TEST_SHA1H_VEC_SELECT): New.
        * gcc.target/arm/crypto-vsha1mq_u32.c (foo): Change return type to
        uint32_t.
        (GET_LANE, TEST_SHA1M_VEC_SELECT): New.
        * gcc.target/arm/crypto-vsha1pq_u32.c (foo): Change return type to
        uint32_t.
        (GET_LANE, TEST_SHA1P_VEC_SELECT): New.

From-SVN: r273574

gcc/ChangeLog
gcc/config/arm/arm_neon.h
gcc/config/arm/crypto.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/crypto-vsha1cq_u32.c
gcc/testsuite/gcc.target/arm/crypto-vsha1h_u32.c
gcc/testsuite/gcc.target/arm/crypto-vsha1mq_u32.c
gcc/testsuite/gcc.target/arm/crypto-vsha1pq_u32.c

index a4a625e7eb039eac0947889930bc263c7e9ee386..668dc40b7fab2176f14288ff3271f48b1e14acf3 100644 (file)
@@ -1,3 +1,17 @@
+2019-07-18  Sylvia Taylor  <sylvia.taylor@arm.com>
+
+        PR target/90317
+        * config/arm/arm_neon.h
+        (vsha1h_u32): Refactor.
+        (vsha1cq_u32): Likewise.
+        (vsha1pq_u32): Likewise.
+        (vsha1mq_u32): Likewise.
+        * config/arm/crypto.md:
+        (crypto_sha1h): Remove zero extend, correct vec select.
+        (crypto_sha1c): Correct vec select.
+        (crypto_sha1m): Likewise.
+        (crypto_sha1p): Likewise.
+
 2019-07-18  Richard Earnshaw  <rearnsha@arm.com>
 
        * config/arm/predicates.md (arm_borrow_operation): New predicate.
index 6b982392ece69bb245ffd3bdc34d09c6f01745eb..1f200d491d1de3993bc3a682d586da137958ff6b 100644 (file)
@@ -16938,37 +16938,32 @@ __extension__ extern __inline uint32_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsha1h_u32 (uint32_t __hash_e)
 {
-  uint32x4_t __t = vdupq_n_u32 (0);
-  __t = vsetq_lane_u32 (__hash_e, __t, 0);
-  __t = __builtin_arm_crypto_sha1h (__t);
-  return vgetq_lane_u32 (__t, 0);
+  return vgetq_lane_u32 (__builtin_arm_crypto_sha1h (vdupq_n_u32 (__hash_e)),
+                        0);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
 {
-  uint32x4_t __t = vdupq_n_u32 (0);
-  __t = vsetq_lane_u32 (__hash_e, __t, 0);
-  return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk);
+  return __builtin_arm_crypto_sha1c (__hash_abcd, vdupq_n_u32 (__hash_e),
+                                    __wk);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
 {
-  uint32x4_t __t = vdupq_n_u32 (0);
-  __t = vsetq_lane_u32 (__hash_e, __t, 0);
-  return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk);
+  return __builtin_arm_crypto_sha1p (__hash_abcd, vdupq_n_u32 (__hash_e),
+                                    __wk);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
 vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
 {
-  uint32x4_t __t = vdupq_n_u32 (0);
-  __t = vsetq_lane_u32 (__hash_e, __t, 0);
-  return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk);
+  return __builtin_arm_crypto_sha1m (__hash_abcd,  vdupq_n_u32 (__hash_e),
+                                    __wk);
 }
 
 __extension__ extern __inline uint32x4_t
index bf34f69fc75c4b16dd947a7fe48728a97a319abf..115c515ac46965892556cce127df56cccf047b2c 100644 (file)
   [(set_attr "type" "<crypto_type>")]
 )
 
+/* The vec_select operation always selects index 0 from the lower V2SI subreg
+   of the V4SI, adjusted for endianness. Required due to neon_vget_lane and
+   neon_set_lane that change the element ordering in memory for big-endian.  */
+
 (define_insn "crypto_sha1h"
   [(set (match_operand:V4SI 0 "register_operand" "=w")
-        (zero_extend:V4SI
-          (unspec:SI [(vec_select:SI
-                        (match_operand:V4SI 1 "register_operand" "w")
-                        (parallel [(match_operand:SI 2 "immediate_operand" "i")]))]
-           UNSPEC_SHA1H)))]
-  "TARGET_CRYPTO"
+          (unspec:V4SI
+             [(vec_select:SI
+               (match_operand:V4SI 1 "register_operand" "w")
+               (parallel [(match_operand:SI 2 "immediate_operand" "i")]))]
+          UNSPEC_SHA1H))]
+  "TARGET_CRYPTO && INTVAL (operands[2]) == NEON_ENDIAN_LANE_N (V2SImode, 0)"
   "sha1h.32\\t%q0, %q1"
   [(set_attr "type" "crypto_sha1_fast")]
 )
   [(set_attr "type" "crypto_pmull")]
 )
 
+/* The vec_select operation always selects index 0 from the lower V2SI subreg
+   of the V4SI, adjusted for endianness. Required due to neon_vget_lane and
+   neon_set_lane that change the element ordering in memory for big-endian.  */
+
 (define_insn "crypto_<crypto_pattern>"
   [(set (match_operand:V4SI 0 "register_operand" "=w")
         (unspec:<crypto_mode>
                         (parallel [(match_operand:SI 4 "immediate_operand" "i")]))
                       (match_operand:<crypto_mode> 3 "register_operand" "w")]
          CRYPTO_SELECTING))]
-  "TARGET_CRYPTO"
+  "TARGET_CRYPTO && INTVAL (operands[4]) == NEON_ENDIAN_LANE_N (V2SImode, 0)"
   "<crypto_pattern>.<crypto_size_sfx>\\t%q0, %q2, %q3"
   [(set_attr "type" "<crypto_type>")]
 )
index 0f47604da85abd76c7e7559eb4d0f8b526ccaf4c..7bf322fc18220d6ce27398598a6d514f5b7f9201 100644 (file)
@@ -1,3 +1,19 @@
+2019-07-18  Sylvia Taylor  <sylvia.taylor@arm.com>
+
+        PR target/90317
+        * gcc.target/arm/crypto-vsha1cq_u32.c (foo): Change return type to
+        uint32_t.
+        (GET_LANE, TEST_SHA1C_VEC_SELECT): New.
+        * gcc.target/arm/crypto-vsha1h_u32.c (foo): Change return type to
+        uint32_t.
+        (GET_LANE, TEST_SHA1H_VEC_SELECT): New.
+        * gcc.target/arm/crypto-vsha1mq_u32.c (foo): Change return type to
+        uint32_t.
+        (GET_LANE, TEST_SHA1M_VEC_SELECT): New.
+        * gcc.target/arm/crypto-vsha1pq_u32.c (foo): Change return type to
+        uint32_t.
+        (GET_LANE, TEST_SHA1P_VEC_SELECT): New.
+
 2019-07-18  Jan Hubicka  <hubicka@ucw.cz>
 
        * g++.dg/lto/alias-5_0.C: New testcase.
index 4dc9dee6617efba4788d0b2273bd84511b28a34f..41f97a74d6f8ad59320d002ddad4f56316b4f340 100644 (file)
@@ -1,11 +1,12 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_crypto_ok } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-O3" } */
 
 #include "arm_neon.h"
 
-int
-foo (void)
+uint32_t foo (void)
+
 {
   uint32_t hash = 0xdeadbeef;
   uint32x4_t a = {0, 1, 2, 3};
@@ -15,4 +16,20 @@ foo (void)
   return res[0];
 }
 
-/* { dg-final { scan-assembler "sha1c.32\tq\[0-9\]+, q\[0-9\]+" } } */
+#define GET_LANE(lane)                                                      \
+       uint32x4_t foo_lane##lane (uint32x4_t val,uint32x4_t a, uint32x4_t b)\
+       {                                                                    \
+           return vsha1cq_u32 (a, vgetq_lane_u32 (val, lane), b);           \
+       }
+
+#define TEST_SHA1C_VEC_SELECT(FUNC)                            \
+       FUNC (0)                                                \
+       FUNC (1)                                                \
+       FUNC (2)                                                \
+       FUNC (3)                                                \
+
+TEST_SHA1C_VEC_SELECT (GET_LANE)
+
+/* { dg-final { scan-assembler-times {sha1c.32\tq[0-9]+, q[0-9]+} 5 } } */
+/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 4 } } */
index dee277485247b8c92ff5c17e6ed0fccbaec5575e..b2846675a27696c23101be12d202c1eb850e8f17 100644 (file)
@@ -1,14 +1,31 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_crypto_ok } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-O3" } */
 
 #include "arm_neon.h"
 
-int
-foo (void)
+uint32_t foo (void)
+
 {
   uint32_t val = 0xdeadbeef;
   return vsha1h_u32 (val);
 }
 
-/* { dg-final { scan-assembler "sha1h.32\tq\[0-9\]+, q\[0-9\]+" } } */
+#define GET_LANE(lane)                                         \
+       uint32_t foo_lane##lane (uint32x4_t val)                \
+       {                                                       \
+           return vsha1h_u32 (vgetq_lane_u32 (val, lane));     \
+       }
+
+#define TEST_SHA1H_VEC_SELECT(FUNC)                            \
+       FUNC (0)                                                \
+       FUNC (1)                                                \
+       FUNC (2)                                                \
+       FUNC (3)                                                \
+
+TEST_SHA1H_VEC_SELECT (GET_LANE)
+
+/* { dg-final { scan-assembler-times {sha1h.32\tq[0-9]+, q[0-9]+} 5 } } */
+/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 8 } } */
index 672b93a97475fd7caec67611cf074b32fe2d6860..676e64ce779cea34f4744e09a697bcfc64b4e3ba 100644 (file)
@@ -1,11 +1,12 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_crypto_ok } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-O3" } */
 
 #include "arm_neon.h"
 
-int
-foo (void)
+uint32_t foo (void)
+
 {
   uint32_t hash = 0xdeadbeef;
   uint32x4_t a = {0, 1, 2, 3};
@@ -15,4 +16,20 @@ foo (void)
   return res[0];
 }
 
-/* { dg-final { scan-assembler "sha1m.32\tq\[0-9\]+, q\[0-9\]+" } } */
+#define GET_LANE(lane)                                                      \
+       uint32x4_t foo_lane##lane (uint32x4_t val,uint32x4_t a, uint32x4_t b)\
+       {                                                                    \
+           return vsha1mq_u32 (a, vgetq_lane_u32 (val, lane), b);           \
+       }
+
+#define TEST_SHA1M_VEC_SELECT(FUNC)                            \
+       FUNC (0)                                                \
+       FUNC (1)                                                \
+       FUNC (2)                                                \
+       FUNC (3)                                                \
+
+TEST_SHA1M_VEC_SELECT (GET_LANE)
+
+/* { dg-final { scan-assembler-times {sha1m.32\tq[0-9]+, q[0-9]+} 5 } } */
+/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 4 } } */
index ff508e0dc7f683c55496be15b4bab5219a73ac66..ed10fe265ba74ae9c6ae86aca63d802b25887a21 100644 (file)
@@ -1,11 +1,12 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_crypto_ok } */
 /* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-O3" } */
 
 #include "arm_neon.h"
 
-int
-foo (void)
+uint32_t foo (void)
+
 {
   uint32_t hash = 0xdeadbeef;
   uint32x4_t a = {0, 1, 2, 3};
@@ -15,4 +16,20 @@ foo (void)
   return res[0];
 }
 
-/* { dg-final { scan-assembler "sha1p.32\tq\[0-9\]+, q\[0-9\]+" } } */
+#define GET_LANE(lane)                                                      \
+       uint32x4_t foo_lane##lane (uint32x4_t val,uint32x4_t a, uint32x4_t b)\
+       {                                                                    \
+           return vsha1pq_u32 (a, vgetq_lane_u32 (val, lane), b);           \
+       }
+
+#define TEST_SHA1P_VEC_SELECT(FUNC)                            \
+       FUNC (0)                                                \
+       FUNC (1)                                                \
+       FUNC (2)                                                \
+       FUNC (3)                                                \
+
+TEST_SHA1P_VEC_SELECT (GET_LANE)
+
+/* { dg-final { scan-assembler-times {sha1p.32\tq[0-9]+, q[0-9]+} 5 } } */
+/* { dg-final { scan-assembler-times {vdup.32\tq[0-9]+, r[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {vmov.32\tr[0-9]+, d[0-9]+\[[0-9]+\]+} 4 } } */