+2019-07-18 Sylvia Taylor <sylvia.taylor@arm.com>
+
+ PR target/90317
+ * config/arm/arm_neon.h
+ (vsha1h_u32): Refactor.
+ (vsha1cq_u32): Likewise.
+ (vsha1pq_u32): Likewise.
+ (vsha1mq_u32): Likewise.
+ * config/arm/crypto.md
+ (crypto_sha1h): Remove zero extend, correct vec select.
+ (crypto_sha1c): Correct vec select.
+ (crypto_sha1m): Likewise.
+ (crypto_sha1p): Likewise.
+
2019-07-18 Richard Earnshaw <rearnsha@arm.com>
* config/arm/predicates.md (arm_borrow_operation): New predicate.
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsha1h_u32 (uint32_t __hash_e)
{
- uint32x4_t __t = vdupq_n_u32 (0);
- __t = vsetq_lane_u32 (__hash_e, __t, 0);
- __t = __builtin_arm_crypto_sha1h (__t);
- return vgetq_lane_u32 (__t, 0);
+ return vgetq_lane_u32 (__builtin_arm_crypto_sha1h (vdupq_n_u32 (__hash_e)),
+ 0);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
{
- uint32x4_t __t = vdupq_n_u32 (0);
- __t = vsetq_lane_u32 (__hash_e, __t, 0);
- return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk);
+ return __builtin_arm_crypto_sha1c (__hash_abcd, vdupq_n_u32 (__hash_e),
+ __wk);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
{
- uint32x4_t __t = vdupq_n_u32 (0);
- __t = vsetq_lane_u32 (__hash_e, __t, 0);
- return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk);
+ return __builtin_arm_crypto_sha1p (__hash_abcd, vdupq_n_u32 (__hash_e),
+ __wk);
}
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk)
{
- uint32x4_t __t = vdupq_n_u32 (0);
- __t = vsetq_lane_u32 (__hash_e, __t, 0);
- return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk);
+ return __builtin_arm_crypto_sha1m (__hash_abcd, vdupq_n_u32 (__hash_e),
+ __wk);
}
__extension__ extern __inline uint32x4_t
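The refactoring is safe because the SHA1 instructions read only lane 0 of the
scalar operand once endianness is accounted for, so the upper lanes written by
vdupq_n_u32 are don't-care values.  A minimal sketch of the before/after
forms, assuming the crypto options the tests below use; the wrapper names are
hypothetical, while __builtin_arm_crypto_sha1h is the builtin arm_neon.h
already uses:

#include "arm_neon.h"

/* Old form: zero the vector, then insert the scalar into lane 0.  */
static uint32_t
sha1h_old_form (uint32_t hash_e)
{
  uint32x4_t t = vdupq_n_u32 (0);
  t = vsetq_lane_u32 (hash_e, t, 0);
  return vgetq_lane_u32 (__builtin_arm_crypto_sha1h (t), 0);
}

/* New form: broadcast the scalar to every lane; only lane 0 is consumed,
   so a single vdup replaces the vdup + vsetq_lane pair.  */
static uint32_t
sha1h_new_form (uint32_t hash_e)
{
  return vgetq_lane_u32 (__builtin_arm_crypto_sha1h (vdupq_n_u32 (hash_e)),
			 0);
}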
[(set_attr "type" "<crypto_type>")]
)
+/* The vec_select operation always selects index 0 from the lower V2SI subreg
+   of the V4SI, adjusted for endianness.  This is required because
+   neon_vget_lane and neon_vset_lane change the element ordering in memory
+   for big-endian targets.  */
+
(define_insn "crypto_sha1h"
[(set (match_operand:V4SI 0 "register_operand" "=w")
- (zero_extend:V4SI
- (unspec:SI [(vec_select:SI
- (match_operand:V4SI 1 "register_operand" "w")
- (parallel [(match_operand:SI 2 "immediate_operand" "i")]))]
- UNSPEC_SHA1H)))]
- "TARGET_CRYPTO"
+ (unspec:V4SI
+ [(vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))]
+ UNSPEC_SHA1H))]
+ "TARGET_CRYPTO && INTVAL (operands[2]) == NEON_ENDIAN_LANE_N (V2SImode, 0)"
"sha1h.32\\t%q0, %q1"
[(set_attr "type" "crypto_sha1_fast")]
)
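The added condition accepts only the vec_select index that corresponds to
architectural lane 0 after endian adjustment.  In outline, per the definition
of NEON_ENDIAN_LANE_N in config/arm/arm.h:

/* vec_select lane numbers are reversed within a register on big-endian.  */
#define NEON_ENDIAN_LANE_N(mode, n) \
  (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n)

/* Hence for the V2SImode check in the condition above:
   little-endian: NEON_ENDIAN_LANE_N (V2SImode, 0) == 0
   big-endian:    NEON_ENDIAN_LANE_N (V2SImode, 0) == 1  */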
[(set_attr "type" "crypto_pmull")]
)
+/* The vec_select operation always selects index 0 from the lower V2SI subreg
+   of the V4SI, adjusted for endianness.  This is required because
+   neon_vget_lane and neon_vset_lane change the element ordering in memory
+   for big-endian targets.  */
+
(define_insn "crypto_<crypto_pattern>"
[(set (match_operand:V4SI 0 "register_operand" "=w")
(unspec:<crypto_mode>
	[(match_operand:<crypto_mode> 1 "register_operand" "0")
	 (vec_select:SI
	  (match_operand:V4SI 2 "register_operand" "w")
	  (parallel [(match_operand:SI 4 "immediate_operand" "i")]))
(match_operand:<crypto_mode> 3 "register_operand" "w")]
CRYPTO_SELECTING))]
- "TARGET_CRYPTO"
+ "TARGET_CRYPTO && INTVAL (operands[4]) == NEON_ENDIAN_LANE_N (V2SImode, 0)"
"<crypto_pattern>.<crypto_size_sfx>\\t%q0, %q2, %q3"
[(set_attr "type" "<crypto_type>")]
)
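With the lane check in place, a vgetq_lane_u32 of lane 0 folds directly into
the instruction at -O3, while any other lane is routed through a core
register first; this is exactly what the new tests below assert.  A sketch
with hypothetical function names:

#include "arm_neon.h"

uint32x4_t
use_lane0 (uint32x4_t val, uint32x4_t abcd, uint32x4_t wk)
{
  /* Matches the pattern's vec_select: sha1c.32 reads the lane in place,
     so no vmov.32 or vdup.32 is emitted.  */
  return vsha1cq_u32 (abcd, vgetq_lane_u32 (val, 0), wk);
}

uint32x4_t
use_lane2 (uint32x4_t val, uint32x4_t abcd, uint32x4_t wk)
{
  /* Fails the lane check: the value is moved to a core register with
     vmov.32 and broadcast back with vdup.32 before sha1c.32.  */
  return vsha1cq_u32 (abcd, vgetq_lane_u32 (val, 2), wk);
}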
+2019-07-18 Sylvia Taylor <sylvia.taylor@arm.com>
+
+ PR target/90317
+ * gcc.target/arm/crypto-vsha1cq_u32.c (foo): Change return type to
+ uint32_t.
+ (GET_LANE, TEST_SHA1C_VEC_SELECT): New.
+ * gcc.target/arm/crypto-vsha1h_u32.c (foo): Change return type to
+ uint32_t.
+ (GET_LANE, TEST_SHA1H_VEC_SELECT): New.
+ * gcc.target/arm/crypto-vsha1mq_u32.c (foo): Change return type to
+ uint32_t.
+ (GET_LANE, TEST_SHA1M_VEC_SELECT): New.
+ * gcc.target/arm/crypto-vsha1pq_u32.c (foo): Change return type to
+ uint32_t.
+ (GET_LANE, TEST_SHA1P_VEC_SELECT): New.
+
2019-07-18 Jan Hubicka <hubicka@ucw.cz>
* g++.dg/lto/alias-5_0.C: New testcase.
/* { dg-do compile } */
/* { dg-require-effective-target arm_crypto_ok } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-O3" } */
#include "arm_neon.h"
-int
-foo (void)
+uint32_t
+foo (void)
{
uint32_t hash = 0xdeadbeef;
uint32x4_t a = {0, 1, 2, 3};
return res[0];
}
-/* { dg-final { scan-assembler "sha1c.32\tq\[0-9\]+, q\[0-9\]+" } } */
+#define GET_LANE(lane) \
+ uint32x4_t foo_lane##lane (uint32x4_t val, uint32x4_t a, uint32x4_t b)\
+ { \
+ return vsha1cq_u32 (a, vgetq_lane_u32 (val, lane), b); \
+ }
+
+#define TEST_SHA1C_VEC_SELECT(FUNC) \
+ FUNC (0) \
+ FUNC (1) \
+ FUNC (2) \
+ FUNC (3)
+
+TEST_SHA1C_VEC_SELECT (GET_LANE)
+
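+/* Expected counts: foo and the four foo_lane functions each emit one
+   sha1c.32.  Lane 0 folds straight into the pattern's vec_select, so
+   only lanes 1-3 take the vmov.32-plus-vdup.32 path (foo's constant
+   hash becomes a vector constant instead); the fourth vmov.32 extracts
+   res[0] in foo.  */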
+/* { dg-final { scan-assembler-times {sha1c\.32\tq[0-9]+, q[0-9]+} 5 } } */
+/* { dg-final { scan-assembler-times {vdup\.32\tq[0-9]+, r[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {vmov\.32\tr[0-9]+, d[0-9]+\[[0-9]+\]} 4 } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_crypto_ok } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-O3" } */
#include "arm_neon.h"
-int
-foo (void)
+uint32_t
+foo (void)
{
uint32_t val = 0xdeadbeef;
return vsha1h_u32 (val);
}
-/* { dg-final { scan-assembler "sha1h.32\tq\[0-9\]+, q\[0-9\]+" } } */
+#define GET_LANE(lane) \
+ uint32_t foo_lane##lane (uint32x4_t val) \
+ { \
+ return vsha1h_u32 (vgetq_lane_u32 (val, lane)); \
+ }
+
+#define TEST_SHA1H_VEC_SELECT(FUNC) \
+ FUNC (0) \
+ FUNC (1) \
+ FUNC (2) \
+ FUNC (3)
+
+TEST_SHA1H_VEC_SELECT (GET_LANE)
+
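+/* Expected counts: foo and the four foo_lane functions each emit one
+   sha1h.32.  Lane 0 folds into the pattern's vec_select, so only lanes
+   1-3 need vmov.32 plus vdup.32 on the input side; all five functions
+   also extract their uint32_t result with vmov.32, giving eight.  */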
+/* { dg-final { scan-assembler-times {sha1h\.32\tq[0-9]+, q[0-9]+} 5 } } */
+/* { dg-final { scan-assembler-times {vdup\.32\tq[0-9]+, r[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {vmov\.32\tr[0-9]+, d[0-9]+\[[0-9]+\]} 8 } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_crypto_ok } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-O3" } */
#include "arm_neon.h"
-int
-foo (void)
+uint32_t
+foo (void)
{
uint32_t hash = 0xdeadbeef;
uint32x4_t a = {0, 1, 2, 3};
return res[0];
}
-/* { dg-final { scan-assembler "sha1m.32\tq\[0-9\]+, q\[0-9\]+" } } */
+#define GET_LANE(lane) \
+ uint32x4_t foo_lane##lane (uint32x4_t val, uint32x4_t a, uint32x4_t b)\
+ { \
+ return vsha1mq_u32 (a, vgetq_lane_u32 (val, lane), b); \
+ }
+
+#define TEST_SHA1M_VEC_SELECT(FUNC) \
+ FUNC (0) \
+ FUNC (1) \
+ FUNC (2) \
+ FUNC (3)
+
+TEST_SHA1M_VEC_SELECT (GET_LANE)
+
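+/* Expected counts as for vsha1cq_u32: lane 0 folds into the pattern's
+   vec_select, lanes 1-3 go through vmov.32 plus vdup.32, and foo's
+   res[0] accounts for the fourth vmov.32.  */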
+/* { dg-final { scan-assembler-times {sha1m\.32\tq[0-9]+, q[0-9]+} 5 } } */
+/* { dg-final { scan-assembler-times {vdup\.32\tq[0-9]+, r[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {vmov\.32\tr[0-9]+, d[0-9]+\[[0-9]+\]} 4 } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_crypto_ok } */
/* { dg-add-options arm_crypto } */
+/* { dg-additional-options "-O3" } */
#include "arm_neon.h"
-int
-foo (void)
+uint32_t
+foo (void)
{
uint32_t hash = 0xdeadbeef;
uint32x4_t a = {0, 1, 2, 3};
return res[0];
}
-/* { dg-final { scan-assembler "sha1p.32\tq\[0-9\]+, q\[0-9\]+" } } */
+#define GET_LANE(lane) \
+ uint32x4_t foo_lane##lane (uint32x4_t val, uint32x4_t a, uint32x4_t b)\
+ { \
+ return vsha1pq_u32 (a, vgetq_lane_u32 (val, lane), b); \
+ }
+
+#define TEST_SHA1P_VEC_SELECT(FUNC) \
+ FUNC (0) \
+ FUNC (1) \
+ FUNC (2) \
+ FUNC (3)
+
+TEST_SHA1P_VEC_SELECT (GET_LANE)
+
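+/* Expected counts as for vsha1cq_u32: lane 0 folds into the pattern's
+   vec_select, lanes 1-3 take the vmov.32 plus vdup.32 path, and foo's
+   res[0] accounts for the fourth vmov.32.  */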
+/* { dg-final { scan-assembler-times {sha1p\.32\tq[0-9]+, q[0-9]+} 5 } } */
+/* { dg-final { scan-assembler-times {vdup\.32\tq[0-9]+, r[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {vmov\.32\tr[0-9]+, d[0-9]+\[[0-9]+\]} 4 } } */