+2019-02-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * config/aarch64/iterators.md (max_opp): New code_attr.
+ (USMAX): New code iterator.
+ * config/aarch64/predicates.md (aarch64_smin): New predicate.
+ (aarch64_umin): Likewise.
+ * config/aarch64/aarch64-simd.md (abd<mode>_3): Rename to...
+ (*aarch64_<su>abd<mode>_3): ... Change RTL representation to
+ MINUS (MAX MIN).
+
2019-02-07 H.J. Lu <hongjiu.lu@intel.com>
PR target/89229
[(set_attr "type" "neon_abs<q>")]
)
-(define_insn "abd<mode>_3"
+;; It's tempting to represent SABD as ABS (MINUS op1 op2).
+;; This isn't accurate as ABS always treats its input as a signed value.
+;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
+;; Whereas SABD would return 192 (-64 signed) on the above example.
+;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
+(define_insn "*aarch64_<su>abd<mode>_3"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
- (abs:VDQ_BHSI (minus:VDQ_BHSI
- (match_operand:VDQ_BHSI 1 "register_operand" "w")
- (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
- "TARGET_SIMD"
- "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ (minus:VDQ_BHSI
+ (USMAX:VDQ_BHSI
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
+ (match_operand:VDQ_BHSI 2 "register_operand" "w"))
+ (match_operator 3 "aarch64_<max_opp>"
+ [(match_dup 1)
+ (match_dup 2)])))]
+ "TARGET_SIMD"
+ "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "type" "neon_abd<q>")]
)
(define_code_attr f16mac [(plus "a") (minus "s")])
+;; Map smax to smin and umax to umin.
+(define_code_attr max_opp [(smax "smin") (umax "umin")])
+
;; The number of subvectors in an SVE_STRUCT.
(define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2")
(VNx8SI "2") (VNx4DI "2")
(define_code_iterator FMAXMIN [smax smin])
+;; Signed and unsigned max operations.
+(define_code_iterator USMAX [smax umax])
+
;; Code iterator for variants of vector max and min.
(define_code_iterator ADDSUB [plus minus])
(ior (match_operand 0 "register_operand")
(match_operand 0 "const_scalar_int_operand")))
+;; Matches an RTX whose code is smin.  Referenced by name through the
+;; "aarch64_<max_opp>" operator predicate when the max code is smax.
+(define_predicate "aarch64_smin"
+ (match_code "smin"))
+
+;; Matches an RTX whose code is umin.  Referenced by name through the
+;; "aarch64_<max_opp>" operator predicate when the max code is umax.
+(define_predicate "aarch64_umin"
+ (match_code "umin"))
+
;; True for integer comparisons and for FP comparisons other than LTGT or UNEQ.
(define_special_predicate "aarch64_comparison_operator"
(match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,
+2019-02-07 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * gcc.target/aarch64/abd_1.c: New test.
+ * gcc.dg/sabd_1.c: Likewise.
+
2019-02-07 Dominique d'Humieres <dominiq@gcc.gnu.org>
PR fortran/52789
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O3 -fwrapv" } */
+/* Make sure vectorized absolute difference behaves same as scalar version. */
+
+#define N 16
+signed char a[] = {-100, -100, -100, -100,-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100 };
+signed char b[] = { 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100 };
+
+signed char out[N];
+
+/* Version under test: stores the absolute value of the signed char
+   difference b[i] - a[i] into out[i].  With the inputs above the
+   mathematical difference is 200, which does not fit in signed char, so
+   the result depends on the narrowing happening before the abs.  */
+__attribute__ ((noinline,noipa))
+void
+foo (void)
+{
+ for (int i = 0; i < N; i++)
+ {
+ signed char diff = b[i] - a[i];
+ out[i] = diff > 0 ? diff : -diff;
+ }
+}
+
+signed char out2[N];
+
+/* Reference version: same computation as foo, but writing to out2.
+   NOTE(review): the empty asm volatile in the loop body is presumably
+   there to keep this loop from being vectorized -- confirm.  */
+__attribute__ ((noinline,noipa))
+void
+foo_scalar (void)
+{
+ for (int i = 0; i < N; i++)
+ {
+ asm volatile ("");
+ signed char diff = b[i] - a[i];
+ out2[i] = diff > 0 ? diff : -diff;
+ }
+}
+
+/* Run both versions and abort if any element of out differs from the
+   corresponding element of out2.  */
+int
+main (void)
+{
+ foo ();
+ foo_scalar ();
+ for (int i = 0; i < N; i++)
+ if (out[i] != out2[i])
+ __builtin_abort ();
+
+ return 0;
+}
+
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#pragma GCC target "+nosve"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define N 1024
+
+/* For element type T, define sabd_T (signed T) and uabd_T (unsigned T).
+   Each writes MAX (a[i], b[i]) - MIN (a[i], b[i]) to out[i] for N
+   elements; the scan-assembler directives below expect one sabd and one
+   uabd instruction per element size.  */
+#define FUNC(T) \
+void \
+sabd_##T (signed T * restrict a, signed T * restrict b, \
+ signed T * restrict out) \
+{ \
+ for (int i = 0; i < N; i++) \
+ out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]); \
+} \
+ \
+void \
+uabd_##T (unsigned T * restrict a, unsigned T * restrict b, \
+ unsigned T * restrict out) \
+{ \
+ for (int i = 0; i < N; i++) \
+ out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]); \
+}
+
+FUNC(char)
+FUNC(short)
+FUNC(int)
+
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */