[AArch64] Emit TARGET_DOTPROD-specific sequence for <us>sadv16qi

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Mon, 3 Jun 2019 11:20:58 +0000 (11:20 +0000)

committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>

Mon, 3 Jun 2019 11:20:58 +0000 (11:20 +0000)
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Mon, 3 Jun 2019 11:20:58 +0000 (11:20 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
Mon, 3 Jun 2019 11:20:58 +0000 (11:20 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 372e880987c22894540a7063c7ae0f7b752cb1c2..06184edd27f20cbf86ec79f92b592964666ce688 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-06-03  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * config/aarch64/iterators.md (MAX_OPP): New code attr.
+       * config/aarch64/aarch64-simd.md (*aarch64_<su>abd<mode>_3): Rename to...
+       (aarch64_<su>abd<mode>_3): ... This.
+       (<sur>sadv16qi): Add TARGET_DOTPROD expansion.
+
  2019-06-03  Richard Biener  <rguenther@suse.de>
  
         * tree-ssa-sccvn.c (ao_ref_init_from_vn_reference): Get original
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index d4c48d2aa613d6b33fa9b012a98cfd89c96fec9b..b648e9e791658c45bd82c5a08c3d9f5809951b2c 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -710,7 +710,7 @@
  ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
  ;; Whereas SABD would return 192 (-64 signed) on the above example.
  ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
-(define_insn "*aarch64_<su>abd<mode>_3"
+(define_insn "aarch64_<su>abd<mode>_3"
    [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
         (minus:VDQ_BHSI
           (USMAX:VDQ_BHSI
@@ -764,7 +764,16 @@
  ;; UABAL       tmp.8h, op1.16b, op2.16b
  ;; UADALP      op3.4s, tmp.8h
  ;; MOV         op0, op3 // should be eliminated in later passes.
-;; The signed version just uses the signed variants of the above instructions.
+;;
+;; For TARGET_DOTPROD we do:
+;; MOVI tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
+;; UABD        tmp2.16b, op1.16b, op2.16b
+;; UDOT        op3.4s, tmp2.16b, tmp1.16b
+;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
+;;
+;; The signed version just uses the signed variants of the above instructions,
+;; but for TARGET_DOTPROD it still emits a UDOT because the absolute
+;; difference is unsigned.
  
  (define_expand "<sur>sadv16qi"
    [(use (match_operand:V4SI 0 "register_operand"))
@@ -773,6 +782,15 @@
     (use (match_operand:V4SI 3 "register_operand"))]
    "TARGET_SIMD"
    {
+    if (TARGET_DOTPROD)
+      {
+       rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
+       rtx abd = gen_reg_rtx (V16QImode);
+       emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
+       emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
+                                         abd, ones));
+       DONE;
+      }
      rtx reduc = gen_reg_rtx (V8HImode);
      emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
                                                operands[2]));
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 999bdc25db892a1e62b42f34a12d86737058c8ca..112cf11f58ed48737696ec09836181462af4d848 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2019-06-03  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * gcc.target/aarch64/ssadv16qi.c: Add +nodotprod to pragma.
+       * gcc.target/aarch64/usadv16qi.c: Likewise.
+       * gcc.target/aarch64/ssadv16qi-dotprod.c: New test.
+       * gcc.target/aarch64/usadv16qi-dotprod.c: Likewise.
+
  2019-06-03  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
  
         * lib/target-supports.exp (add_options_for_aarch64_sve): New procedure.
diff --git a/gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c b/gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c

new file mode 100644 (file)

index 0000000..08b6831
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_ok } */
+/* { dg-add-options arm_v8_2a_dotprod_neon }  */
+/* { dg-additional-options "-O3" } */
+
+#pragma GCC target "+nosve"
+
+#define N 1024
+
+signed char pix1[N], pix2[N];
+
+int foo (void)
+{
+  int i_sum = 0;
+  int i;
+
+  for (i = 0; i < N; i++)
+    i_sum += __builtin_abs (pix1[i] - pix2[i]);
+
+  return i_sum;
+}
+
+/* { dg-final { scan-assembler-not {\tsshll\t} } } */
+/* { dg-final { scan-assembler-not {\tsshll2\t} } } */
+/* { dg-final { scan-assembler-not {\tssubl\t} } } */
+/* { dg-final { scan-assembler-not {\tssubl2\t} } } */
+/* { dg-final { scan-assembler-not {\tabs\t} } } */
+
+/* { dg-final { scan-assembler {\tsabd\t} } } */
+/* { dg-final { scan-assembler {\tudot\t} } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c b/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c

index 40b28843616e84df137210b45ec16abed2a37c75..85a867a113013f560bfd0a3142805b9c95ad8c5a 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c
+++ b/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c
@@ -1,7 +1,7 @@
  /* { dg-do compile } */
  /* { dg-options "-O3" } */
  
-#pragma GCC target "+nosve"
+#pragma GCC target "+nosve+nodotprod"
  
  #define N 1024
  
diff --git a/gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c b/gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c

new file mode 100644 (file)

index 0000000..ea8de4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_ok } */
+/* { dg-add-options arm_v8_2a_dotprod_neon }  */
+/* { dg-additional-options "-O3" } */
+
+#pragma GCC target "+nosve"
+
+#define N 1024
+
+unsigned char pix1[N], pix2[N];
+
+int foo (void)
+{
+  int i_sum = 0;
+  int i;
+
+  for (i = 0; i < N; i++)
+    i_sum += __builtin_abs (pix1[i] - pix2[i]);
+
+  return i_sum;
+}
+
+/* { dg-final { scan-assembler-not {\tushll\t} } } */
+/* { dg-final { scan-assembler-not {\tushll2\t} } } */
+/* { dg-final { scan-assembler-not {\tusubl\t} } } */
+/* { dg-final { scan-assembler-not {\tusubl2\t} } } */
+/* { dg-final { scan-assembler-not {\tabs\t} } } */
+
+/* { dg-final { scan-assembler {\tuabd\t} } } */
+/* { dg-final { scan-assembler {\tudot\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/usadv16qi.c b/gcc/testsuite/gcc.target/aarch64/usadv16qi.c

index 69ceaf4259ea43e95078ce900d2498c3a2291369..a66e1209662cefaa95c90d8d2694f9c7c0de4152 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/usadv16qi.c
+++ b/gcc/testsuite/gcc.target/aarch64/usadv16qi.c
@@ -1,7 +1,7 @@
  /* { dg-do compile } */
  /* { dg-options "-O3" } */
  
-#pragma GCC target "+nosve"
+#pragma GCC target "+nosve+nodotprod"
  
  #define N 1024
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Mon, 3 Jun 2019 11:20:58 +0000 (11:20 +0000)
committer	Kyrylo Tkachov <ktkachov@gcc.gnu.org>
	Mon, 3 Jun 2019 11:20:58 +0000 (11:20 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/ssadv16qi.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/usadv16qi.c		patch \| blob \| history