[Patch AArch64] Use 128-bit vectors when autovectorizing 16-bit float types
author: James Greenhalgh <james.greenhalgh@arm.com>
Tue, 14 Feb 2017 14:48:33 +0000 (14:48 +0000)
committer: James Greenhalgh <jgreenhalgh@gcc.gnu.org>
Tue, 14 Feb 2017 14:48:33 +0000 (14:48 +0000)
gcc/

* config/aarch64/aarch64.c (aarch64_simd_container_mode): Handle
HFmode.

gcc/testsuite/

* gcc.target/aarch64/vect_fp16_1.c: New.

From-SVN: r245429

gcc/ChangeLog
gcc/config/aarch64/aarch64.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/vect_fp16_1.c [new file with mode: 0644]

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2692bac47bee5b47576b8ff9acb9e958469eb6b5..2c184214d0f9207e5bb51d075d1d47f0787fd641 100644 (file)
@@ -1,3 +1,8 @@
+2017-02-14  James Greenhalgh  <james.greenhalgh@arm.com>
+
+       * config/aarch64/aarch64.c (aarch64_simd_container_mode): Handle
+       HFmode.
+
 2017-02-14  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        PR rtl-optimization/68664
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 45404063aeae36a0703971aa4d6d4fe9e58027c1..ab1bdc0233afe7a3c41501cb724a5c4c719123b8 100644 (file)
@@ -10845,6 +10845,8 @@ aarch64_simd_container_mode (machine_mode mode, unsigned width)
            return V2DFmode;
          case SFmode:
            return V4SFmode;
+         case HFmode:
+           return V8HFmode;
          case SImode:
            return V4SImode;
          case HImode:
@@ -10861,6 +10863,8 @@ aarch64_simd_container_mode (machine_mode mode, unsigned width)
          {
          case SFmode:
            return V2SFmode;
+         case HFmode:
+           return V4HFmode;
          case SImode:
            return V2SImode;
          case HImode:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index cc10fd4c8386311ee89406aa92542dd9dc5fa417..6f6f0da8b0dcd2627f8d0d364e112ad4d8978e3b 100644 (file)
@@ -1,3 +1,7 @@
+2017-02-14  James Greenhalgh  <james.greenhalgh@arm.com>
+
+       * gcc.target/aarch64/vect_fp16_1.c: New.
+
 2017-02-14  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
 
        * gcc.dg/gimplefe-25.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_fp16_1.c b/gcc/testsuite/gcc.target/aarch64/vect_fp16_1.c
new file mode 100644 (file)
index 0000000..da0cd81
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-vect-cost-model" } */
+
+/* Check that we vectorize to a full 128-bit vector for _Float16 and __fp16
+   types.  */
+
+/* Enable ARMv8.2-A+fp16 so we have access to the vector instructions.  */
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+/* Store B[i] + C[i] into A[i] for 256 _Float16 elements.  Declared void
+   because nothing is returned; the test only scans the assembly.  */
+void
+sum_Float16 (_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
+            _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
+            _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* As sum_Float16, but for the __fp16 type.  */
+void
+sum_fp16 (__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
+         __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
+         __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
+{
+  for (int i = 0; i < 256; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* Two FADD operations on "8h" data widths, one from sum_Float16, one from
+   sum_fp16.  */
+/* { dg-final { scan-assembler-times "fadd\tv\[0-9\]\+.8h" 2 } } */