[AArch64] Add basic FP16 support

author Alan Lawrence <alan.lawrence@arm.com>

Wed, 29 Jul 2015 12:27:05 +0000 (12:27 +0000)

committer Alan Lawrence <alalaw01@gcc.gnu.org>

Wed, 29 Jul 2015 12:27:05 +0000 (12:27 +0000)
author Alan Lawrence <alan.lawrence@arm.com>
Wed, 29 Jul 2015 12:27:05 +0000 (12:27 +0000)
committer Alan Lawrence <alalaw01@gcc.gnu.org>
Wed, 29 Jul 2015 12:27:05 +0000 (12:27 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index ed752fc9e5a4dd741e1d12e33642cd3874bb6f02..1ccf95e32f87ffc7a6691a8b29524c2ac6ca7365 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+2015-07-29  Alan Lawrence  <alan.lawrence@arm.com>
+
+       * config/aarch64/aarch64-builtins.c (aarch64_fp16_type_node): New.
+       (aarch64_init_builtins): Make aarch64_fp16_type_node, use for __fp16.
+
+       * config/aarch64/aarch64-modes.def: Add HFmode.
+
+       * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define
+       __ARM_FP16_FORMAT_IEEE and __ARM_FP16_ARGS. Set bit 1 of __ARM_FP.
+
+       * config/aarch64/aarch64.c (aarch64_init_libfuncs,
+       aarch64_promoted_type): New.
+
+       (aarch64_float_const_representable_p): Disable HFmode.
+       (aarch64_mangle_type): Mangle half-precision floats to "Dh".
+       (TARGET_PROMOTED_TYPE): Define to aarch64_promoted_type.
+       (TARGET_INIT_LIBFUNCS): Define to aarch64_init_libfuncs.
+
+       * config/aarch64/aarch64.md (mov<mode>): Include HFmode using GPF_F16.
+       (movhf_aarch64, extendhfsf2, extendhfdf2, truncsfhf2, truncdfhf2): New.
+
+       * config/aarch64/iterators.md (GPF_F16): New.
+
  2015-07-29  Richard Biener  <rguenther@suse.de>
  
         * match.pd: Merge address comparison patterns and make them
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c

index 4b7832940610dc467863f0a3fb1a28f76873972f..800f6e1ffcd358aa22ceecbc460bc1dcac4acd9e 100644 (file)
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -436,6 +436,9 @@ static struct aarch64_simd_type_info aarch64_simd_types [] = {
  };
  #undef ENTRY
  
+/* This type is not SIMD-specific; it is the user-visible __fp16.  */
+static tree aarch64_fp16_type_node = NULL_TREE;
+
  static tree aarch64_simd_intOI_type_node = NULL_TREE;
  static tree aarch64_simd_intEI_type_node = NULL_TREE;
  static tree aarch64_simd_intCI_type_node = NULL_TREE;
@@ -846,6 +849,12 @@ aarch64_init_builtins (void)
      = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
                             AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE);
  
+  aarch64_fp16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
+  layout_type (aarch64_fp16_type_node);
+
+  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
+
    if (TARGET_SIMD)
      aarch64_init_simd_builtins ();
    if (TARGET_CRC32)
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def

index b17b90d90601ae0a631a78560da743720c4638ce..3160bef1105fd4b9de4ac61b23b3dca975d1efaa 100644 (file)
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -36,6 +36,10 @@ CC_MODE (CC_DLTU);
  CC_MODE (CC_DGEU);
  CC_MODE (CC_DGTU);
  
+/* Half-precision floating point for __fp16.  */
+FLOAT_MODE (HF, 2, 0);
+ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
+
  /* Vector modes.  */
  VECTOR_MODES (INT, 8);        /*       V8QI V4HI V2SI.  */
  VECTOR_MODES (INT, 16);       /* V16QI V8HI V4SI V2DI.  */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 9ba16297c3aaf29c478db9d8533ee5e1494f7c71..844676fddce308406ab88483d4943791f4b254e3 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8700,6 +8700,10 @@ aarch64_mangle_type (const_tree type)
    if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
      return "St9__va_list";
  
+  /* Half-precision float.  */
+  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
+    return "Dh";
+
    /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
       builtin types.  */
    if (TYPE_NAME (type) != NULL)
@@ -9940,6 +9944,33 @@ aarch64_start_file (void)
    default_file_start();
  }
  
+static void
+aarch64_init_libfuncs (void)
+{
+   /* Half-precision float operations.  The compiler handles all operations
+     with NULL libfuncs by converting to SFmode.  */
+
+  /* Conversions.  */
+  set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
+  set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");
+
+  /* Arithmetic.  */
+  set_optab_libfunc (add_optab, HFmode, NULL);
+  set_optab_libfunc (sdiv_optab, HFmode, NULL);
+  set_optab_libfunc (smul_optab, HFmode, NULL);
+  set_optab_libfunc (neg_optab, HFmode, NULL);
+  set_optab_libfunc (sub_optab, HFmode, NULL);
+
+  /* Comparisons.  */
+  set_optab_libfunc (eq_optab, HFmode, NULL);
+  set_optab_libfunc (ne_optab, HFmode, NULL);
+  set_optab_libfunc (lt_optab, HFmode, NULL);
+  set_optab_libfunc (le_optab, HFmode, NULL);
+  set_optab_libfunc (ge_optab, HFmode, NULL);
+  set_optab_libfunc (gt_optab, HFmode, NULL);
+  set_optab_libfunc (unord_optab, HFmode, NULL);
+}
+
  /* Target hook for c_mode_for_suffix.  */
  static machine_mode
  aarch64_c_mode_for_suffix (char suffix)
@@ -9978,7 +10009,8 @@ aarch64_float_const_representable_p (rtx x)
    if (!CONST_DOUBLE_P (x))
      return false;
  
-  if (GET_MODE (x) == VOIDmode)
+  /* We don't support HFmode constants yet.  */
+  if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
      return false;
  
    REAL_VALUE_FROM_CONST_DOUBLE (r, x);
@@ -11940,6 +11972,14 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
    return default_unspec_may_trap_p (x, flags);
  }
  
+/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float.  */
+static tree
+aarch64_promoted_type (const_tree t)
+{
+  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+    return float_type_node;
+  return NULL_TREE;
+}
  #undef TARGET_ADDRESS_COST
  #define TARGET_ADDRESS_COST aarch64_address_cost
  
@@ -12094,6 +12134,9 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
  #undef TARGET_SCHED_REASSOCIATION_WIDTH
  #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
  
+#undef TARGET_PROMOTED_TYPE
+#define TARGET_PROMOTED_TYPE aarch64_promoted_type
+
  #undef TARGET_SECONDARY_RELOAD
  #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
  
@@ -12186,6 +12229,8 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
  #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
    aarch64_vectorize_vec_perm_const_ok
  
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
  
  #undef TARGET_FIXED_CONDITION_CODE_REGS
  #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h

index 385156482f2e98fba60cc5404356be993ab20f74..535695c4f450cf4d362e631a2864379283ae1fc6 100644 (file)
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -61,7 +61,9 @@
        if (TARGET_FLOAT)                                         \
          {                                                       \
            builtin_define ("__ARM_FEATURE_FMA");                 \
-          builtin_define_with_int_value ("__ARM_FP", 0x0C);     \
+         builtin_define_with_int_value ("__ARM_FP", 0x0E);     \
+         builtin_define ("__ARM_FP16_FORMAT_IEEE");            \
+         builtin_define ("__ARM_FP16_ARGS");                   \
          }                                                       \
        if (TARGET_SIMD)                                          \
          {                                                       \
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md

index 01cdf9c74d54068b73da84371feb116a9457f79a..b7b04c4cbfff4968d1cf9937f5df599d55ac31f2 100644 (file)
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -978,8 +978,8 @@
  })
  
  (define_expand "mov<mode>"
-  [(set (match_operand:GPF 0 "nonimmediate_operand" "")
-       (match_operand:GPF 1 "general_operand" ""))]
+  [(set (match_operand:GPF_F16 0 "nonimmediate_operand" "")
+       (match_operand:GPF_F16 1 "general_operand" ""))]
    ""
    {
      if (!TARGET_FLOAT)
@@ -995,6 +995,26 @@
    }
  )
  
+(define_insn "*movhf_aarch64"
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r")
+       (match_operand:HF 1 "general_operand"      "?rY, w,w,m,w,m,rY,r"))]
+  "TARGET_FLOAT && (register_operand (operands[0], HFmode)
+    || register_operand (operands[1], HFmode))"
+  "@
+   mov\\t%0.h[0], %w1
+   umov\\t%w0, %1.h[0]
+   mov\\t%0.h[0], %1.h[0]
+   ldr\\t%h0, %1
+   str\\t%h1, %0
+   ldrh\\t%w0, %1
+   strh\\t%w1, %0
+   mov\\t%w0, %w1"
+  [(set_attr "type" "neon_from_gp,neon_to_gp,fmov,\
+                     f_loads,f_stores,load1,store1,mov_reg")
+   (set_attr "simd" "yes,yes,yes,*,*,*,*,*")
+   (set_attr "fp"   "*,*,*,yes,yes,*,*,*")]
+)
+
  (define_insn "*movsf_aarch64"
    [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
         (match_operand:SF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
@@ -4093,6 +4113,22 @@
    [(set_attr "type" "f_cvt")]
  )
  
+(define_insn "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand" "=w")
+        (float_extend:SF (match_operand:HF 1 "register_operand" "w")))]
+  "TARGET_FLOAT"
+  "fcvt\\t%s0, %h1"
+  [(set_attr "type" "f_cvt")]
+)
+
+(define_insn "extendhfdf2"
+  [(set (match_operand:DF 0 "register_operand" "=w")
+        (float_extend:DF (match_operand:HF 1 "register_operand" "w")))]
+  "TARGET_FLOAT"
+  "fcvt\\t%d0, %h1"
+  [(set_attr "type" "f_cvt")]
+)
+
  (define_insn "truncdfsf2"
    [(set (match_operand:SF 0 "register_operand" "=w")
          (float_truncate:SF (match_operand:DF 1 "register_operand" "w")))]
@@ -4101,6 +4137,22 @@
    [(set_attr "type" "f_cvt")]
  )
  
+(define_insn "truncsfhf2"
+  [(set (match_operand:HF 0 "register_operand" "=w")
+        (float_truncate:HF (match_operand:SF 1 "register_operand" "w")))]
+  "TARGET_FLOAT"
+  "fcvt\\t%h0, %s1"
+  [(set_attr "type" "f_cvt")]
+)
+
+(define_insn "truncdfhf2"
+  [(set (match_operand:HF 0 "register_operand" "=w")
+        (float_truncate:HF (match_operand:DF 1 "register_operand" "w")))]
+  "TARGET_FLOAT"
+  "fcvt\\t%h0, %d1"
+  [(set_attr "type" "f_cvt")]
+)
+
  (define_insn "fix_trunc<GPF:mode><GPI:mode>2"
    [(set (match_operand:GPI 0 "register_operand" "=r")
          (fix:GPI (match_operand:GPF 1 "register_operand" "w")))]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index b19d3d743a733303371341d9f323f47ed83618c3..647817ea1a8d193628384a8ef202ee1e91fbc41b 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -38,6 +38,9 @@
  ;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes)
  (define_mode_iterator GPF [SF DF])
  
+;; Iterator for General Purpose Float registers, inc __fp16.
+(define_mode_iterator GPF_F16 [HF SF DF])
+
  ;; Integer vector modes.
  (define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
  
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 9c9d962ff42b6f33dd79147d88dcc2d1c675e770..5853526ea3d3c8941d569e79efffc7f20e8cff32 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2015-07-29  Alan Lawrence  <alan.lawrence@arm.com>
+
+       * gcc.target/aarch64/f16_movs_1.c: New test.
+
  2015-07-28  Tom de Vries  <tom@codesourcery.com>
  
         * gcc.dg/autopar/uns-outer-4.c: Remove xfail on scan for parallelizing
diff --git a/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c b/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c

new file mode 100644 (file)

index 0000000..6cb8086
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* { dg-options "-fno-inline -O2" } */
+
+#include <arm_neon.h>
+
+__fp16
+func2 (__fp16 a, __fp16 b)
+{
+  return b;
+}
+
+int
+main (int argc, char **argv)
+{
+  __fp16 array[16];
+  int i;
+
+  for (i = 0; i < sizeof (array) / sizeof (array[0]); i++)
+    array[i] = i;
+
+  array[0] = func2 (array[1], array[2]);
+
+  __builtin_printf ("%f\n", array[0]); /* { dg-output "2.0" } */
+
+  return 0;
+}
author	Alan Lawrence <alan.lawrence@arm.com>
	Wed, 29 Jul 2015 12:27:05 +0000 (12:27 +0000)
committer	Alan Lawrence <alalaw01@gcc.gnu.org>
	Wed, 29 Jul 2015 12:27:05 +0000 (12:27 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-builtins.c		patch \| blob \| history
gcc/config/aarch64/aarch64-modes.def		patch \| blob \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| history
gcc/config/aarch64/aarch64.h		patch \| blob \| history
gcc/config/aarch64/aarch64.md		patch \| blob \| history
gcc/config/aarch64/iterators.md		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/f16_movs_1.c	[new file with mode: 0644]	patch \| blob