From: Alan Lawrence Date: Wed, 29 Jul 2015 12:27:05 +0000 (+0000) Subject: [AArch64] Add basic FP16 support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c2ec330c9af93429e85803ef2e7f2a92a23d3f64;p=gcc.git [AArch64] Add basic FP16 support gcc/: * config/aarch64/aarch64-builtins.c (aarch64_fp16_type_node): New. (aarch64_init_builtins): Make aarch64_fp16_type_node, use for __fp16. * config/aarch64/aarch64-modes.def: Add HFmode. * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define __ARM_FP16_FORMAT_IEEE and __ARM_FP16_ARGS. Set bit 1 of __ARM_FP. * config/aarch64/aarch64.c (aarch64_init_libfuncs, aarch64_promoted_type): New. (aarch64_float_const_representable_p): Disable HFmode. (aarch64_mangle_type): Mangle half-precision floats to "Dh". (TARGET_PROMOTED_TYPE): Define to aarch64_promoted_type. (TARGET_INIT_LIBFUNCS): Define to aarch64_init_libfuncs. * config/aarch64/aarch64.md (mov): Include HFmode using GPF_F16. (movhf_aarch64, extendhfsf2, extendhfdf2, truncsfhf2, truncdfhf2): New. * config/aarch64/iterators.md (GPF_F16): New. gcc/testsuite/: * gcc.target/aarch64/f16_movs_1.c: New test. From-SVN: r226346 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ed752fc9e5a..1ccf95e32f8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +2015-07-29 Alan Lawrence + + * config/aarch64/aarch64-builtins.c (aarch64_fp16_type_node): New. + (aarch64_init_builtins): Make aarch64_fp16_type_node, use for __fp16. + + * config/aarch64/aarch64-modes.def: Add HFmode. + + * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define + __ARM_FP16_FORMAT_IEEE and __ARM_FP16_ARGS. Set bit 1 of __ARM_FP. + + * config/aarch64/aarch64.c (aarch64_init_libfuncs, + aarch64_promoted_type): New. + + (aarch64_float_const_representable_p): Disable HFmode. + (aarch64_mangle_type): Mangle half-precision floats to "Dh". + (TARGET_PROMOTED_TYPE): Define to aarch64_promoted_type. + (TARGET_INIT_LIBFUNCS): Define to aarch64_init_libfuncs. + + * config/aarch64/aarch64.md (mov): Include HFmode using GPF_F16. + (movhf_aarch64, extendhfsf2, extendhfdf2, truncsfhf2, truncdfhf2): New. + + * config/aarch64/iterators.md (GPF_F16): New. + 2015-07-29 Richard Biener * match.pd: Merge address comparison patterns and make them diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 4b783294061..800f6e1ffcd 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -436,6 +436,9 @@ static struct aarch64_simd_type_info aarch64_simd_types [] = { }; #undef ENTRY +/* This type is not SIMD-specific; it is the user-visible __fp16. */ +static tree aarch64_fp16_type_node = NULL_TREE; + static tree aarch64_simd_intOI_type_node = NULL_TREE; static tree aarch64_simd_intEI_type_node = NULL_TREE; static tree aarch64_simd_intCI_type_node = NULL_TREE; @@ -846,6 +849,12 @@ aarch64_init_builtins (void) = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_fp16_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (aarch64_fp16_type_node) = 16; + layout_type (aarch64_fp16_type_node); + + (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16"); + if (TARGET_SIMD) aarch64_init_simd_builtins (); if (TARGET_CRC32) diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index b17b90d9060..3160bef1105 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -36,6 +36,10 @@ CC_MODE (CC_DLTU); CC_MODE (CC_DGEU); CC_MODE (CC_DGTU); +/* Half-precision floating point for __fp16. */ +FLOAT_MODE (HF, 2, 0); +ADJUST_FLOAT_FORMAT (HF, &ieee_half_format); + /* Vector modes. */ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 9ba16297c3a..844676fddce 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -8700,6 +8700,10 @@ aarch64_mangle_type (const_tree type) if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) return "St9__va_list"; + /* Half-precision float. */ + if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) + return "Dh"; + /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for builtin types. */ if (TYPE_NAME (type) != NULL) @@ -9940,6 +9944,33 @@ aarch64_start_file (void) default_file_start(); } +static void +aarch64_init_libfuncs (void) +{ + /* Half-precision float operations. The compiler handles all operations + with NULL libfuncs by converting to SFmode. */ + + /* Conversions. */ + set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee"); + set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee"); + + /* Arithmetic. */ + set_optab_libfunc (add_optab, HFmode, NULL); + set_optab_libfunc (sdiv_optab, HFmode, NULL); + set_optab_libfunc (smul_optab, HFmode, NULL); + set_optab_libfunc (neg_optab, HFmode, NULL); + set_optab_libfunc (sub_optab, HFmode, NULL); + + /* Comparisons. */ + set_optab_libfunc (eq_optab, HFmode, NULL); + set_optab_libfunc (ne_optab, HFmode, NULL); + set_optab_libfunc (lt_optab, HFmode, NULL); + set_optab_libfunc (le_optab, HFmode, NULL); + set_optab_libfunc (ge_optab, HFmode, NULL); + set_optab_libfunc (gt_optab, HFmode, NULL); + set_optab_libfunc (unord_optab, HFmode, NULL); +} + /* Target hook for c_mode_for_suffix. */ static machine_mode aarch64_c_mode_for_suffix (char suffix) @@ -9978,7 +10009,8 @@ aarch64_float_const_representable_p (rtx x) if (!CONST_DOUBLE_P (x)) return false; - if (GET_MODE (x) == VOIDmode) + /* We don't support HFmode constants yet. */ + if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode) return false; REAL_VALUE_FROM_CONST_DOUBLE (r, x); @@ -11940,6 +11972,14 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags) return default_unspec_may_trap_p (x, flags); } +/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */ +static tree +aarch64_promoted_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return float_type_node; + return NULL_TREE; +} #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST aarch64_address_cost @@ -12094,6 +12134,9 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags) #undef TARGET_SCHED_REASSOCIATION_WIDTH #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width +#undef TARGET_PROMOTED_TYPE +#define TARGET_PROMOTED_TYPE aarch64_promoted_type + #undef TARGET_SECONDARY_RELOAD #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload @@ -12186,6 +12229,8 @@ aarch64_unspec_may_trap_p (const_rtx x, unsigned flags) #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ aarch64_vectorize_vec_perm_const_ok +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs #undef TARGET_FIXED_CONDITION_CODE_REGS #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 385156482f2..535695c4f45 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -61,7 +61,9 @@ if (TARGET_FLOAT) \ { \ builtin_define ("__ARM_FEATURE_FMA"); \ - builtin_define_with_int_value ("__ARM_FP", 0x0C); \ + builtin_define_with_int_value ("__ARM_FP", 0x0E); \ + builtin_define ("__ARM_FP16_FORMAT_IEEE"); \ + builtin_define ("__ARM_FP16_ARGS"); \ } \ if (TARGET_SIMD) \ { \ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 01cdf9c74d5..b7b04c4cbff 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -978,8 +978,8 @@ }) (define_expand "mov" - [(set (match_operand:GPF 0 "nonimmediate_operand" "") - (match_operand:GPF 1 "general_operand" ""))] + [(set (match_operand:GPF_F16 0 "nonimmediate_operand" "") + (match_operand:GPF_F16 1 "general_operand" ""))] "" { if (!TARGET_FLOAT) @@ -995,6 +995,26 @@ } ) +(define_insn "*movhf_aarch64" + [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r") + (match_operand:HF 1 "general_operand" "?rY, w,w,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], HFmode) + || register_operand (operands[1], HFmode))" + "@ + mov\\t%0.h[0], %w1 + umov\\t%w0, %1.h[0] + mov\\t%0.h[0], %1.h[0] + ldr\\t%h0, %1 + str\\t%h1, %0 + ldrh\\t%w0, %1 + strh\\t%w1, %0 + mov\\t%w0, %w1" + [(set_attr "type" "neon_from_gp,neon_to_gp,fmov,\ + f_loads,f_stores,load1,store1,mov_reg") + (set_attr "simd" "yes,yes,yes,*,*,*,*,*") + (set_attr "fp" "*,*,*,yes,yes,*,*,*")] +) + (define_insn "*movsf_aarch64" [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") (match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] @@ -4093,6 +4113,22 @@ [(set_attr "type" "f_cvt")] ) +(define_insn "extendhfsf2" + [(set (match_operand:SF 0 "register_operand" "=w") + (float_extend:SF (match_operand:HF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvt\\t%s0, %h1" + [(set_attr "type" "f_cvt")] +) + +(define_insn "extendhfdf2" + [(set (match_operand:DF 0 "register_operand" "=w") + (float_extend:DF (match_operand:HF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvt\\t%d0, %h1" + [(set_attr "type" "f_cvt")] +) + (define_insn "truncdfsf2" [(set (match_operand:SF 0 "register_operand" "=w") (float_truncate:SF (match_operand:DF 1 "register_operand" "w")))] @@ -4101,6 +4137,22 @@ [(set_attr "type" "f_cvt")] ) +(define_insn "truncsfhf2" + [(set (match_operand:HF 0 "register_operand" "=w") + (float_truncate:HF (match_operand:SF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvt\\t%h0, %s1" + [(set_attr "type" "f_cvt")] +) + +(define_insn "truncdfhf2" + [(set (match_operand:HF 0 "register_operand" "=w") + (float_truncate:HF (match_operand:DF 1 "register_operand" "w")))] + "TARGET_FLOAT" + "fcvt\\t%h0, %d1" + [(set_attr "type" "f_cvt")] +) + (define_insn "fix_trunc2" [(set (match_operand:GPI 0 "register_operand" "=r") (fix:GPI (match_operand:GPF 1 "register_operand" "w")))] diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index b19d3d743a7..647817ea1a8 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -38,6 +38,9 @@ ;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes) (define_mode_iterator GPF [SF DF]) +;; Iterator for General Purpose Float registers, inc __fp16. +(define_mode_iterator GPF_F16 [HF SF DF]) + ;; Integer vector modes. (define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9c9d962ff42..5853526ea3d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2015-07-29 Alan Lawrence + + * gcc.target/aarch64/f16_movs_1.c: New test. + 2015-07-28 Tom de Vries * gcc.dg/autopar/uns-outer-4.c: Remove xfail on scan for parallelizing diff --git a/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c b/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c new file mode 100644 index 00000000000..6cb80866790 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/f16_movs_1.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-fno-inline -O2" } */ + +#include + +__fp16 +func2 (__fp16 a, __fp16 b) +{ + return b; +} + +int +main (int argc, char **argv) +{ + __fp16 array[16]; + int i; + + for (i = 0; i < sizeof (array) / sizeof (array[0]); i++) + array[i] = i; + + array[0] = func2 (array[1], array[2]); + + __builtin_printf ("%f\n", array[0]); /* { dg-output "2.0" } */ + + return 0; +}