From: James Greenhalgh Date: Fri, 5 Aug 2016 16:08:24 +0000 (+0000) Subject: [AArch64] Handle HFAs of float16 types properly X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1b62ed4f2638c59f30ff9480789b4cdeb740ddbe;p=gcc.git [AArch64] Handle HFAs of float16 types properly Fix PR Target/72819. gcc/ PR Target/72819 * config/aarch64/aarch64.h (aarch64_fp16_type_node): Declare. (aarch64_fp16_ptr_type_node): Likewise. * config/aarch64/aarch64-simd-builtins.c (aarch64_fp16_ptr_type_node): Define. (aarch64_init_fp16_types): New, refactored out of... (aarch64_init_builtins): ...here, update to call aarch64_init_fp16_types. * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Handle HFmode. (aapcs_vfp_sub_candidate): Likewise. gcc/testsuite/ PR Target/72819 * gcc.target/aarch64/aapcs64/abitest-common.h: Define half-precision registers. * gcc.target/aarch64/aapcs64/abitest.S (dumpregs): Add assembly for saving the half-precision registers. * gcc.target/aarch64/aapcs64/func-ret-1.c: Test that an __fp16 value is returned in h0. * gcc.target/aarch64/aapcs64/test_2.c: Check that __FP16 arguments are passed in FP/SIMD registers. * gcc.target/aarch64/aapcs64/test_27.c: New, test that __fp16 HFA passing works corrcetly. * gcc.target/aarch64/aapcs64/type-def.h (hfa_f16x1_t): New. (hfa_f16x2_t): Likewise. (hfa_f16x3_t): Likewise. * gcc.target/aarch64/aapcs64/va_arg-1.c: Check that __fp16 values are promoted to double and passed in a double register. * gcc.target/aarch64/aapcs64/va_arg-2.c: Check that __fp16 values are promoted to double and stacked. * gcc.target/aarch64/aapcs64/va_arg-4.c: Check stacking of HFA of __fp16 data types. * gcc.target/aarch64/aapcs64/va_arg-5.c: Likewise. * gcc.target/aarch64/aapcs64/va_arg-16.c: New, check HFAs of __fp16 first get passed in FP/SIMD registers, then stacked. From-SVN: r239173 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b8d3e3040d0..1925a05d0e4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2016-08-05 James Greenhalgh + + PR Target/72819 + * config/aarch64/aarch64.h (aarch64_fp16_type_node): Declare. + (aarch64_fp16_ptr_type_node): Likewise. + * config/aarch64/aarch64-simd-builtins.c + (aarch64_fp16_ptr_type_node): Define. + (aarch64_init_fp16_types): New, refactored out of... + (aarch64_init_builtins): ...here, update to call + aarch64_init_fp16_types. + * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Handle + HFmode. + (aapcs_vfp_sub_candidate): Likewise. + 2016-08-05 Martin Liska Joshua Cranmer diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index ca91d9108ea..1de325a0fc3 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -443,13 +443,15 @@ static struct aarch64_simd_type_info aarch64_simd_types [] = { }; #undef ENTRY -/* This type is not SIMD-specific; it is the user-visible __fp16. */ -static tree aarch64_fp16_type_node = NULL_TREE; - static tree aarch64_simd_intOI_type_node = NULL_TREE; static tree aarch64_simd_intCI_type_node = NULL_TREE; static tree aarch64_simd_intXI_type_node = NULL_TREE; +/* The user-visible __fp16 type, and a pointer to that type. Used + across the back-end. */ +tree aarch64_fp16_type_node = NULL_TREE; +tree aarch64_fp16_ptr_type_node = NULL_TREE; + static const char * aarch64_mangle_builtin_scalar_type (const_tree type) { @@ -883,6 +885,21 @@ aarch64_init_builtin_rsqrt (void) } } +/* Initialize the backend types that support the user-visible __fp16 + type, also initialize a pointer to that type, to be used when + forming HFAs. */ + +static void +aarch64_init_fp16_types (void) +{ + aarch64_fp16_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (aarch64_fp16_type_node) = 16; + layout_type (aarch64_fp16_type_node); + + (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16"); + aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node); +} + void aarch64_init_builtins (void) { @@ -904,11 +921,7 @@ aarch64_init_builtins (void) = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); - aarch64_fp16_type_node = make_node (REAL_TYPE); - TYPE_PRECISION (aarch64_fp16_type_node) = 16; - layout_type (aarch64_fp16_type_node); - - (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16"); + aarch64_init_fp16_types (); if (TARGET_SIMD) aarch64_init_simd_builtins (); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 2129a5de765..3e663eb5f13 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -9881,15 +9881,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, field_t = long_double_type_node; field_ptr_t = long_double_ptr_type_node; break; -/* The half precision and quad precision are not fully supported yet. Enable - the following code after the support is complete. Need to find the correct - type node for __fp16 *. */ -#if 0 case HFmode: - field_t = float_type_node; - field_ptr_t = float_ptr_type_node; + field_t = aarch64_fp16_type_node; + field_ptr_t = aarch64_fp16_ptr_type_node; break; -#endif case V2SImode: case V4SImode: { @@ -10051,7 +10046,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) { case REAL_TYPE: mode = TYPE_MODE (type); - if (mode != DFmode && mode != SFmode && mode != TFmode) + if (mode != DFmode && mode != SFmode + && mode != TFmode && mode != HFmode) return -1; if (*modep == VOIDmode) @@ -10064,7 +10060,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) case COMPLEX_TYPE: mode = TYPE_MODE (TREE_TYPE (type)); - if (mode != DFmode && mode != SFmode && mode != TFmode) + if (mode != DFmode && mode != SFmode + && mode != TFmode && mode != HFmode) return -1; if (*modep == VOIDmode) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 003fec87e41..19caf9f2979 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -944,4 +944,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define ASM_OUTPUT_POOL_EPILOGUE aarch64_asm_output_pool_epilogue +/* This type is the user-visible __fp16, and a pointer to that type. We + need it in many places in the backend. Defined in aarch64-builtins.c. */ +extern tree aarch64_fp16_type_node; +extern tree aarch64_fp16_ptr_type_node; + #endif /* GCC_AARCH64_H */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e807b298fe3..c133af963ea 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,29 @@ +2016-08-05 James Greenhalgh + + PR Target/72819 + * gcc.target/aarch64/aapcs64/abitest-common.h: Define half-precision + registers. + * gcc.target/aarch64/aapcs64/abitest.S (dumpregs): Add assembly for + saving the half-precision registers. + * gcc.target/aarch64/aapcs64/func-ret-1.c: Test that an __fp16 + value is returned in h0. + * gcc.target/aarch64/aapcs64/test_2.c: Check that __FP16 arguments + are passed in FP/SIMD registers. + * gcc.target/aarch64/aapcs64/test_27.c: New, test that __fp16 HFA + passing works corrcetly. + * gcc.target/aarch64/aapcs64/type-def.h (hfa_f16x1_t): New. + (hfa_f16x2_t): Likewise. + (hfa_f16x3_t): Likewise. + * gcc.target/aarch64/aapcs64/va_arg-1.c: Check that __fp16 values + are promoted to double and passed in a double register. + * gcc.target/aarch64/aapcs64/va_arg-2.c: Check that __fp16 values + are promoted to double and stacked. + * gcc.target/aarch64/aapcs64/va_arg-4.c: Check stacking of HFA of + __fp16 data types. + * gcc.target/aarch64/aapcs64/va_arg-5.c: Likewise. + * gcc.target/aarch64/aapcs64/va_arg-16.c: New, check HFAs of + __fp16 first get passed in FP/SIMD registers, then stacked. + 2016-08-05 Nathan Sidwell PR c++/68724 diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h index 4e2ef0dace8..138de7321e9 100644 --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest-common.h @@ -57,7 +57,17 @@ #define X8 320 #define X9 328 -#define STACK 336 +#define H0 336 +#define H1 338 +#define H2 340 +#define H3 342 +#define H4 344 +#define H5 346 +#define H6 348 +#define H7 350 + + +#define STACK 352 /* The type of test. 'myfunc' in abitest.S needs to know which kind of test it is running to decide what to do at the runtime. Keep the diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S index c2fbd83fcb3..893e68cb994 100644 --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/abitest.S @@ -13,7 +13,12 @@ dumpregs: myfunc: mov x16, sp mov x17, sp - sub sp, sp, 352 // 336 for registers and 16 for old sp and lr + sub sp, sp, 368 // 352 for registers and 16 for old sp and lr + + sub x17, x17, 8 + st4 { v4.h, v5.h, v6.h, v7.h }[0], [x17] //344 + sub x17, x17, 8 + st4 { v0.h, v1.h, v2.h, v3.h }[0], [x17] //336 stp x8, x9, [x17, #-16]! //320 diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c index a21c9265cdd..29a1ca6d45b 100644 --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/func-ret-1.c @@ -44,4 +44,5 @@ FUNC_VAL_CHECK (12, vf2_t, vf2, D0, f32in64) FUNC_VAL_CHECK (13, vi4_t, vi4, Q0, i32in128) FUNC_VAL_CHECK (14, int *, int_ptr, X0, flat) FUNC_VAL_CHECK (15, vlf1_t, vlf1, Q0, flat) +FUNC_VAL_CHECK (16, __fp16, 0xabcd, H0, flat) #endif diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c index 94817ede3e1..ce7c60a8d7b 100644 --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_2.c @@ -12,5 +12,6 @@ ARG(double, 4.0, D1) ARG(float, 2.0f, S2) ARG(double, 5.0, D3) + ARG(__fp16, 8.0f, H4) LAST_ARG(int, 3, W0) #endif diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c new file mode 100644 index 00000000000..7bc79f5fcaf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/test_27.c @@ -0,0 +1,46 @@ +/* Test AAPCS64 layout + + Test named homogeneous floating-point aggregates of __fp16 data, + which should be passed in SIMD/FP registers or via the stack. */ + +/* { dg-do run { target aarch64*-*-* } } */ + +#ifndef IN_FRAMEWORK +#define TESTFILE "test_27.c" + +struct x0 +{ + __fp16 v[1]; +} f16x1; + +struct x1 +{ + __fp16 v[2]; +} f16x2; + +struct x2 +{ + __fp16 v[3]; +} f16x3; + +#define HAS_DATA_INIT_FUNC +void init_data () +{ + f16x1.v[0] = 2.0f; + f16x2.v[0] = 4.0f; + f16x2.v[1] = 8.0f; + f16x3.v[0] = 16.0f; + f16x3.v[1] = 32.0f; + f16x3.v[2] = 64.0f; +} + +#include "abitest.h" +#else +ARG (struct x0, f16x1, H0) +ARG (struct x1, f16x2, H1) +ARG (struct x2, f16x3, H3) +ARG (struct x1, f16x2, H6) +ARG (struct x0, f16x1, STACK) +ARG (int, 0xdeadbeef, W0) +LAST_ARG (double, 456.789, STACK+8) +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h b/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h index 3b9b3499ece..ca1fa5811ff 100644 --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/type-def.h @@ -44,6 +44,24 @@ struct hfa_fx3_t float c; }; +struct hfa_f16x1_t +{ + __fp16 a; +}; + +struct hfa_f16x2_t +{ + __fp16 a; + __fp16 b; +}; + +struct hfa_f16x3_t +{ + __fp16 a; + __fp16 b; + __fp16 c; +}; + struct hfa_dx2_t { double a; diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c index 4fb9a033ad4..5b9e0576dcd 100644 --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-1.c @@ -19,6 +19,8 @@ signed short ss = 0xcba9; signed int ss_promoted = 0xffffcba9; float fp = 65432.12345f; double fp_promoted = (double)65432.12345f; +__fp16 fp16 = 2.0f; +__fp16 fp16_promoted = (double)2.0f; #define HAS_DATA_INIT_FUNC void init_data () @@ -46,9 +48,13 @@ void init_data () ANON ( long double , 98765432123456789.987654321L, Q2, 12) ANON ( vf2_t, vf2 , D3, 13) ANON ( vi4_t, vi4 , Q4, 14) + /* 7.2: For unprototyped (i.e. pre- ANSI or K&R C) and variadic functions, + in addition to the normal conversions and promotions, arguments of + type __fp16 are converted to type double. */ + ANON_PROMOTED( __fp16, fp16 , double, fp16_promoted, D5, 15) #ifndef __AAPCS64_BIG_ENDIAN__ - LAST_ANON ( int , 0xeeee, STACK+32,15) + LAST_ANON ( int , 0xeeee, STACK+32,16) #else - LAST_ANON ( int , 0xeeee, STACK+36,15) + LAST_ANON ( int , 0xeeee, STACK+36,16) #endif #endif diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c new file mode 100644 index 00000000000..73f8f1c7bef --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-16.c @@ -0,0 +1,28 @@ +/* Test AAPCS64 layout and __builtin_va_arg. + + This test is focused particularly on __fp16 unnamed homogeneous + floating-point aggregate types which should be passed in fp/simd + registers until we run out of those, then the stack. */ + +/* { dg-do run { target aarch64*-*-* } } */ + +#ifndef IN_FRAMEWORK +#define AAPCS64_TEST_STDARG +#define TESTFILE "va_arg-16.c" +#include "type-def.h" + +struct hfa_f16x1_t hfa_f16x1 = {2.0f}; +struct hfa_f16x2_t hfa_f16x2 = {4.0f, 8.0f}; +struct hfa_f16x3_t hfa_f16x3 = {16.0f, 32.0f, 64.0f}; + +#include "abitest.h" +#else + ARG (int, 1, W0, LAST_NAMED_ARG_ID) + DOTS + ANON (struct hfa_f16x1_t, hfa_f16x1, H0 , 0) + ANON (struct hfa_f16x2_t, hfa_f16x2, H1 , 1) + ANON (struct hfa_f16x3_t, hfa_f16x3, H3 , 2) + ANON (struct hfa_f16x2_t, hfa_f16x2, H6 , 3) + ANON (struct hfa_f16x1_t, hfa_f16x1, STACK , 4) + LAST_ANON(double , 1.0 , STACK+8, 5) +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c index e972691f4ae..8f2f8811b61 100644 --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-2.c @@ -19,6 +19,8 @@ signed short ss = 0xcba9; signed int ss_promoted = 0xffffcba9; float fp = 65432.12345f; double fp_promoted = (double)65432.12345f; +__fp16 fp16 = 2.0f; +__fp16 fp16_promoted = (double)2.0f; #define HAS_DATA_INIT_FUNC void init_data () @@ -64,9 +66,10 @@ void init_data () ANON ( long double , 98765432123456789.987654321L, STACK+80, 20) ANON ( vf2_t, vf2 , STACK+96, 21) ANON ( vi4_t, vi4 , STACK+112,22) + ANON_PROMOTED( __fp16 , fp16 , double, fp16_promoted, STACK+128,23) #ifndef __AAPCS64_BIG_ENDIAN__ - LAST_ANON ( int , 0xeeee, STACK+128,23) + LAST_ANON ( int , 0xeeee, STACK+136,24) #else - LAST_ANON ( int , 0xeeee, STACK+132,23) + LAST_ANON ( int , 0xeeee, STACK+140,24) #endif #endif diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c index fab3575756d..010ad8b3f67 100644 --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-4.c @@ -29,6 +29,8 @@ struct non_hfa_ffvf2_t non_hfa_ffvf2; struct non_hfa_fffd_t non_hfa_fffd = {33.f, 34.f, 35.f, 36.0}; union hfa_union_t hfa_union; union non_hfa_union_t non_hfa_union; +struct hfa_f16x2_t hfa_f16x2 = {2.0f, 4.0f}; +struct hfa_f16x3_t hfa_f16x3 = {2.0f, 4.0f, 8.0f}; #define HAS_DATA_INIT_FUNC void init_data () @@ -89,9 +91,12 @@ void init_data () PTR_ANON (struct non_hfa_ffs_t , non_hfa_ffs , STACK+120, 18) ANON (struct non_hfa_ffs_2_t, non_hfa_ffs_2, STACK+128, 19) ANON (union non_hfa_union_t, non_hfa_union, STACK+144, 20) + /* HFA of __fp16 passed on stack, directed __fp16 test is va_arg-10.c. */ + ANON (struct hfa_f16x2_t , hfa_f16x2 , STACK+152, 21) + ANON (struct hfa_f16x3_t , hfa_f16x3 , STACK+160, 22) #ifndef __AAPCS64_BIG_ENDIAN__ - LAST_ANON(int , 2 , STACK+152, 30) + LAST_ANON(int , 2 , STACK+168, 30) #else - LAST_ANON(int , 2 , STACK+156, 30) + LAST_ANON(int , 2 , STACK+172, 30) #endif #endif diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c index 4853f92a297..e54f1f51fb6 100644 --- a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-5.c @@ -17,6 +17,8 @@ struct hfa_dx4_t hfa_dx4 = {1234.123, 2345.234, 3456.345, 4567.456}; struct hfa_ldx3_t hfa_ldx3 = {123456.7890, 234567.8901, 345678.9012}; struct hfa_ffs_t hfa_ffs; union hfa_union_t hfa_union; +struct hfa_f16x2_t hfa_f16x2 = {2.0f, 4.0f}; +struct hfa_f16x3_t hfa_f16x3 = {2.0f, 4.0f, 8.0f}; #define HAS_DATA_INIT_FUNC void init_data () @@ -43,5 +45,8 @@ void init_data () ANON (struct hfa_fx1_t , hfa_fx1 , STACK+24, 4) ANON (struct hfa_fx2_t , hfa_fx2 , STACK+32, 5) ANON (struct hfa_dx2_t , hfa_dx2 , STACK+40, 6) - LAST_ANON(double , 1.0 , STACK+56, 7) + /* HFA of __fp16 passed on stack, directed __fp16 test is va_arg-10.c. */ + ANON (struct hfa_f16x2_t, hfa_f16x2, STACK+56, 7) + ANON (struct hfa_f16x3_t, hfa_f16x3, STACK+64, 8) + LAST_ANON(double , 1.0 , STACK+72, 9) #endif