+2020-01-10 Stam Markianos-Wright <stam.markianos-wright@arm.com>
+
+ * config.gcc (aarch64*-*-*): Add arm_bf16.h to extra_headers.
+ * config/aarch64/aarch64-builtins.c
+ (aarch64_simd_builtin_std_type): Add BFmode.
+ (aarch64_init_simd_builtin_types): Define element types for the new
+ Bfloat vector types.
+ (aarch64_init_bf16_types): New function.
+ (aarch64_general_init_builtins): Call aarch64_init_bf16_types.
+ * config/aarch64/aarch64-modes.def: Add BFmode and the V4BF and V8BF
+ vector modes.
+ * config/aarch64/aarch64-simd-builtin-types.def: Add BF SIMD types.
+ * config/aarch64/aarch64-simd.md: Add BF vector types to NEON move
+ patterns.
+ * config/aarch64/aarch64.h (AARCH64_VALID_SIMD_DREG_MODE): Add V4BF.
+ (AARCH64_VALID_SIMD_QREG_MODE): Add V8BF.
+ * config/aarch64/aarch64.c
+ (aarch64_classify_vector_mode): Add support for BF types.
+ (aarch64_gimplify_va_arg_expr): Add support for BF types.
+ (aarch64_vq_mode): Add support for BF types.
+ (aarch64_simd_container_mode): Add support for BF types.
+ (aarch64_mangle_type): Add support for BF scalar type.
+ * config/aarch64/aarch64.md (*movhf_aarch64): Rename to
+ *mov<mode>_aarch64 and extend to BFmode via the HFBF iterator.
+ * config/aarch64/arm_bf16.h: New file.
+ * config/aarch64/arm_neon.h: Include arm_bf16.h and add the Bfloat
+ vector types.
+ * config/aarch64/iterators.md: Add BF types to mode attributes.
+ (HFBF, GPF_TF_F16_MOV, VDMOV, VQMOV, VQMOV_NO2E, VALL_F16MOV): New.
+
2020-01-10 Jason Merrill <jason@redhat.com>
PR c++/93173 - incorrect tree sharing.
;;
aarch64*-*-*)
cpu_type=aarch64
- extra_headers="arm_fp16.h arm_neon.h arm_acle.h arm_sve.h"
+ extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h"
c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
d_target_objs="aarch64-d.o"
#define hi_UP E_HImode
#define hf_UP E_HFmode
#define qi_UP E_QImode
+#define bf_UP E_BFmode
+#define v4bf_UP E_V4BFmode
+#define v8bf_UP E_V8BFmode
#define UP(X) X##_UP
#define SIMD_MAX_BUILTIN_ARGS 5
tree aarch64_fp16_type_node = NULL_TREE;
tree aarch64_fp16_ptr_type_node = NULL_TREE;
+/* Back-end node type for brain float (bfloat) types. */
+tree aarch64_bf16_type_node = NULL_TREE;
+tree aarch64_bf16_ptr_type_node = NULL_TREE;
+
/* Wrapper around add_builtin_function. NAME is the name of the built-in
function, TYPE is the function type, and CODE is the function subcode
(relative to AARCH64_BUILTIN_GENERAL). */
return float_type_node;
case E_DFmode:
return double_type_node;
+ case E_BFmode:
+ return aarch64_bf16_type_node;
default:
gcc_unreachable ();
}
aarch64_simd_types[Float64x1_t].eltype = double_type_node;
aarch64_simd_types[Float64x2_t].eltype = double_type_node;
+ /* Init Bfloat vector types with underlying __bf16 type. */
+ aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
+ aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
+
for (i = 0; i < nelts; i++)
{
tree eltype = aarch64_simd_types[i].eltype;
aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
}
+/* Initialize the backend REAL_TYPE node supporting the bfloat16 type.  */
+static void
+aarch64_init_bf16_types (void)
+{
+ aarch64_bf16_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (aarch64_bf16_type_node) = 16;
+ SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
+ layout_type (aarch64_bf16_type_node);
+
+ lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
+ aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
+}
+
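For reference, an illustrative translation unit (not part of this patch) that
the registration above enables; the lang-hook call is what makes the __bf16
keyword visible to the front ends:

    /* Hypothetical user code: __bf16 values are carried in BFmode.  */
    __bf16
    bf16_identity (__bf16 x)
    {
      __bf16 y = x;  /* scalar BFmode copy */
      return y;
    }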
/* Pointer authentication builtins that will become NOP on legacy platform.
Currently, these builtins are for internal use only (libgcc EH unwinder). */
aarch64_init_fp16_types ();
+ aarch64_init_bf16_types ();
+
if (TARGET_SIMD)
aarch64_init_simd_builtins ();
VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */
VECTOR_MODE (FLOAT, HF, 2); /* V2HF. */
+/* Bfloat16 modes. */
+FLOAT_MODE (BF, 2, 0);
+ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
+
+VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */
+VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */
+
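arm_bfloat_half_format gives BFmode the bfloat16 layout: 1 sign bit,
8 exponent bits and 7 mantissa bits, i.e. the high half of an IEEE binary32
value.  A minimal host-side sketch of that correspondence (plain truncation;
a real conversion would also round):

    #include <stdint.h>
    #include <string.h>

    /* Illustrative only: bfloat16 keeps the top 16 bits of a float.  */
    static uint16_t
    float_to_bf16_bits (float f)
    {
      uint32_t u;
      memcpy (&u, &f, sizeof u);    /* bit copy, avoids aliasing issues */
      return (uint16_t) (u >> 16);  /* sign + exponent + top 7 mantissa bits */
    }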
/* Oct Int: 256-bit integer mode needed for 32-byte vector arguments. */
INT_MODE (OI, 32);
ENTRY (Float32x4_t, V4SF, none, 13)
ENTRY (Float64x1_t, V1DF, none, 13)
ENTRY (Float64x2_t, V2DF, none, 13)
+ ENTRY (Bfloat16x4_t, V4BF, none, 14)
+ ENTRY (Bfloat16x8_t, V8BF, none, 14)
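The trailing 14 is the length of the mangled type name ("__Bfloat16x4_t" and
"__Bfloat16x8_t" are 14 characters, cf. 13 for "__Float32x4_t").  These
entries surface in arm_neon.h as, illustratively:

    #include <arm_neon.h>

    bfloat16x4_t d_vec;  /* 64-bit (D-register) vector, mode V4BF  */
    bfloat16x8_t q_vec;  /* 128-bit (Q-register) vector, mode V8BF */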
;; <http://www.gnu.org/licenses/>.
(define_expand "mov<mode>"
- [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
- (match_operand:VALL_F16 1 "general_operand"))]
+ [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
+ (match_operand:VALL_F16MOV 1 "general_operand"))]
"TARGET_SIMD"
"
/* Force the operand into a register if it is not an
[(set_attr "type" "neon_dup<q>")]
)
-(define_insn "*aarch64_simd_mov<VD:mode>"
- [(set (match_operand:VD 0 "nonimmediate_operand"
+(define_insn "*aarch64_simd_mov<VDMOV:mode>"
+ [(set (match_operand:VDMOV 0 "nonimmediate_operand"
"=w, m, m, w, ?r, ?w, ?r, w")
- (match_operand:VD 1 "general_operand"
+ (match_operand:VDMOV 1 "general_operand"
"m, Dz, w, w, w, r, r, Dn"))]
"TARGET_SIMD
&& (register_operand (operands[0], <MODE>mode)
mov_reg, neon_move<q>")]
)
-(define_insn "*aarch64_simd_mov<VQ:mode>"
- [(set (match_operand:VQ 0 "nonimmediate_operand"
+(define_insn "*aarch64_simd_mov<VQMOV:mode>"
+ [(set (match_operand:VQMOV 0 "nonimmediate_operand"
"=w, Umn, m, w, ?r, ?w, ?r, w")
- (match_operand:VQ 1 "general_operand"
+ (match_operand:VQMOV 1 "general_operand"
"m, Dz, w, w, w, r, r, Dn"))]
"TARGET_SIMD
&& (register_operand (operands[0], <MODE>mode)
(define_split
- [(set (match_operand:VQ 0 "register_operand" "")
- (match_operand:VQ 1 "register_operand" ""))]
+ [(set (match_operand:VQMOV 0 "register_operand" "")
+ (match_operand:VQMOV 1 "register_operand" ""))]
"TARGET_SIMD && reload_completed
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
})
(define_split
- [(set (match_operand:VQ 0 "register_operand" "")
- (match_operand:VQ 1 "register_operand" ""))]
+ [(set (match_operand:VQMOV 0 "register_operand" "")
+ (match_operand:VQMOV 1 "register_operand" ""))]
"TARGET_SIMD && reload_completed
&& ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
|| (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
})
(define_expand "@aarch64_split_simd_mov<mode>"
- [(set (match_operand:VQ 0)
- (match_operand:VQ 1))]
+ [(set (match_operand:VQMOV 0)
+ (match_operand:VQMOV 1))]
"TARGET_SIMD"
{
rtx dst = operands[0];
(define_insn "aarch64_simd_mov_from_<mode>low"
[(set (match_operand:<VHALF> 0 "register_operand" "=r")
(vec_select:<VHALF>
- (match_operand:VQ 1 "register_operand" "w")
- (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
+ (match_operand:VQMOV 1 "register_operand" "w")
+ (match_operand:VQMOV 2 "vect_par_cnst_lo_half" "")))]
"TARGET_SIMD && reload_completed"
"umov\t%0, %1.d[0]"
[(set_attr "type" "neon_to_gp<q>")
(define_insn "aarch64_simd_mov_from_<mode>high"
[(set (match_operand:<VHALF> 0 "register_operand" "=r")
(vec_select:<VHALF>
- (match_operand:VQ 1 "register_operand" "w")
- (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
+ (match_operand:VQMOV 1 "register_operand" "w")
+ (match_operand:VQMOV 2 "vect_par_cnst_hi_half" "")))]
"TARGET_SIMD && reload_completed"
"umov\t%0, %1.d[1]"
[(set_attr "type" "neon_to_gp<q>")
;; On big-endian this is { zeroes, operand }
(define_insn "move_lo_quad_internal_<mode>"
- [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
- (vec_concat:VQ_NO2E
+ [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
+ (vec_concat:VQMOV_NO2E
(match_operand:<VHALF> 1 "register_operand" "w,r,r")
(vec_duplicate:<VHALF> (const_int 0))))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
)
(define_insn "move_lo_quad_internal_be_<mode>"
- [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
- (vec_concat:VQ_NO2E
+ [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
+ (vec_concat:VQMOV_NO2E
(vec_duplicate:<VHALF> (const_int 0))
(match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
)
(define_expand "move_lo_quad_<mode>"
- [(match_operand:VQ 0 "register_operand")
- (match_operand:VQ 1 "register_operand")]
+ [(match_operand:VQMOV 0 "register_operand")
+ (match_operand:VQMOV 1 "register_operand")]
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
;; For big-endian this is { operand1, operand2 }
(define_insn "aarch64_simd_move_hi_quad_<mode>"
- [(set (match_operand:VQ 0 "register_operand" "+w,w")
- (vec_concat:VQ
+ [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
+ (vec_concat:VQMOV
(vec_select:<VHALF>
(match_dup 0)
- (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
+ (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
(match_operand:<VHALF> 1 "register_operand" "w,r")))]
"TARGET_SIMD && !BYTES_BIG_ENDIAN"
"@
)
(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
- [(set (match_operand:VQ 0 "register_operand" "+w,w")
- (vec_concat:VQ
+ [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
+ (vec_concat:VQMOV
(match_operand:<VHALF> 1 "register_operand" "w,r")
(vec_select:<VHALF>
(match_dup 0)
- (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
+ (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
"TARGET_SIMD && BYTES_BIG_ENDIAN"
"@
ins\\t%0.d[1], %1.d[0]
)
(define_expand "move_hi_quad_<mode>"
- [(match_operand:VQ 0 "register_operand")
+ [(match_operand:VQMOV 0 "register_operand")
(match_operand:<VHALF> 1 "register_operand")]
"TARGET_SIMD"
{
case E_V2SImode:
/* ...E_V1DImode doesn't exist. */
case E_V4HFmode:
+ case E_V4BFmode:
case E_V2SFmode:
case E_V1DFmode:
/* 128-bit Advanced SIMD vectors. */
case E_V4SImode:
case E_V2DImode:
case E_V8HFmode:
+ case E_V8BFmode:
case E_V4SFmode:
case E_V2DFmode:
return TARGET_SIMD ? VEC_ADVSIMD : 0;
field_t = aarch64_fp16_type_node;
field_ptr_t = aarch64_fp16_ptr_type_node;
break;
+ case E_BFmode:
+ field_t = aarch64_bf16_type_node;
+ field_ptr_t = aarch64_bf16_ptr_type_node;
+ break;
case E_V2SImode:
case E_V4SImode:
{
return V4SFmode;
case E_HFmode:
return V8HFmode;
+ case E_BFmode:
+ return V8BFmode;
case E_SImode:
return V4SImode;
case E_HImode:
return V2SFmode;
case E_HFmode:
return V4HFmode;
+ case E_BFmode:
+ return V4BFmode;
case E_SImode:
return V2SImode;
case E_HImode:
if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
return "St9__va_list";
- /* Half-precision float. */
+ /* Half-precision floating-point types. */
if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
- return "Dh";
+ {
+ if (TYPE_MODE (type) == BFmode)
+ return "u6__bf16";
+ else
+ return "Dh";
+ }
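With this change a 16-bit REAL_TYPE whose mode is BFmode mangles as the
vendor-extended name "u6__bf16", while __fp16 keeps the builtin "Dh"
encoding.  Illustrative declarations and their expected manglings (the
__bf16 pointer case is checked by bf16-mangle-aarch64-1.C below):

    void f (__bf16 *x);  /* _Z1fPu6__bf16 */
    void g (__fp16 *x);  /* _Z1gPDh       */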
/* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
builtin types. */
#define AARCH64_VALID_SIMD_DREG_MODE(MODE) \
((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
|| (MODE) == V2SFmode || (MODE) == V4HFmode || (MODE) == DImode \
- || (MODE) == DFmode)
+ || (MODE) == DFmode || (MODE) == V4BFmode)
/* Modes valid for AdvSIMD Q registers. */
#define AARCH64_VALID_SIMD_QREG_MODE(MODE) \
((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
|| (MODE) == V4SFmode || (MODE) == V8HFmode || (MODE) == V2DImode \
- || (MODE) == V2DFmode)
+ || (MODE) == V2DFmode || (MODE) == V8BFmode)
#define ENDIAN_LANE_N(NUNITS, N) \
(BYTES_BIG_ENDIAN ? NUNITS - 1 - N : N)
extern tree aarch64_fp16_type_node;
extern tree aarch64_fp16_ptr_type_node;
+/* The user-visible __bf16 type, and a pointer to that type.  Defined
+   in aarch64-builtins.c.  */
+extern tree aarch64_bf16_type_node;
+extern tree aarch64_bf16_ptr_type_node;
+
/* The generic unwind code in libgcc does not initialize the frame pointer.
So in order to unwind a function using a frame pointer, the very first
function that is unwound must save the frame pointer. That way the frame
})
(define_expand "mov<mode>"
- [(set (match_operand:GPF_TF_F16 0 "nonimmediate_operand")
- (match_operand:GPF_TF_F16 1 "general_operand"))]
+ [(set (match_operand:GPF_TF_F16_MOV 0 "nonimmediate_operand")
+ (match_operand:GPF_TF_F16_MOV 1 "general_operand"))]
""
{
if (!TARGET_FLOAT)
}
)
-(define_insn "*movhf_aarch64"
- [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r")
- (match_operand:HF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
- "TARGET_FLOAT && (register_operand (operands[0], HFmode)
- || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+(define_insn "*mov<mode>_aarch64"
+ [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r")
+ (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
"@
movi\\t%0.4h, #0
fmov\\t%h0, %w1
--- /dev/null
+/* Arm BF16 intrinsics include file.
+
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Arm.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _AARCH64_BF16_H_
+#define _AARCH64_BF16_H_
+
+typedef __bf16 bfloat16_t;
+
+#endif
typedef float float32_t;
typedef double float64_t;
+typedef __Bfloat16x4_t bfloat16x4_t;
+typedef __Bfloat16x8_t bfloat16x8_t;
+
typedef struct int8x8x2_t
{
int8x8_t val[2];
#pragma GCC pop_options
+#include "arm_bf16.h"
+
#undef __aarch64_vget_lane_any
#undef __aarch64_vdup_lane_any
;; Iterator for all scalar floating point modes (HF, SF, DF)
(define_mode_iterator GPF_HF [HF SF DF])
+;; Iterator for all 16-bit scalar floating point modes (HF, BF)
+(define_mode_iterator HFBF [HF BF])
+
;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
+;; Iterator for all scalar floating point modes suitable for moving, including
+;; special BF type (HF, SF, DF, TF and BF)
+(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF])
+
;; Double vector modes.
(define_mode_iterator VDF [V2SF V4HF])
;; Double vector modes.
(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF])
+;; Double vector modes suitable for moving. Includes V4BF.
+(define_mode_iterator VDMOV [V8QI V4HI V4HF V4BF V2SI V2SF])
+
;; All modes stored in registers d0-d31.
(define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF])
;; Copy of the above.
(define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
+;; Quad vector modes suitable for moving. Includes V8BF.
+(define_mode_iterator VQMOV [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF])
+
+;; VQMOV without 2-element modes.
+(define_mode_iterator VQMOV_NO2E [V16QI V8HI V4SI V8HF V8BF V4SF])
+
;; Quad integer vector modes.
(define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI])
(define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
V4HF V8HF V2SF V4SF V2DF])
+;; All Advanced SIMD modes suitable for moving, loading, and storing,
+;; including special Bfloat vector types.
+(define_mode_iterator VALL_F16MOV [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
+ V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
+
;; The VALL_F16 modes except the 128-bit 2-element ones.
(define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
V4HF V8HF V2SF V4SF])
(V2SI "2") (V4SI "4")
(V2DI "2")
(V4HF "4") (V8HF "8")
+ (V4BF "4") (V8BF "8")
(V2SF "2") (V4SF "4")
(V1DF "1") (V2DF "2")
(DI "1") (DF "1")])
(V8HF "16b") (V2SF "8b")
(V4SF "16b") (V2DF "16b")
(DI "8b") (DF "8b")
- (SI "8b") (SF "8b")])
+ (SI "8b") (SF "8b")
+ (V4BF "8b") (V8BF "16b")])
;; Half modes of all vector modes.
(define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI")
(V4HI "V2HI") (V8HI "V4HI")
(V2SI "SI") (V4SI "V2SI")
(V2DI "DI") (V2SF "SF")
(V4SF "V2SF") (V4HF "V2HF")
- (V8HF "V4HF") (V2DF "DF")])
+ (V8HF "V4HF") (V2DF "DF")
+ (V8BF "V4BF")])
;; Half modes of all vector modes, in lower-case.
(define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi")
(V4HI "v2hi") (V8HI "v4hi")
- (V8HF "v4hf")
+ (V8HF "v4hf") (V8BF "v4bf")
(V2SI "si") (V4SI "v2si")
(V2DI "di") (V2SF "sf")
(V4SF "v2sf") (V2DF "df")])
(V2SI "") (V4SI "_q")
(DI "") (V2DI "_q")
(V4HF "") (V8HF "_q")
+ (V4BF "") (V8BF "_q")
(V2SF "") (V4SF "_q")
(V2DF "_q")
(QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")])
+2020-01-10 Stam Markianos-Wright <stam.markianos-wright@arm.com>
+
+ * g++.dg/abi/mangle-neon-aarch64.C: Add Bfloat SIMD types to test.
+ * g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C: New test.
+ * gcc.target/aarch64/bfloat16_scalar_1.c: New test.
+ * gcc.target/aarch64/bfloat16_scalar_2.c: New test.
+ * gcc.target/aarch64/bfloat16_scalar_3.c: New test.
+ * gcc.target/aarch64/bfloat16_scalar_4.c: New test.
+ * gcc.target/aarch64/bfloat16_simd_1.c: New test.
+ * gcc.target/aarch64/bfloat16_simd_2.c: New test.
+ * gcc.target/aarch64/bfloat16_simd_3.c: New test.
+
2020-01-10 Richard Sandiford <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/struct_vect_1.c (N): Protect with #ifndef.
void f5 (uint32x2_t a) {}
void f23 (uint64x1_t a) {}
void f61 (float16x4_t a) {}
+void f62 (bfloat16x4_t a) {}
void f6 (float32x2_t a) {}
void f7 (poly8x8_t a) {}
void f8 (poly16x4_t a) {}
void f15 (uint32x4_t a) {}
void f16 (uint64x2_t a) {}
void f171 (float16x8_t a) {}
+void f172 (bfloat16x8_t a) {}
void f17 (float32x4_t a) {}
void f18 (float64x2_t a) {}
void f19 (poly8x16_t a) {}
// { dg-final { scan-assembler "_Z2f512__Uint32x2_t:" } }
// { dg-final { scan-assembler "_Z3f2312__Uint64x1_t:" } }
// { dg-final { scan-assembler "_Z3f6113__Float16x4_t:" } }
+// { dg-final { scan-assembler "_Z3f6214__Bfloat16x4_t:" } }
// { dg-final { scan-assembler "_Z2f613__Float32x2_t:" } }
// { dg-final { scan-assembler "_Z2f711__Poly8x8_t:" } }
// { dg-final { scan-assembler "_Z2f812__Poly16x4_t:" } }
// { dg-final { scan-assembler "_Z3f1512__Uint32x4_t:" } }
// { dg-final { scan-assembler "_Z3f1612__Uint64x2_t:" } }
// { dg-final { scan-assembler "_Z4f17113__Float16x8_t:" } }
+// { dg-final { scan-assembler "_Z4f17214__Bfloat16x8_t:" } }
// { dg-final { scan-assembler "_Z3f1713__Float32x4_t:" } }
// { dg-final { scan-assembler "_Z3f1813__Float64x2_t:" } }
// { dg-final { scan-assembler "_Z3f1912__Poly8x16_t:" } }
--- /dev/null
+/* { dg-do compile { target aarch64*-*-* } } */
+
+/* Test mangling */
+
+/* { dg-final { scan-assembler "\t.global\t_Z1fPu6__bf16" } } */
+void f (__bf16 *x) { }
+
+/* { dg-final { scan-assembler "\t.global\t_Z1gPu6__bf16S_" } } */
+void g (__bf16 *x, __bf16 *y) { }
+
+/* { dg-final { scan-assembler "\t.global\t_ZN1SIu6__bf16u6__bf16E1iE" } } */
+template <typename T, typename U> struct S { static int i; };
+template <> int S<__bf16, __bf16>::i = 3;
--- /dev/null
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_bf16.h>
+
+/*
+**stacktest1:
+** sub sp, sp, #16
+** str h0, \[sp, 14\]
+** ldr h0, \[sp, 14\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+ volatile bfloat16_t b = __a;
+ return b;
+}
+
+/*
+**bfloat_mov_ww:
+** mov v1.h\[0\], v2.h\[0\]
+** ret
+*/
+void bfloat_mov_ww (void)
+{
+ register bfloat16_t x asm ("h2");
+ register bfloat16_t y asm ("h1");
+ asm volatile ("" : "=w" (x));
+ y = x;
+ asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_rw:
+** dup v1.4h, w1
+** ret
+*/
+void bfloat_mov_rw (void)
+{
+ register bfloat16_t x asm ("w1");
+ register bfloat16_t y asm ("h1");
+ asm volatile ("" : "=r" (x));
+ y = x;
+ asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_wr:
+** umov w1, v1.h\[0\]
+** ret
+*/
+void bfloat_mov_wr (void)
+{
+ register bfloat16_t x asm ("h1");
+ register bfloat16_t y asm ("w1");
+ asm volatile ("" : "=w" (x));
+ y = x;
+ asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rr:
+** mov w1, w2
+** ret
+*/
+void bfloat_mov_rr (void)
+{
+ register bfloat16_t x asm ("w2");
+ register bfloat16_t y asm ("w1");
+ asm volatile ("" : "=r" (x));
+ y = x;
+ asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rm:
+** strh w2, \[x0\]
+** ret
+*/
+void bfloat_mov_rm (bfloat16_t *ptr)
+{
+ register bfloat16_t x asm ("w2");
+ asm volatile ("" : "=r" (x));
+ *ptr = x;
+}
+
+/*
+**bfloat_mov_mr:
+** ldrh w2, \[x0\]
+** ret
+*/
+void bfloat_mov_mr (bfloat16_t *ptr)
+{
+ register bfloat16_t y asm ("w2");
+ y = *ptr;
+ asm volatile ("" :: "r" (y));
+}
--- /dev/null
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps -std=gnu90" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_bf16.h>
+
+#pragma GCC push_options
+#pragma GCC target ("+bf16")
+
+/*
+**stacktest1:
+** sub sp, sp, #16
+** str h0, \[sp, 14\]
+** ldr h0, \[sp, 14\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+ volatile bfloat16_t b = __a;
+ return b;
+}
+
+/*
+**bfloat_mov_ww:
+** mov v1.h\[0\], v2.h\[0\]
+** ret
+*/
+void bfloat_mov_ww (void)
+{
+ register bfloat16_t x asm ("h2");
+ register bfloat16_t y asm ("h1");
+ asm volatile ("" : "=w" (x));
+ y = x;
+ asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_rw:
+** dup v1.4h, w1
+** ret
+*/
+void bfloat_mov_rw (void)
+{
+ register bfloat16_t x asm ("w1");
+ register bfloat16_t y asm ("h1");
+ asm volatile ("" : "=r" (x));
+ y = x;
+ asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_wr:
+** umov w1, v1.h\[0\]
+** ret
+*/
+void bfloat_mov_wr (void)
+{
+ register bfloat16_t x asm ("h1");
+ register bfloat16_t y asm ("w1");
+ asm volatile ("" : "=w" (x));
+ y = x;
+ asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rr:
+** mov w1, w2
+** ret
+*/
+void bfloat_mov_rr (void)
+{
+ register bfloat16_t x asm ("w2");
+ register bfloat16_t y asm ("w1");
+ asm volatile ("" : "=r" (x));
+ y = x;
+ asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rm:
+** strh w2, \[x0\]
+** ret
+*/
+void bfloat_mov_rm (bfloat16_t *ptr)
+{
+ register bfloat16_t x asm ("w2");
+ asm volatile ("" : "=r" (x));
+ *ptr = x;
+}
+
+/*
+**bfloat_mov_mr:
+** ldrh w2, \[x0\]
+** ret
+*/
+void bfloat_mov_mr (bfloat16_t *ptr)
+{
+ register bfloat16_t y asm ("w2");
+ y = *ptr;
+ asm volatile ("" :: "r" (y));
+}
+
+#pragma GCC pop_options
--- /dev/null
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps -std=gnu90" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_bf16.h>
+
+/*
+**stacktest1:
+** sub sp, sp, #16
+** str h0, \[sp, 14\]
+** ldr h0, \[sp, 14\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+ volatile bfloat16_t b = __a;
+ return b;
+}
+
+/*
+**bfloat_mov_ww:
+** mov v1.h\[0\], v2.h\[0\]
+** ret
+*/
+void bfloat_mov_ww (void)
+{
+ register bfloat16_t x asm ("h2");
+ register bfloat16_t y asm ("h1");
+ asm volatile ("" : "=w" (x));
+ y = x;
+ asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_rw:
+** dup v1.4h, w1
+** ret
+*/
+void bfloat_mov_rw (void)
+{
+ register bfloat16_t x asm ("w1");
+ register bfloat16_t y asm ("h1");
+ asm volatile ("" : "=r" (x));
+ y = x;
+ asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_wr:
+** umov w1, v1.h\[0\]
+** ret
+*/
+void bfloat_mov_wr (void)
+{
+ register bfloat16_t x asm ("h1");
+ register bfloat16_t y asm ("w1");
+ asm volatile ("" : "=w" (x));
+ y = x;
+ asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rr:
+** mov w1, w2
+** ret
+*/
+void bfloat_mov_rr (void)
+{
+ register bfloat16_t x asm ("w2");
+ register bfloat16_t y asm ("w1");
+ asm volatile ("" : "=r" (x));
+ y = x;
+ asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rm:
+** strh w2, \[x0\]
+** ret
+*/
+void bfloat_mov_rm (bfloat16_t *ptr)
+{
+ register bfloat16_t x asm ("w2");
+ asm volatile ("" : "=r" (x));
+ *ptr = x;
+}
+
+/*
+**bfloat_mov_mr:
+** ldrh w2, \[x0\]
+** ret
+*/
+void bfloat_mov_mr (bfloat16_t *ptr)
+{
+ register bfloat16_t y asm ("w2");
+ y = *ptr;
+ asm volatile ("" :: "r" (y));
+}
--- /dev/null
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-std=c99 -pedantic-errors -O3 --save-temps" } */
+
+#include <arm_bf16.h>
+
+_Complex bfloat16_t stacktest1 (_Complex bfloat16_t __a)
+{
+ volatile _Complex bfloat16_t b = __a;
+ return b;
+}
+
+/* { dg-error {ISO C does not support plain 'complex' meaning 'double complex'} "" { target *-*-* } 8 } */
+/* { dg-error {expected '=', ',', ';', 'asm' or '__attribute__' before 'stacktest1'} "" { target *-*-* } 8 } */
--- /dev/null
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**stacktest1:
+** sub sp, sp, #16
+** str h0, \[sp, 14\]
+** ldr h0, \[sp, 14\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+ volatile bfloat16_t b = __a;
+ return b;
+}
+
+/*
+**stacktest2:
+** sub sp, sp, #16
+** str d0, \[sp, 8\]
+** ldr d0, \[sp, 8\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+ volatile bfloat16x4_t b = __a;
+ return b;
+}
+
+/*
+**stacktest3:
+** sub sp, sp, #16
+** str q0, \[sp\]
+** ldr q0, \[sp\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+ volatile bfloat16x8_t b = __a;
+ return b;
+}
+
+/* Test compilation of __attribute__ ((vector_size)) vectors with 8, 16,
+   32, etc. bfloat16_t elements.  */
+typedef bfloat16_t v8bf __attribute__((vector_size(16)));
+typedef bfloat16_t v16bf __attribute__((vector_size(32)));
+typedef bfloat16_t v32bf __attribute__((vector_size(64)));
+typedef bfloat16_t v64bf __attribute__((vector_size(128)));
+typedef bfloat16_t v128bf __attribute__((vector_size(256)));
+
+v8bf stacktest4 (v8bf __a)
+{
+ volatile v8bf b = __a;
+ return b;
+}
+
+v16bf stacktest5 (v16bf __a)
+{
+ volatile v16bf b = __a;
+ return b;
+}
+
+v32bf stacktest6 (v32bf __a)
+{
+ volatile v32bf b = __a;
+ return b;
+}
+
+v64bf stacktest7 (v64bf __a)
+{
+ volatile v64bf b = __a;
+ return b;
+}
+
+v128bf stacktest8 (v128bf __a)
+{
+ volatile v128bf b = __a;
+ return b;
+}
+
+/* Test assigning constant values to bfloat vectors. */
+
+typedef bfloat16_t v2bf __attribute__((vector_size(4)));
+v2bf c2 (void) { return (v2bf) 0x12345678; }
+
+bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; }
--- /dev/null
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+#pragma GCC push_options
+#pragma GCC target ("+bf16")
+
+/*
+**stacktest1:
+** sub sp, sp, #16
+** str h0, \[sp, 14\]
+** ldr h0, \[sp, 14\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+ volatile bfloat16_t b = __a;
+ return b;
+}
+
+/*
+**stacktest2:
+** sub sp, sp, #16
+** str d0, \[sp, 8\]
+** ldr d0, \[sp, 8\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+ volatile bfloat16x4_t b = __a;
+ return b;
+}
+
+/*
+**stacktest3:
+** sub sp, sp, #16
+** str q0, \[sp\]
+** ldr q0, \[sp\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+ volatile bfloat16x8_t b = __a;
+ return b;
+}
+
+/* Test compilation of __attribute__ ((vector_size)) vectors with 8, 16,
+   32, etc. bfloat16_t elements.  */
+typedef bfloat16_t v8bf __attribute__((vector_size(16)));
+typedef bfloat16_t v16bf __attribute__((vector_size(32)));
+typedef bfloat16_t v32bf __attribute__((vector_size(64)));
+typedef bfloat16_t v64bf __attribute__((vector_size(128)));
+typedef bfloat16_t v128bf __attribute__((vector_size(256)));
+
+v8bf stacktest4 (v8bf __a)
+{
+ volatile v8bf b = __a;
+ return b;
+}
+
+v16bf stacktest5 (v16bf __a)
+{
+ volatile v16bf b = __a;
+ return b;
+}
+
+v32bf stacktest6 (v32bf __a)
+{
+ volatile v32bf b = __a;
+ return b;
+}
+
+v64bf stacktest7 (v64bf __a)
+{
+ volatile v64bf b = __a;
+ return b;
+}
+
+v128bf stacktest8 (v128bf __a)
+{
+ volatile v128bf b = __a;
+ return b;
+}
+
+/* Test assigning constant values to bfloat vectors. */
+
+typedef bfloat16_t v2bf __attribute__((vector_size(4)));
+v2bf c2 (void) { return (v2bf) 0x12345678; }
+
+bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; }
+
+#pragma GCC pop_options
--- /dev/null
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**stacktest1:
+** sub sp, sp, #16
+** str h0, \[sp, 14\]
+** ldr h0, \[sp, 14\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+ volatile bfloat16_t b = __a;
+ return b;
+}
+
+/*
+**stacktest2:
+** sub sp, sp, #16
+** str d0, \[sp, 8\]
+** ldr d0, \[sp, 8\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+ volatile bfloat16x4_t b = __a;
+ return b;
+}
+
+/*
+**stacktest3:
+** sub sp, sp, #16
+** str q0, \[sp\]
+** ldr q0, \[sp\]
+** add sp, sp, 16
+** ret
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+ volatile bfloat16x8_t b = __a;
+ return b;
+}
+
+/* Test compilation of __attribute__ ((vector_size)) vectors with 8, 16,
+   32, etc. bfloat16_t elements.  */
+typedef bfloat16_t v8bf __attribute__((vector_size(16)));
+typedef bfloat16_t v16bf __attribute__((vector_size(32)));
+typedef bfloat16_t v32bf __attribute__((vector_size(64)));
+typedef bfloat16_t v64bf __attribute__((vector_size(128)));
+typedef bfloat16_t v128bf __attribute__((vector_size(256)));
+
+v8bf stacktest4 (v8bf __a)
+{
+ volatile v8bf b = __a;
+ return b;
+}
+
+v16bf stacktest5 (v16bf __a)
+{
+ volatile v16bf b = __a;
+ return b;
+}
+
+v32bf stacktest6 (v32bf __a)
+{
+ volatile v32bf b = __a;
+ return b;
+}
+
+v64bf stacktest7 (v64bf __a)
+{
+ volatile v64bf b = __a;
+ return b;
+}
+
+v128bf stacktest8 (v128bf __a)
+{
+ volatile v128bf b = __a;
+ return b;
+}
+
+/* Test assigning constant values to bfloat vectors. */
+
+typedef bfloat16_t v2bf __attribute__((vector_size(4)));
+v2bf c2 (void) { return (v2bf) 0x12345678; }
+
+bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; }