/* Output routines for GCC for ARM.
- Copyright (C) 1991-2018 Free Software Foundation, Inc.
+ Copyright (C) 1991-2019 Free Software Foundation, Inc.
Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
and Martin Simmons (@harleqn.co.uk).
More major hacks by Richard Earnshaw (rearnsha@arm.com).
/* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
int arm_arch8_2 = 0;
+/* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
+int arm_arch8_3 = 0;
+
+/* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
+int arm_arch8_4 = 0;
+
/* Nonzero if this chip supports the FP16 instructions extension of ARM
Architecture 8.2. */
int arm_fp16_inst = 0;
set_conv_libfunc (optable, to, from, buffer);
}
-/* Set up library functions unique to ARM. */
+static GTY(()) rtx speculation_barrier_libfunc;
+/* Set up library functions unique to ARM. */
static void
arm_init_libfuncs (void)
{
if (TARGET_AAPCS_BASED)
synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
+
+ speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
/* On AAPCS systems, this is the "struct __va_list". */
error ("iWMMXt unsupported under Thumb mode");
if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
- error ("can not use -mtp=cp15 with 16-bit Thumb");
+ error ("cannot use -mtp=cp15 with 16-bit Thumb");
if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
{
flag_pic = 0;
}
- /* We only support -mpure-code and -mslow-flash-data on M-profile targets
- with MOVT. */
- if ((target_pure_code || target_slow_flash_data)
- && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
+ if (target_pure_code || target_slow_flash_data)
{
const char *flag = (target_pure_code ? "-mpure-code" :
"-mslow-flash-data");
- error ("%s only supports non-pic code on M-profile targets with the "
- "MOVT instruction", flag);
- }
+ /* We only support -mpure-code and -mslow-flash-data on M-profile targets
+ with MOVT. */
+ if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
+ error ("%s only supports non-pic code on M-profile targets with the "
+ "MOVT instruction", flag);
+
+ /* Cannot load addresses: -mslow-flash-data forbids literal pool and
+ -mword-relocations forbids relocation of MOVT/MOVW. */
+ if (target_word_relocations)
+ error ("%s incompatible with -mword-relocations", flag);
+ }
}
/* Recompute the global settings depending on target attribute options. */
arm_pic_register = pic_register;
}
+ if (flag_pic)
+ target_word_relocations = 1;
+
/* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
if (fix_cm3_ldrd == 2)
{
arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
+ arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
+ arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
return 1;
}
-/* Record that the current function needs a PIC register. Initialize
- cfun->machine->pic_reg if we have not already done so. */
+/* Record that the current function needs a PIC register.  If PIC_REG is null,
+   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
+   both cases cfun->machine->pic_reg is initialized if we have not already done
+   so.  COMPUTE_NOW decides whether and where to set the PIC register.  If true,
+   the PIC register is reloaded at the current position in the instruction
+   stream, regardless of whether it was loaded before.  Otherwise, it is only
+   loaded if not already done so (crtl->uses_pic_offset_table is null).  Note
+   that a nonnull PIC_REG is only supported when COMPUTE_NOW is true and a null
+   PIC_REG only when COMPUTE_NOW is false.  */
static void
-require_pic_register (void)
+require_pic_register (rtx pic_reg, bool compute_now)
{
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
/* A lot of the logic here is made obscure by the fact that this
routine gets called as part of the rtx cost estimation process.
We don't want those calls to affect any assumptions about the real
function; and further, we can't call entry_of_function() until we
start the real expansion process. */
- if (!crtl->uses_pic_offset_table)
+ if (!crtl->uses_pic_offset_table || compute_now)
{
- gcc_assert (can_create_pseudo_p ());
+ gcc_assert (can_create_pseudo_p ()
+ || (pic_reg != NULL_RTX
+ && REG_P (pic_reg)
+ && GET_MODE (pic_reg) == Pmode));
if (arm_pic_register != INVALID_REGNUM
+ && !compute_now
&& !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
{
if (!cfun->machine->pic_reg)
{
rtx_insn *seq, *insn;
+ if (pic_reg == NULL_RTX)
+ pic_reg = gen_reg_rtx (Pmode);
if (!cfun->machine->pic_reg)
- cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+ cfun->machine->pic_reg = pic_reg;
/* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation
start_sequence ();
if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
- && arm_pic_register > LAST_LO_REGNUM)
+ && arm_pic_register > LAST_LO_REGNUM
+ && !compute_now)
emit_move_insn (cfun->machine->pic_reg,
gen_rtx_REG (Pmode, arm_pic_register));
else
- arm_load_pic_register (0UL);
+ arm_load_pic_register (0UL, pic_reg);
seq = get_insns ();
end_sequence ();
we can't yet emit instructions directly in the final
insn stream. Queue the insns on the entry edge, they will
be committed after everything else is expanded. */
- insert_insn_on_edge (seq,
- single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+ if (currently_expanding_to_rtl)
+ insert_insn_on_edge (seq,
+ single_succ_edge
+ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+ else
+ emit_insn (seq);
}
}
}
}
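As a rough illustration of the contract documented above (a hedged sketch, not part of this patch; caller_pic stands for a hypothetical Pmode REG supplied by the caller), the two call shapes permitted by the assert are:

  /* Lazy form: record that the function needs a PIC register; the setup
     sequence is queued on the entry edge unless it was already requested.  */
  require_pic_register (NULL_RTX, false /*compute_now*/);

  /* Eager form: reload the PIC base into caller_pic at the current point in
     the instruction stream, even if it was loaded before.  */
  require_pic_register (caller_pic, true /*compute_now*/);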
+/* Legitimize the PIC load of ORIG into REG.  If REG is NULL, a new pseudo is
+   created to hold the result of the load.  If not NULL, PIC_REG indicates
+   which register to use as PIC register, otherwise it is decided by the
+   register allocator.  COMPUTE_NOW forces the PIC register to be loaded at
+   the current location in the instruction stream, regardless of whether it
+   was loaded previously.  Note that a nonnull PIC_REG is only supported when
+   COMPUTE_NOW is true and a null PIC_REG only when COMPUTE_NOW is false.
+
+ Returns the register REG into which the PIC load is performed. */
+
rtx
-legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
+legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
+ bool compute_now)
{
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
if (GET_CODE (orig) == SYMBOL_REF
|| GET_CODE (orig) == LABEL_REF)
{
rtx mem;
/* If this function doesn't have a pic register, create one now. */
- require_pic_register ();
+ require_pic_register (pic_reg, compute_now);
+
+ if (pic_reg == NULL_RTX)
+ pic_reg = cfun->machine->pic_reg;
- pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
+ pat = gen_calculate_pic_address (reg, pic_reg, orig);
/* Make the MEM as close to a constant as possible. */
mem = SET_SRC (pat);
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
- base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
+ pic_reg, compute_now);
offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
- base == reg ? 0 : reg);
+ base == reg ? 0 : reg, pic_reg,
+ compute_now);
if (CONST_INT_P (offset))
{
low register. */
void
-arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
+arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
- rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
+ rtx l1, labelno, pic_tmp, pic_rtx;
if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
return;
gcc_assert (flag_pic);
- pic_reg = cfun->machine->pic_reg;
+ if (pic_reg == NULL_RTX)
+ pic_reg = cfun->machine->pic_reg;
if (TARGET_VXWORKS_RTP)
{
pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
{
/* We need to find and carefully transform any SYMBOL and LABEL
references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+ false /*compute_now*/);
if (new_x != orig_x)
x = new_x;
{
/* We need to find and carefully transform any SYMBOL and LABEL
references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+ false /*compute_now*/);
if (new_x != orig_x)
x = new_x;
/* Generate code to load VALS, which is a PARALLEL containing only
constants (for vec_init) or CONST_VECTOR, efficiently into a
register. Returns an RTX to copy into the register, or NULL_RTX
- for a PARALLEL that can not be converted into a CONST_VECTOR. */
+ for a PARALLEL that cannot be converted into a CONST_VECTOR. */
rtx
neon_make_constant (rtx vals)
return target;
else if (const_vec != NULL_RTX)
/* Load from constant pool. On Cortex-A8 this takes two cycles
- (for either double or quad vectors). We can not take advantage
+ (for either double or quad vectors). We cannot take advantage
of single-cycle VLD1 because we need a PC-relative addressing
mode. */
return const_vec;
else
/* A PARALLEL containing something not valid inside CONST_VECTOR.
- We can not construct an initializer. */
+ We cannot construct an initializer. */
return NULL_RTX;
}
return FALSE;
}
+/* Prepares the operands for the VCMLA by lane instruction such that the right
+   register number is selected.  This instruction is special in that it always
+   requires a D register; however, there is a choice to be made between Dn[0],
+   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers and
+   the PATTERNMODE of the insn.
+
+   The VCMLA by lane function always selects two values.  For instance, given
+   D0 and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
+   used by the instruction.  However, given V4SF, indexes 0 and 1 are both
+   valid, selecting D0[0] or D1[0] respectively.
+
+   This function centralizes that information based on OPERANDS: OPERANDS[3]
+   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
+   updated to contain the right index.  */
+
+rtx *
+neon_vcmla_lane_prepare_operands (machine_mode patternmode, rtx *operands)
+{
+ int lane = NEON_ENDIAN_LANE_N (patternmode, INTVAL (operands[4]));
+ machine_mode constmode = SImode;
+ machine_mode mode = GET_MODE (operands[3]);
+ int regno = REGNO (operands[3]);
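+  /* VFP hard registers are numbered in S-register (half of a D register)
+     units, so halving the offset from FIRST_VFP_REGNUM gives the index of
+     the containing D register.  */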
+ regno = ((regno - FIRST_VFP_REGNUM) >> 1);
+ if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
+ {
+ operands[3] = gen_int_mode (regno + 1, constmode);
+ operands[4]
+ = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
+ }
+ else
+ {
+ operands[3] = gen_int_mode (regno, constmode);
+ operands[4] = gen_int_mode (lane, constmode);
+ }
+ return operands;
+}
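To make the register/lane split concrete, here is a hedged worked example (not part of the patch, and assuming a little-endian target so that NEON_ENDIAN_LANE_N leaves the lane index unchanged):

  /* Illustration only: a V4SF lane operand living in d0/d1 (hard register
     FIRST_VFP_REGNUM) with lane index 1.  GET_MODE_NUNITS (V4SFmode) / 4
     is 1, so the helper selects d1, lane 0.  */
  rtx ops[5];
  ops[3] = gen_rtx_REG (V4SFmode, FIRST_VFP_REGNUM);
  ops[4] = GEN_INT (1);
  neon_vcmla_lane_prepare_operands (V4SFmode, ops);
  /* Afterwards ops[3] is (const_int 1), i.e. d1, and ops[4] is (const_int 0).  */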
+
+
/* Return true if X is a register that will be eliminated later on. */
int
arm_eliminable_register (rtx x)
Mnode * mp;
/* If the minipool starts before the end of FIX->INSN then this FIX
- can not be placed into the current pool. Furthermore, adding the
+ cannot be placed into the current pool. Furthermore, adding the
new constant pool entry may cause the pool to start FIX_SIZE bytes
earlier. */
if (minipool_vector_head &&
if (use_cmse)
cmse_nonsecure_call_clear_caller_saved ();
- if (TARGET_THUMB1)
+
+ /* We cannot run the Thumb passes for thunks because there is no CFG. */
+ if (cfun->is_thunk)
+ ;
+ else if (TARGET_THUMB1)
thumb1_reorg ();
else if (TARGET_THUMB2)
thumb2_reorg ();
? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
: !SYMBOL_REF_LOCAL_P (addr)))
{
- require_pic_register ();
+ require_pic_register (NULL_RTX, false /*compute_now*/);
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
}
"msr%s\tAPSR_nzcvq, %%|lr", conditional);
output_asm_insn (instr, & operand);
- if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
+ if (TARGET_HARD_FLOAT)
{
/* Clear the cumulative exception-status bits (0-4,7) and the
condition code bits (28-31) of the FPSCR. We need to
mask &= THUMB2_WORK_REGS;
if (!IS_NESTED (func_type))
mask |= (1 << IP_REGNUM);
- arm_load_pic_register (mask);
+ arm_load_pic_register (mask, NULL_RTX);
}
/* If we are profiling, make sure no instructions are scheduled before
/* Load the pic register before setting the frame pointer,
so we can use r7 as a temporary work register. */
if (flag_pic && arm_pic_register != INVALID_REGNUM)
- arm_load_pic_register (live_regs_mask);
+ arm_load_pic_register (live_regs_mask, NULL_RTX);
if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset, tree function)
{
+ const bool long_call_p = arm_is_long_call_p (function);
+
/* On ARM, this_regno is R0 or R1 depending on
whether the function returns an aggregate or not.
*/
TREE_USED (function) = 1;
}
rtx funexp = XEXP (DECL_RTL (function), 0);
+ if (long_call_p)
+ {
+ emit_move_insn (temp, funexp);
+ funexp = temp;
+ }
funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
- rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
+ rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
SIBLING_CALL_P (insn) = 1;
+ emit_barrier ();
+
+ /* Indirect calls require a bit of fixup in PIC mode. */
+ if (long_call_p)
+ {
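+      /* Thunks are emitted outside the normal pass pipeline, so split the
+         insns and run arm_reorg by hand to give any literal load of the
+         call address a chance to be placed in a minipool.  */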
+ split_all_insns_noflow ();
+ arm_reorg ();
+ }
insn = get_insns ();
shorten_branches (insn);
arm_expand_compare_and_swap (rtx operands[])
{
rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
- machine_mode mode;
- rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+ machine_mode mode, cmp_mode;
bval = operands[0];
rval = operands[1];
}
if (TARGET_THUMB1)
- {
- switch (mode)
- {
- case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
- case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
- case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
- case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
- default:
- gcc_unreachable ();
- }
- }
+ cmp_mode = E_SImode;
else
- {
- switch (mode)
- {
- case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
- case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
- case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
- case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
- default:
- gcc_unreachable ();
- }
- }
+ cmp_mode = CC_Zmode;
bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
- emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
+ emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
+ oldval, newval, is_weak, mod_s, mod_f));
if (mode == QImode || mode == HImode)
emit_move_insn (operands[1], gen_lowpart (mode, rval));
void
arm_split_compare_and_swap (rtx operands[])
{
- rtx rval, mem, oldval, newval, neg_bval;
+ rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
machine_mode mode;
enum memmodel mod_s, mod_f;
bool is_weak;
oldval = operands[3];
newval = operands[4];
is_weak = (operands[5] != const0_rtx);
- mod_s = memmodel_from_int (INTVAL (operands[6]));
+ mod_s_rtx = operands[6];
+ mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
mod_f = memmodel_from_int (INTVAL (operands[7]));
neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
mode = GET_MODE (mem);
bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
- bool use_acquire = TARGET_HAVE_LDACQ
- && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
- || is_mm_release (mod_s));
-
- bool use_release = TARGET_HAVE_LDACQ
- && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
- || is_mm_acquire (mod_s));
+ bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
+ bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
/* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
a full barrier is emitted after the store-release. */
bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
- bool use_acquire = TARGET_HAVE_LDACQ
- && !(is_mm_relaxed (model) || is_mm_consume (model)
- || is_mm_release (model));
-
- bool use_release = TARGET_HAVE_LDACQ
- && !(is_mm_relaxed (model) || is_mm_consume (model)
- || is_mm_acquire (model));
+ bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
+ bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
/* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
a full barrier is emitted after the store-release. */
{
unsigned int i, odd, mask, nelt = d->perm.length ();
rtx out0, out1, in0, in1;
- rtx (*gen)(rtx, rtx, rtx, rtx);
int first_elem;
int swap_nelt;
if (d->testing_p)
return true;
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
- case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
- case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
- case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
- case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
- case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
- case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
- case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
- case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
- case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
- default:
- gcc_unreachable ();
- }
-
in0 = d->op0;
in1 = d->op1;
if (swap_nelt != 0)
if (odd)
std::swap (out0, out1);
- emit_insn (gen (out0, in0, in1, out1));
+ emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
return true;
}
{
unsigned int i, high, mask, nelt = d->perm.length ();
rtx out0, out1, in0, in1;
- rtx (*gen)(rtx, rtx, rtx, rtx);
int first_elem;
bool is_swapped;
if (d->testing_p)
return true;
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
- case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
- case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
- case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
- case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
- case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
- case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
- case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
- case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
- case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
- default:
- gcc_unreachable ();
- }
-
in0 = d->op0;
in1 = d->op1;
if (is_swapped)
if (high)
std::swap (out0, out1);
- emit_insn (gen (out0, in0, in1, out1));
+ emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
return true;
}
/* Recognize patterns for the VREV insns. */
-
static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
unsigned int i, j, diff, nelt = d->perm.length ();
- rtx (*gen)(rtx, rtx);
+ rtx (*gen) (machine_mode, rtx, rtx);
if (!d->one_vector_p)
return false;
switch (diff)
{
case 7:
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
- case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
- default:
- return false;
- }
- break;
+ switch (d->vmode)
+ {
+ case E_V16QImode:
+ case E_V8QImode:
+ gen = gen_neon_vrev64;
+ break;
+ default:
+ return false;
+ }
+ break;
case 3:
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
- case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
- case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
- case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
- case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
- case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
+ switch (d->vmode)
+ {
+ case E_V16QImode:
+ case E_V8QImode:
+ gen = gen_neon_vrev32;
+ break;
+ case E_V8HImode:
+ case E_V4HImode:
+ case E_V8HFmode:
+ case E_V4HFmode:
+ gen = gen_neon_vrev64;
+ break;
default:
return false;
}
case 1:
switch (d->vmode)
{
- case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
- case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
- case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
- case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
- case E_V4SImode: gen = gen_neon_vrev64v4si; break;
- case E_V2SImode: gen = gen_neon_vrev64v2si; break;
- case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
- case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
- default:
+ case E_V16QImode:
+ case E_V8QImode:
+ gen = gen_neon_vrev16;
+ break;
+ case E_V8HImode:
+ case E_V4HImode:
+ gen = gen_neon_vrev32;
+ break;
+ case E_V4SImode:
+ case E_V2SImode:
+ case E_V4SFmode:
+ case E_V2SFmode:
+ gen = gen_neon_vrev64;
+ break;
+ default:
return false;
}
break;
if (d->testing_p)
return true;
- emit_insn (gen (d->target, d->op0));
+ emit_insn (gen (d->vmode, d->target, d->op0));
return true;
}
{
unsigned int i, odd, mask, nelt = d->perm.length ();
rtx out0, out1, in0, in1;
- rtx (*gen)(rtx, rtx, rtx, rtx);
if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
return false;
if (d->testing_p)
return true;
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
- case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
- case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
- case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
- case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
- case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
- case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
- case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
- case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
- case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
- default:
- gcc_unreachable ();
- }
-
in0 = d->op0;
in1 = d->op1;
if (BYTES_BIG_ENDIAN)
if (odd)
std::swap (out0, out1);
- emit_insn (gen (out0, in0, in1, out1));
+ emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
return true;
}
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
unsigned int i, nelt = d->perm.length ();
- rtx (*gen) (rtx, rtx, rtx, rtx);
rtx offset;
unsigned int location;
location = d->perm[0];
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vextv16qi; break;
- case E_V8QImode: gen = gen_neon_vextv8qi; break;
- case E_V4HImode: gen = gen_neon_vextv4hi; break;
- case E_V8HImode: gen = gen_neon_vextv8hi; break;
- case E_V2SImode: gen = gen_neon_vextv2si; break;
- case E_V4SImode: gen = gen_neon_vextv4si; break;
- case E_V4HFmode: gen = gen_neon_vextv4hf; break;
- case E_V8HFmode: gen = gen_neon_vextv8hf; break;
- case E_V2SFmode: gen = gen_neon_vextv2sf; break;
- case E_V4SFmode: gen = gen_neon_vextv4sf; break;
- case E_V2DImode: gen = gen_neon_vextv2di; break;
- default:
- return false;
- }
-
/* Success! */
if (d->testing_p)
return true;
offset = GEN_INT (location);
- emit_insn (gen (d->target, d->op0, d->op1, offset));
+
+  if (d->vmode == E_DImode)
+ return false;
+
+ emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
return true;
}
return;
if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
- || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
+ || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
return;
/* Nested definitions must inherit mode. */
{
if (TARGET_BIG_END
&& !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
- && (GET_MODE_UNIT_SIZE (from) > UNITS_PER_WORD
- || GET_MODE_UNIT_SIZE (to) > UNITS_PER_WORD)
+ && (GET_MODE_SIZE (from) > UNITS_PER_WORD
+ || GET_MODE_SIZE (to) > UNITS_PER_WORD)
&& reg_classes_intersect_p (VFP_REGS, rclass))
return false;
return true;
return align;
}
+/* Emit a speculation barrier on target architectures that do not have
+ DSB/ISB directly. Such systems probably don't need a barrier
+ themselves, but if the code is ever run on a later architecture, it
+ might become a problem. */
+void
+arm_emit_speculation_barrier_function ()
+{
+ emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
+}
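A minimal sketch of a possible caller (an assumption, not code quoted from this patch; the arm_arch7/arm_arch8 test is a guess at the relevant condition): an expander wanting a speculation barrier could fall back to this helper on cores that predate DSB/ISB, which in turn emits the __speculation_barrier library call registered in arm_init_libfuncs above.

  if (!(arm_arch7 || arm_arch8))
    arm_emit_speculation_barrier_function ();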
+
#if CHECKING_P
namespace selftest {