/* FP16 instructions. */
static const arm_feature_set arm_ext_fp16 =
ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST);
+static const arm_feature_set arm_ext_fp16_fml =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_FML);
+static const arm_feature_set arm_ext_v8_2 =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_2A);
static const arm_feature_set arm_ext_v8_3 =
ARM_FEATURE_CORE_HIGH (ARM_EXT2_V8_3A);
};
/* ARM register categories. This includes coprocessor numbers and various
- architecture extensions' registers. */
+ architecture extensions' registers. Each entry should have an error message
+ in reg_expected_msgs below. */
enum arm_reg_type
{
REG_TYPE_RN,
REG_TYPE_NQ,
REG_TYPE_VFSD,
REG_TYPE_NDQ,
+ REG_TYPE_NSD,
REG_TYPE_NSDQ,
REG_TYPE_VFC,
REG_TYPE_MVF,
/* Diagnostics used when we don't get a register of the expected type. */
const char * const reg_expected_msgs[] =
{
- N_("ARM register expected"),
- N_("bad or missing co-processor number"),
- N_("co-processor register expected"),
- N_("FPA register expected"),
- N_("VFP single precision register expected"),
- N_("VFP/Neon double precision register expected"),
- N_("Neon quad precision register expected"),
- N_("VFP single or double precision register expected"),
- N_("Neon double or quad precision register expected"),
- N_("VFP single, double or Neon quad precision register expected"),
- N_("VFP system register expected"),
- N_("Maverick MVF register expected"),
- N_("Maverick MVD register expected"),
- N_("Maverick MVFX register expected"),
- N_("Maverick MVDX register expected"),
- N_("Maverick MVAX register expected"),
- N_("Maverick DSPSC register expected"),
- N_("iWMMXt data register expected"),
- N_("iWMMXt control register expected"),
- N_("iWMMXt scalar register expected"),
- N_("XScale accumulator register expected"),
+ /* Each message is tied to its enum arm_reg_type value with a designated
+ initializer, so the table no longer depends on the order of the enum. */
+ [REG_TYPE_RN] = N_("ARM register expected"),
+ [REG_TYPE_CP] = N_("bad or missing co-processor number"),
+ [REG_TYPE_CN] = N_("co-processor register expected"),
+ [REG_TYPE_FN] = N_("FPA register expected"),
+ [REG_TYPE_VFS] = N_("VFP single precision register expected"),
+ [REG_TYPE_VFD] = N_("VFP/Neon double precision register expected"),
+ [REG_TYPE_NQ] = N_("Neon quad precision register expected"),
+ [REG_TYPE_VFSD] = N_("VFP single or double precision register expected"),
+ [REG_TYPE_NDQ] = N_("Neon double or quad precision register expected"),
+ [REG_TYPE_NSD] = N_("Neon single or double precision register expected"),
+ [REG_TYPE_NSDQ] = N_("VFP single, double or Neon quad precision register"
+ " expected"),
+ [REG_TYPE_VFC] = N_("VFP system register expected"),
+ [REG_TYPE_MVF] = N_("Maverick MVF register expected"),
+ [REG_TYPE_MVD] = N_("Maverick MVD register expected"),
+ [REG_TYPE_MVFX] = N_("Maverick MVFX register expected"),
+ [REG_TYPE_MVDX] = N_("Maverick MVDX register expected"),
+ [REG_TYPE_MVAX] = N_("Maverick MVAX register expected"),
+ [REG_TYPE_DSPSC] = N_("Maverick DSPSC register expected"),
+ [REG_TYPE_MMXWR] = N_("iWMMXt data register expected"),
+ [REG_TYPE_MMXWC] = N_("iWMMXt control register expected"),
+ [REG_TYPE_MMXWCG] = N_("iWMMXt scalar register expected"),
+ [REG_TYPE_XSCALE] = N_("XScale accumulator register expected"),
+ /* REG_TYPE_RNB has no diagnostic text. Use a plain "" rather than N_(""):
+ the empty msgid is reserved by GNU gettext for the catalogue header and
+ must not be marked for translation. */
+ [REG_TYPE_RNB] = ""
};
/* Some well known registers that we refer to directly elsewhere. */
|| (type == REG_TYPE_NSDQ
&& (reg->type == REG_TYPE_VFS || reg->type == REG_TYPE_VFD
|| reg->type == REG_TYPE_NQ))
+ || (type == REG_TYPE_NSD
+ && (reg->type == REG_TYPE_VFS || reg->type == REG_TYPE_VFD))
|| (type == REG_TYPE_MMXWC
&& (reg->type == REG_TYPE_MMXWCG)))
type = (enum arm_reg_type) reg->type;
if (skip_past_char (&str, '[') == SUCCESS)
{
- if (type != REG_TYPE_VFD)
+ if (type != REG_TYPE_VFD
+ && !(type == REG_TYPE_VFS
+ && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8_2)))
{
first_error (_("only D registers may be indexed"));
return FAIL;
int reg;
char *str = *ccp;
struct neon_typed_alias atype;
+ enum arm_reg_type reg_type = REG_TYPE_VFD;
+
+ if (elsize == 4)
+ reg_type = REG_TYPE_VFS;
- reg = parse_typed_reg_or_scalar (&str, REG_TYPE_VFD, NULL, &atype);
+ reg = parse_typed_reg_or_scalar (&str, reg_type, NULL, &atype);
if (reg == FAIL || (atype.defined & NTA_HASINDEX) == 0)
return FAIL;
OP_RND, /* Neon double precision register (0..31) */
OP_RNQ, /* Neon quad precision register */
OP_RVSD, /* VFP single or double precision register */
+ OP_RNSD, /* Neon single or double precision register */
OP_RNDQ, /* Neon double or quad precision register */
OP_RNSDQ, /* Neon single, double or quad precision register */
OP_RNSC, /* Neon scalar D[X] */
OP_RVSD_I0, /* VFP S or D reg, or immediate zero. */
OP_RSVD_FI0, /* VFP S or D reg, or floating point immediate zero. */
OP_RR_RNSC, /* ARM reg or Neon scalar. */
+ OP_RNSD_RNSC, /* Neon S or D reg, or Neon scalar. */
OP_RNSDQ_RNSC, /* Vector S, D or Q reg, or Neon scalar. */
OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar. */
OP_RND_RNSC, /* Neon D reg, or Neon scalar. */
case OP_RXA: po_reg_or_fail (REG_TYPE_XSCALE); break;
case OP_oRNQ:
case OP_RNQ: po_reg_or_fail (REG_TYPE_NQ); break;
+ case OP_RNSD: po_reg_or_fail (REG_TYPE_NSD); break;
case OP_oRNDQ:
case OP_RNDQ: po_reg_or_fail (REG_TYPE_NDQ); break;
case OP_RVSD: po_reg_or_fail (REG_TYPE_VFSD); break;
}
break;
+ case OP_RNSD_RNSC:
+ {
+ po_scalar_or_goto (8, try_s_scalar);
+ break;
+ try_s_scalar:
+ po_scalar_or_goto (4, try_nsd);
+ break;
+ try_nsd:
+ po_reg_or_fail (REG_TYPE_NSD);
+ }
+ break;
+
case OP_RNDQ_RNSC:
{
po_scalar_or_goto (8, try_ndq);
{
int is_push = (inst.instruction & A_PUSH_POP_OP_MASK) == A1_OPCODE_PUSH;
+ if (is_push && one_reg == 13 /* SP */)
+ /* PR 22483: The A2 encoding cannot be used when
+ pushing the stack pointer as this is UNPREDICTABLE. */
+ return;
+
inst.instruction &= A_COND_MASK;
inst.instruction |= is_push ? A2_OPCODE_PUSH : A2_OPCODE_POP;
inst.instruction |= one_reg << 12;
if (inst.operands[1].isreg)
{
br = inst.operands[1].reg;
- if (((br & 0x200) == 0) && ((br & 0xf0000) != 0xf000))
+ if (((br & 0x200) == 0) && ((br & 0xf0000) != 0xf0000))
as_bad (_("bad register for mrs"));
}
else
X (2, (H, I), HALF), \
X (3, (H, H, H), HALF), \
X (3, (H, F, I), MIXED), \
- X (3, (F, H, I), MIXED)
+ X (3, (F, H, I), MIXED), \
+ X (3, (D, H, H), MIXED), \
+ X (3, (D, H, S), MIXED)
#define S2(A,B) NS_##A##B
#define S3(A,B,C) NS_##A##B##C
neon_mac_reg_scalar_long (N_S16 | N_S32 | N_U16 | N_U32, N_SU_32);
}
+/* Build the Rm field for the scalar operand of vfmal/vfmsl from GAS's
+   internal SCALAR representation (compare neon_scalar_for_mul).  QUAD_P is
+   nonzero for the Q form and zero for the D form.
+
+   Q form: register number (0..7) in bits [2:0], element index (0..3) split
+   across bit 3 (low bit) and bit 5 (high bit).
+   D form: register number (0..15) with its low bit in bit 5 and its upper
+   three bits in bits [2:0], element index (0..1) in bit 3.
+
+   If the register number or element index is out of range for the selected
+   form, an error is reported through first_error and 0 is returned.  */
+
+static unsigned
+neon_scalar_for_fmac_fp16_long (unsigned scalar, unsigned quad_p)
+{
+  const unsigned reg = NEON_SCALAR_REG (scalar);
+  const unsigned idx = NEON_SCALAR_INDEX (scalar);
+  /* Largest register number and element index accepted by each form.  */
+  const unsigned max_reg = quad_p ? 7 : 15;
+  const unsigned max_idx = quad_p ? 3 : 1;
+
+  if (reg > max_reg || idx > max_idx)
+    {
+      first_error (_("scalar out of range for multiply instruction"));
+      return 0;
+    }
+
+  if (quad_p)
+    return ((reg & 0x7)
+	    | ((idx & 0x1) << 3)
+	    | (((idx >> 1) & 0x1) << 5));
+
+  return (((reg & 0x1) << 5)
+	  | ((reg >> 1) & 0x7)
+	  | ((idx & 0x1) << 3));
+}
+
+static void
+do_neon_fmac_maybe_scalar_long (int subtype)
+{
+ enum neon_shape rs;
+ int high8; /* Replacement for bits [31:24] of the opcode, applied below.
+ NOTE(review): the values stored here exceed INT_MAX for a 32-bit int --
+ confirm the signed conversion is intended. */
+ /* Common encoder for vfmal (SUBTYPE == 0, see do_neon_vfmal) and vfmsl
+ (SUBTYPE != 0, see do_neon_vfmsl).
+
+ NOTE: vfmal/vfmsl use a slightly different NEON three-same encoding: the
+ "size" field (bits[21:20]) has a different meaning. For the scalar index
+ variant it is used to differentiate add and subtract, otherwise it has the
+ fixed value 0x2. SIZE stays -1 for scalar vfmal, becomes 16 for scalar
+ vfmsl and 32 for the register (non-scalar) form. */
+ int size = -1;
+
+ if (inst.cond != COND_ALWAYS)
+ as_warn (_("vfmal/vfmsl with FP16 type cannot be conditional, the "
+ "behaviour is UNPREDICTABLE"));
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_fp16_fml),
+ _(BAD_FP16));
+
+ constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_neon_ext_armv8),
+ _(BAD_FPU));
+
+ /* vfmal/vfmsl are in three-same D/Q register format, or the third operand
+ can be a scalar index register. */
+ if (inst.operands[2].isscalar)
+ {
+ high8 = 0xfe000000;
+ if (subtype)
+ size = 16;
+ rs = neon_select_shape (NS_DHS, NS_QDS, NS_NULL);
+ }
+ else
+ {
+ high8 = 0xfc000000;
+ size = 32;
+ if (subtype)
+ inst.instruction |= (0x1 << 23); /* Register form: bit 23 marks vfmsl. */
+ rs = neon_select_shape (NS_DHH, NS_QDD, NS_NULL);
+ }
+
+ neon_check_type (3, rs, N_EQK, N_EQK, N_KEY | N_F16);
+
+ /* "opcode" from template has included "ubit", so simply pass 0 here. Also,
+ the "S" bit in the size field has been reused to differentiate vfmal and
+ vfmsl, so SIZE is passed through exactly as computed above (-1, 16 or 32)
+ rather than being derived from the operand type. */
+ unsigned quad_p = (rs == NS_QDD || rs == NS_QDS);
+ neon_three_same (quad_p, 0, size);
+
+ /* Undo neon_dp_fixup. Redo the high eight bits. */
+ inst.instruction &= 0x00ffffff;
+ inst.instruction |= high8;
+
+#define LOW1(R) ((R) & 0x1)
+#define HI4(R) (((R) >> 1) & 0xf)
+ /* Unlike the usual NEON three-same encoding, the encoding of Vn and Vm
+ depends on whether the instruction is in Q form and whether Vm is a scalar
+ indexed operand. */
+ if (inst.operands[2].isscalar)
+ {
+ unsigned rm
+ = neon_scalar_for_fmac_fp16_long (inst.operands[2].reg, quad_p);
+ inst.instruction &= 0xffffffd0; /* Clear bits [3:0] and bit 5. */
+ inst.instruction |= rm;
+
+ if (!quad_p)
+ {
+ /* Redo Rn as well: clear bits [19:16] and bit 7, then split the register
+ number into its upper four bits and its low bit. */
+ inst.instruction &= 0xfff0ff7f;
+ inst.instruction |= HI4 (inst.operands[1].reg) << 16;
+ inst.instruction |= LOW1 (inst.operands[1].reg) << 7;
+ }
+ }
+ else if (!quad_p)
+ {
+ /* Redo Rn and Rm: clear bits [19:16], bit 7, bit 5 and bits [3:0], then
+ split each register number into its upper four bits and its low bit. */
+ inst.instruction &= 0xfff0ff50;
+ inst.instruction |= HI4 (inst.operands[1].reg) << 16;
+ inst.instruction |= LOW1 (inst.operands[1].reg) << 7;
+ inst.instruction |= HI4 (inst.operands[2].reg);
+ inst.instruction |= LOW1 (inst.operands[2].reg) << 5;
+ }
+}
+
+/* Encode vfmal: the SUBTYPE == 0 flavour of
+   do_neon_fmac_maybe_scalar_long.  */
+static void
+do_neon_vfmal (void)
+{
+  /* Plain call, no `return': ISO C does not allow a return statement with
+     an expression in a function returning void.  */
+  do_neon_fmac_maybe_scalar_long (0);
+}
+
+/* Encode vfmsl: the SUBTYPE == 1 flavour of
+   do_neon_fmac_maybe_scalar_long.  */
+static void
+do_neon_vfmsl (void)
+{
+  /* Plain call, no `return': ISO C does not allow a return statement with
+     an expression in a function returning void.  */
+  do_neon_fmac_maybe_scalar_long (1);
+}
+
static void
do_neon_dyadic_wide (void)
{
NCE (vmovx, eb00a40, 2, (RVS, RVS), neon_movhf),
NCE (vins, eb00ac0, 2, (RVS, RVS), neon_movhf),
+ /* New backported fma/fms instructions optional in v8.2. */
+ NCE (vfmal, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmal),
+ NCE (vfmsl, 810, 3, (RNDQ, RNSD, RNSD_RNSC), neon_vfmsl),
+
#undef THUMB_VARIANT
#define THUMB_VARIANT & fpu_neon_ext_v1
#undef ARM_VARIANT
ARM_ARCH_OPT ("armv8.2-a", ARM_ARCH_V8_2A, FPU_ARCH_VFP),
ARM_ARCH_OPT ("armv8.3-a", ARM_ARCH_V8_3A, FPU_ARCH_VFP),
ARM_ARCH_OPT ("armv8-r", ARM_ARCH_V8R, FPU_ARCH_VFP),
+ ARM_ARCH_OPT ("armv8.4-a", ARM_ARCH_V8_4A, FPU_ARCH_VFP),
ARM_ARCH_OPT ("xscale", ARM_ARCH_XSCALE, FPU_ARCH_VFP),
ARM_ARCH_OPT ("iwmmxt", ARM_ARCH_IWMMXT, FPU_ARCH_VFP),
ARM_ARCH_OPT ("iwmmxt2", ARM_ARCH_IWMMXT2,FPU_ARCH_VFP),
ARM_EXT_OPT ("fp16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST),
ARM_ARCH_V8_2A),
+ ARM_EXT_OPT ("fp16fml", ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST
+ | ARM_EXT2_FP16_FML),
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_FP16_INST
+ | ARM_EXT2_FP16_FML),
+ ARM_ARCH_V8_2A),
ARM_EXT_OPT2 ("idiv", ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
ARM_FEATURE_CORE_LOW (ARM_EXT_ADIV | ARM_EXT_DIV),
ARM_FEATURE_CORE_LOW (ARM_EXT_V7A),
{16, ARM_ARCH_V8M_BASE},
{17, ARM_ARCH_V8M_MAIN},
{15, ARM_ARCH_V8R},
+ {16, ARM_ARCH_V8_4A},
{-1, ARM_ARCH_NONE}
};