+2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
+
+ * config/tc-arm.c (arm_ext_i8mm): New feature set.
+ (do_vusdot): New.
+ (do_vsudot): New.
+ (do_vsmmla): New.
+ (do_vummla): New.
+ (insns): Add vsmmla, vummla, vusmmla, vusdot, vsudot mnemonics.
+ (armv86a_ext_table): Add i8mm extension.
+ (arm_extensions): Move bf16 extension to context sensitive table.
+ (armv82a_ext_table, armv84a_ext_table, armv85a_ext_table):
+ Move bf16 extension to context sensitive table.
+ (armv86a_ext_table): Add i8mm extension.
+ * doc/c-arm.texi: Document i8mm extension.
+ * testsuite/gas/arm/i8mm.s: New test.
+ * testsuite/gas/arm/i8mm.d: New test.
+ * testsuite/gas/arm/bfloat16-cmdline-bad-3.d: Update test.
+
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* config/tc-aarch64.c: Add new arch features to support the mm extension.
ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
static const arm_feature_set arm_ext_bf16 =
ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16);
+static const arm_feature_set arm_ext_i8mm =
+ ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM); /* Int8 Matrix Multiply (i8mm) feature set.  */
static const arm_feature_set arm_arch_any = ARM_ANY;
#ifdef OBJ_ELF
return do_neon_dotproduct (1);
}
+/* Encode VUSDOT, either in its vector form (Dd, Dn, Dm or Qd, Qn, Qm)
+   or in its by-scalar form (Dd, Dn, Dm[i] or Qd, Qn, Dm[i]).  Operand
+   types are checked against S8.  */
+static void
+do_vusdot (void)
+{
+  enum neon_shape shape;
+  int lane;
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  if (!inst.operands[2].isscalar)
+    {
+      /* Vector form: bit 21 is set on top of the base opcode.  */
+      inst.instruction |= (1 << 21);
+      shape = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
+      neon_check_type (3, shape, N_EQK, N_EQK, N_S8 | N_KEY);
+      neon_three_args (shape == NS_QQQ);
+      return;
+    }
+
+  /* By-scalar form: bit 25 is set on top of the base opcode.  */
+  shape = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_S8 | N_KEY);
+
+  inst.instruction |= (1 << 25);
+
+  /* The low four bits of the scalar operand are read as the lane index;
+     the remaining bits are the register number.  */
+  lane = inst.operands[2].reg & 0xf;
+  constraint ((lane != 0 && lane != 1), _("index must be 0 or 1"));
+  inst.operands[2].reg >>= 4;
+  constraint ((inst.operands[2].reg >= 16),
+	      _("indexed register must be less than 16"));
+  neon_three_args (shape == NS_QQS);
+
+  /* The lane index lives in bit 5 of the encoding.  */
+  inst.instruction |= (lane << 5);
+}
+
+/* Encode VSUDOT in its by-scalar form (Dd, Dn, Dm[i] or Qd, Qn, Dm[i]).
+   Operand types are checked against U8.  Nothing is encoded when the
+   third operand is not a scalar.  */
+static void
+do_vsudot (void)
+{
+  enum neon_shape shape;
+  int lane;
+
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  if (!inst.operands[2].isscalar)
+    return;
+
+  shape = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_U8 | N_KEY);
+
+  /* By-scalar form: bit 25 is set on top of the base opcode.  */
+  inst.instruction |= (1 << 25);
+
+  /* The low four bits of the scalar operand are read as the lane index;
+     the remaining bits are the register number.  */
+  lane = inst.operands[2].reg & 0xf;
+  constraint ((lane != 0 && lane != 1), _("index must be 0 or 1"));
+  inst.operands[2].reg >>= 4;
+  constraint ((inst.operands[2].reg >= 16),
+	      _("indexed register must be less than 16"));
+  neon_three_args (shape == NS_QQS);
+
+  /* The lane index lives in bit 5 of the encoding.  */
+  inst.instruction |= (lane << 5);
+}
+
+/* Encode a matrix multiply-accumulate taking three Q registers, with
+   operand types checked against S8.  */
+static void
+do_vsmmla (void)
+{
+  enum neon_shape shape;
+
+  shape = neon_select_shape (NS_QQQ, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_S8 | N_KEY);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  /* Only the QQQ shape is accepted, so the argument is fixed at 1.  */
+  neon_three_args (1);
+}
+
+/* Encode a matrix multiply-accumulate taking three Q registers, with
+   operand types checked against U8.  */
+static void
+do_vummla (void)
+{
+  enum neon_shape shape;
+
+  shape = neon_select_shape (NS_QQQ, NS_NULL);
+  neon_check_type (3, shape, N_EQK, N_EQK, N_U8 | N_KEY);
+  set_pred_insn_type (OUTSIDE_PRED_INSN);
+
+  /* Only the QQQ shape is accepted, so the argument is fixed at 1.  */
+  neon_three_args (1);
+}
+
/* Crypto v1 instructions. */
static void
do_crypto_2op_1 (unsigned elttype, int op)
#define THUMB_VARIANT &arm_ext_i8mm
TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
- TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vummla, vummla),
+ TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
};
ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1),
ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_2_FP16),
ARM_ADD ("fp16fml", FPU_ARCH_NEON_VFP_ARMV8_2_FP16FML),
+ ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+ ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
ARM_ADD ("dotprod", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
{
ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+ ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+ ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
{
ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
+ ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
+ ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
static const struct arm_ext_table armv86a_ext_table[] =
{
+ ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)), /* Int8 Matrix Multiply.  */
{ NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
};
use the context sensitive approach using arm_ext_table's. */
static const struct arm_option_extension_value_table arm_extensions[] =
{
- ARM_EXT_OPT ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
- ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
- ARM_ARCH_V8_2A),
ARM_EXT_OPT ("crc", ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
The following extensions are currently supported:
@code{bf16} (BFloat16 extensions for v8.6-A architecture),
+@code{i8mm} (Int8 Matrix Multiply extensions for v8.6-A architecture),
@code{crc}
@code{crypto} (Cryptography Extensions for v8-A architecture, implies @code{fp+simd}),
@code{dotprod} (Dot Product Extensions for v8.2-A architecture, implies @code{fp+simd}),
#name: Bfloat 16 bad extension
#source: bfloat16-non-neon.s
#as: -mno-warn-deprecated -march=armv8.1-a+bf16
-#error: .*Error: extension does not apply to the base architecture.*
+#error: .*Error: unknown architectural extension `bf16'*
--- /dev/null
+#name: Int8 Matrix Multiply extension
+#source: i8mm.s
+#as: -mno-warn-deprecated -march=armv8.6-a+i8mm+simd -I$srcdir/$subdir
+#objdump: -dr --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+
+00000000 <\.text>:
+ *[0-9a-f]+: fcea4c40 vusmmla\.s8 q10, q5, q0
+ *[0-9a-f]+: fc6a4c50 vummla\.u8 q10, q5, q0
+ *[0-9a-f]+: fc6a4c40 vsmmla\.s8 q10, q5, q0
+ *[0-9a-f]+: fcea4d40 vusdot\.s8 q10, q5, q0
+ *[0-9a-f]+: feca4d50 vsudot\.u8 q10, q5, d0\[0\]
+ *[0-9a-f]+: feca4d70 vsudot\.u8 q10, q5, d0\[1\]
+ *[0-9a-f]+: feca4d40 vusdot\.s8 q10, q5, d0\[0\]
+ *[0-9a-f]+: feca4d60 vusdot\.s8 q10, q5, d0\[1\]
+ *[0-9a-f]+: fca5ad00 vusdot\.s8 d10, d5, d0
+ *[0-9a-f]+: fe85ad00 vusdot\.s8 d10, d5, d0\[0\]
+ *[0-9a-f]+: fe85ad20 vusdot\.s8 d10, d5, d0\[1\]
+ *[0-9a-f]+: fe85ad10 vsudot\.u8 d10, d5, d0\[0\]
+ *[0-9a-f]+: fe85ad30 vsudot\.u8 d10, d5, d0\[1\]
+ *[0-9a-f]+: fcea4c40 vusmmla\.s8 q10, q5, q0
+ *[0-9a-f]+: fc6a4c50 vummla\.u8 q10, q5, q0
+ *[0-9a-f]+: fc6a4c40 vsmmla\.s8 q10, q5, q0
+ *[0-9a-f]+: fcea4d40 vusdot\.s8 q10, q5, q0
+ *[0-9a-f]+: feca4d50 vsudot\.u8 q10, q5, d0\[0\]
+ *[0-9a-f]+: feca4d70 vsudot\.u8 q10, q5, d0\[1\]
+ *[0-9a-f]+: feca4d40 vusdot\.s8 q10, q5, d0\[0\]
+ *[0-9a-f]+: feca4d60 vusdot\.s8 q10, q5, d0\[1\]
+ *[0-9a-f]+: fca5ad00 vusdot\.s8 d10, d5, d0
+ *[0-9a-f]+: fe85ad00 vusdot\.s8 d10, d5, d0\[0\]
+ *[0-9a-f]+: fe85ad20 vusdot\.s8 d10, d5, d0\[1\]
+ *[0-9a-f]+: fe85ad10 vsudot\.u8 d10, d5, d0\[0\]
+ *[0-9a-f]+: fe85ad30 vsudot\.u8 d10, d5, d0\[1\]
--- /dev/null
+vusmmla.s8 q10, q5, q0
+vummla.u8 q10, q5, q0
+vsmmla.s8 q10, q5, q0
+
+vusdot.s8 q10, q5, q0
+vsudot.u8 q10, q5, d0[0]
+vsudot.u8 q10, q5, d0[1]
+vusdot.s8 q10, q5, d0[0]
+vusdot.s8 q10, q5, d0[1]
+
+vusdot.s8 d10, d5, d0
+vusdot.s8 d10, d5, d0[0]
+vusdot.s8 d10, d5, d0[1]
+vsudot.u8 d10, d5, d0[0]
+vsudot.u8 d10, d5, d0[1]
+
+
+vusmmla q10.s8, q5.s8, q0.s8
+vummla q10.u8, q5.u8, q0.u8
+vsmmla q10.s8, q5.s8, q0.s8
+
+vusdot q10.s8, q5.s8, q0.s8
+vsudot q10.u8, q5.u8, d0.u8[0]
+vsudot q10.u8, q5.u8, d0.u8[1]
+vusdot q10.s8, q5.s8, d0.s8[0]
+vusdot q10.s8, q5.s8, d0.s8[1]
+
+vusdot d10.s8, d5.s8, d0.s8
+vusdot d10.s8, d5.s8, d0.s8[0]
+vusdot d10.s8, d5.s8, d0.s8[1]
+vsudot d10.u8, d5.u8, d0.u8[0]
+vsudot d10.u8, d5.u8, d0.u8[1]
+2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
+
+ * opcode/arm.h (ARM_EXT2_I8MM): New feature macro.
+
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* opcode/aarch64.h (AARCH64_FEATURE_I8MM): New.
#define ARM_EXT2_V8_1M_MAIN 0x00008000 /* ARMv8.1-M Mainline. */
#define ARM_EXT2_V8_6A 0x00010000 /* ARM V8.6A. */
#define ARM_EXT2_BF16 0x00020000 /* ARMv8 bfloat16. */
+#define ARM_EXT2_I8MM 0x00040000 /* ARMv8.6A i8mm. */
/* Co-processor space extensions. */
#define ARM_CEXT_XSCALE 0x00000001 /* Allow MIA etc. */
+2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
+
+ * arm-dis.c (neon_opcodes): Add i8mm SIMD instructions.
+
+
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* aarch64-tbl.h (aarch64_feature_i8mm_sve, aarch64_feature_f32mm_sve,
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
0xfe300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-2D[%3,5d]"},
+ /* Matrix Multiply instructions. */
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
+ 0xfc200c40, 0xffb00f50, "vsmmla.s8\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
+ 0xfc200c50, 0xffb00f50, "vummla.u8\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
+ 0xfca00c40, 0xffb00f50, "vusmmla.s8\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
+ 0xfca00d00, 0xffb00f10, "vusdot.s8\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
+ 0xfe800d00, 0xffb00f10, "vusdot.s8\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"},
+ {ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
+ 0xfe800d10, 0xffb00f10, "vsudot.u8\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"},
+
/* Two registers, miscellaneous. */
{ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},