unsigned int len:8; /* arch string length */
bool skip:1; /* show_arch should skip this. */
enum processor_type type; /* arch type */
+ enum { vsz_none, vsz_set, vsz_reset } vsz; /* vector size control */
i386_cpu_flags enable; /* cpu feature enable flags */
i386_cpu_flags disable; /* cpu feature disable flags */
}
static char *parse_operands (char *, const char *);
static void swap_operands (void);
static void swap_2_operands (unsigned int, unsigned int);
-static enum flag_code i386_addressing_mode (void);
+static enum i386_flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static bool optimize_disp (const insn_template *t);
static const insn_template *match_template (char);
no_default_mask,
unsupported_rc_sae,
invalid_register_operand,
+ internal_error,
};
struct _i386_insn
vex_encoding_vex,
vex_encoding_vex3,
vex_encoding_evex,
+ vex_encoding_evex512,
vex_encoding_error
} vec_encoding;
static char operand_chars[256];
/* Lexical macros. */
-#define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
#define is_operand_char(x) (operand_chars[(unsigned char) x])
#define is_register_char(x) (register_chars[(unsigned char) x])
#define is_space_char(x) ((x) == ' ')
-/* All non-digit non-letter characters that may occur in an operand. */
-static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!=:[@]";
+/* All non-digit non-letter characters that may occur in an operand and
+ which aren't already in extra_symbol_chars[]. */
+static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]";
/* md_assemble() always leaves the strings it's passed unaltered. To
effect this we maintain a stack of saved characters that we've smashed
/* Are we processing a .insn directive? */
#define dot_insn() (i.tm.mnem_off == MN__insn)
-/* We support four different modes. FLAG_CODE variable is used to distinguish
- these. */
-
-enum flag_code {
- CODE_32BIT,
- CODE_16BIT,
- CODE_64BIT };
-
-static enum flag_code flag_code;
+enum i386_flag_code i386_flag_code;
+#define flag_code i386_flag_code /* Permit continued use of the original name. */
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
static char *cpu_sub_arch_name = NULL;
/* CPU feature flags. */
-static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
+i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
/* If we have selected a cpu we are generating instructions for. */
static int cpu_arch_tune_set = 0;
/* Cpu we are generating instructions for. */
enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
-/* CPU feature flags of cpu we are generating instructions for. */
-static i386_cpu_flags cpu_arch_tune_flags;
-
/* CPU instruction set architecture used. */
enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
/* Encode aligned vector move as unaligned vector move. */
static unsigned int use_unaligned_vector_move;
+/* Maximum permitted vector size. */
+#define VSZ_DEFAULT VSZ512
+static unsigned int vector_size = VSZ_DEFAULT;
+
/* Encode scalar AVX instructions with specific vector length. */
static enum
{
};
#define ARCH(n, t, f, s) \
- { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
+ { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, vsz_none, CPU_ ## f ## _FLAGS, \
CPU_NONE_FLAGS }
#define SUBARCH(n, e, d, s) \
- { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
+ { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, vsz_none, CPU_ ## e ## _FLAGS, \
CPU_ ## d ## _FLAGS }
+#define VECARCH(n, e, d, v) \
+ { STRING_COMMA_LEN (#n), false, PROCESSOR_NONE, vsz_ ## v, \
+ CPU_ ## e ## _FLAGS, CPU_ ## d ## _FLAGS }
static const arch_entry cpu_arch[] =
{
ARCH (i386, I386, 386, false),
ARCH (i486, I486, 486, false),
ARCH (i586, PENTIUM, 586, false),
- ARCH (i686, PENTIUMPRO, 686, false),
ARCH (pentium, PENTIUM, 586, false),
+ ARCH (i686, I686, 686, false),
ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
ARCH (pentiumii, PENTIUMPRO, P2, false),
ARCH (pentiumiii, PENTIUMPRO, P3, false),
SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
- SUBARCH (avx, AVX, ANY_AVX, false),
- SUBARCH (avx2, AVX2, ANY_AVX2, false),
- SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
- SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
- SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
- SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
- SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
- SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
- SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
+ VECARCH (avx, AVX, ANY_AVX, reset),
+ VECARCH (avx2, AVX2, ANY_AVX2, reset),
+ VECARCH (avx512f, AVX512F, ANY_AVX512F, reset),
+ VECARCH (avx512cd, AVX512CD, ANY_AVX512CD, reset),
+ VECARCH (avx512er, AVX512ER, ANY_AVX512ER, reset),
+ VECARCH (avx512pf, AVX512PF, ANY_AVX512PF, reset),
+ VECARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, reset),
+ VECARCH (avx512bw, AVX512BW, ANY_AVX512BW, reset),
+ VECARCH (avx512vl, AVX512VL, ANY_AVX512VL, reset),
SUBARCH (monitor, MONITOR, MONITOR, false),
SUBARCH (vmx, VMX, ANY_VMX, false),
SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
SUBARCH (aes, AES, ANY_AES, false),
- SUBARCH (pclmul, PCLMUL, ANY_PCLMUL, false),
- SUBARCH (clmul, PCLMUL, ANY_PCLMUL, true),
+ SUBARCH (pclmul, PCLMULQDQ, ANY_PCLMULQDQ, false),
+ SUBARCH (clmul, PCLMULQDQ, ANY_PCLMULQDQ, true),
SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
SUBARCH (rdrnd, RDRND, RDRND, false),
SUBARCH (f16c, F16C, ANY_F16C, false),
SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
SUBARCH (se1, SE1, SE1, false),
SUBARCH (clwb, CLWB, CLWB, false),
- SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
- SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
- SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
- SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
- SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
- SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
- SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
- SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
- SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
+ VECARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, reset),
+ VECARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, reset),
+ VECARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, reset),
+ VECARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, reset),
+ VECARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, reset),
+ VECARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, reset),
+ VECARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, reset),
+ VECARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, reset),
+ VECARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, reset),
SUBARCH (clzero, CLZERO, CLZERO, false),
SUBARCH (mwaitx, MWAITX, MWAITX, false),
SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
SUBARCH (ibt, IBT, IBT, false),
SUBARCH (shstk, SHSTK, SHSTK, false),
SUBARCH (gfni, GFNI, ANY_GFNI, false),
- SUBARCH (vaes, VAES, ANY_VAES, false),
- SUBARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, false),
+ VECARCH (vaes, VAES, ANY_VAES, reset),
+ VECARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, reset),
SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
SUBARCH (pconfig, PCONFIG, PCONFIG, false),
SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
- SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
- SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
- ANY_AVX512_VP2INTERSECT, false),
+ VECARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, reset),
+ VECARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
+ ANY_AVX512_VP2INTERSECT, reset),
SUBARCH (tdx, TDX, TDX, false),
SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
SUBARCH (uintr, UINTR, UINTR, false),
SUBARCH (hreset, HRESET, HRESET, false),
- SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
+ VECARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, reset),
SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
- SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
- SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
+ VECARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, reset),
+ VECARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, reset),
SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
SUBARCH (msrlist, MSRLIST, MSRLIST, false),
- SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
+ VECARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, reset),
SUBARCH (rao_int, RAO_INT, RAO_INT, false),
SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
+ SUBARCH (fred, FRED, ANY_FRED, false),
+ SUBARCH (lkgs, LKGS, ANY_LKGS, false),
+ VECARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, reset),
+ VECARCH (sha512, SHA512, ANY_SHA512, reset),
+ VECARCH (sm3, SM3, ANY_SM3, reset),
+ VECARCH (sm4, SM4, ANY_SM4, reset),
+ SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
+ VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
};
#undef SUBARCH
{0x66,0x90}; /* xchg %ax,%ax */
static const unsigned char f32_3[] =
{0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
-static const unsigned char f32_4[] =
- {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
+#define f32_4 (f32_5 + 1) /* leal 0(%esi,%eiz),%esi */
+static const unsigned char f32_5[] =
+ {0x2e,0x8d,0x74,0x26,0x00}; /* leal %cs:0(%esi,%eiz),%esi */
static const unsigned char f32_6[] =
{0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
-static const unsigned char f32_7[] =
- {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
+#define f32_7 (f32_8 + 1) /* leal 0L(%esi,%eiz),%esi */
+static const unsigned char f32_8[] =
+ {0x2e,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal %cs:0L(%esi,%eiz),%esi */
+static const unsigned char f64_3[] =
+ {0x48,0x89,0xf6}; /* mov %rsi,%rsi */
+static const unsigned char f64_4[] =
+ {0x48,0x8d,0x76,0x00}; /* lea 0(%rsi),%rsi */
+#define f64_5 (f64_6 + 1) /* lea 0(%rsi,%riz),%rsi */
+static const unsigned char f64_6[] =
+ {0x2e,0x48,0x8d,0x74,0x26,0x00}; /* lea %cs:0(%rsi,%riz),%rsi */
+static const unsigned char f64_7[] =
+ {0x48,0x8d,0xb6,0x00,0x00,0x00,0x00}; /* lea 0L(%rsi),%rsi */
+#define f64_8 (f64_9 + 1) /* lea 0L(%rsi,%riz),%rsi */
+static const unsigned char f64_9[] =
+ {0x2e,0x48,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* lea %cs:0L(%rsi,%riz),%rsi */
+#define f16_2 (f64_3 + 1) /* mov %si,%si */
static const unsigned char f16_3[] =
{0x8d,0x74,0x00}; /* lea 0(%si),%si */
-static const unsigned char f16_4[] =
- {0x8d,0xb4,0x00,0x00}; /* lea 0W(%si),%si */
+#define f16_4 (f16_5 + 1) /* lea 0W(%si),%si */
+static const unsigned char f16_5[] =
+ {0x2e,0x8d,0xb4,0x00,0x00}; /* lea %cs:0W(%si),%si */
static const unsigned char jump_disp8[] =
{0xeb}; /* jmp disp8 */
static const unsigned char jump32_disp32[] =
{0x66,0xe9}; /* jmp disp32 */
/* 32-bit NOPs patterns. */
static const unsigned char *const f32_patt[] = {
- f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
+ f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8
+};
+/* 64-bit NOPs patterns. */
+static const unsigned char *const f64_patt[] = {
+ f32_1, f32_2, f64_3, f64_4, f64_5, f64_6, f64_7, f64_8, f64_9
};
/* 16-bit NOPs patterns. */
static const unsigned char *const f16_patt[] = {
- f32_1, f32_2, f16_3, f16_4
+ f32_1, f16_2, f16_3, f16_4, f16_5
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
static const unsigned char alt_4[] =
{0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
-static const unsigned char alt_5[] =
- {0x0f,0x1f,0x44,0x00,0x00};
+#define alt_5 (alt_6 + 1)
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
{0x66,0x0f,0x1f,0x44,0x00,0x00};
static const unsigned char alt_7[] =
{0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
-static const unsigned char alt_8[] =
- {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+#define alt_8 (alt_9 + 1)
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
{0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
-static const unsigned char alt_10[] =
- {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
+#define alt_10 (alt_11 + 1)
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
{0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
}
nops = patt[max_single_nop_size - 1];
-
- /* Use the smaller one if the requsted one isn't available. */
- if (nops == NULL)
- {
- max_single_nop_size--;
- nops = patt[max_single_nop_size - 1];
- }
-
last = count % max_single_nop_size;
count -= last;
if (last)
{
nops = patt[last - 1];
- if (nops == NULL)
- {
- /* Use the smaller one plus one-byte NOP if the needed one
- isn't available. */
- last--;
- nops = patt[last - 1];
- memcpy (where + offset, nops, last);
- where[offset + last] = *patt[0];
- }
- else
- memcpy (where + offset, nops, last);
+ memcpy (where + offset, nops, last);
}
}
/* We need to decide which NOP sequence to use for 32bit and
64bit. When -mtune= is used:
- 1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
+ 1. For PROCESSOR_I?86, PROCESSOR_PENTIUM, PROCESSOR_IAMCU, and
PROCESSOR_GENERIC32, f32_patt will be used.
2. For the rest, alt_patt will be used.
When -mtune= isn't used, alt_patt will be used if
- cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will
+ cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt/f64_patt will
be used.
When -march= or .arch is used, we can't use anything beyond
cpu_arch_isa_flags. */
- if (flag_code == CODE_16BIT)
+ if (fragP->tc_frag_data.code == CODE_16BIT)
{
patt = f16_patt;
max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
}
else
{
+ patt = fragP->tc_frag_data.code == CODE_64BIT ? f64_patt : f32_patt;
if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
{
- /* PROCESSOR_UNKNOWN means that all ISAs may be used. */
- switch (cpu_arch_tune)
+ /* PROCESSOR_UNKNOWN means that all ISAs may be used, unless
+ explicitly disabled. */
+ switch (fragP->tc_frag_data.tune)
{
case PROCESSOR_UNKNOWN:
/* We use cpu_arch_isa_flags to check if we SHOULD
optimize with nops. */
- if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
+ if (fragP->tc_frag_data.isanop)
patt = alt_patt;
- else
- patt = f32_patt;
break;
+
+ case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
case PROCESSOR_NOCONA:
case PROCESSOR_CORE:
case PROCESSOR_BD:
case PROCESSOR_ZNVER:
case PROCESSOR_BT:
- patt = alt_patt;
+ if (fragP->tc_frag_data.cpunop)
+ patt = alt_patt;
break;
+
case PROCESSOR_I386:
case PROCESSOR_I486:
case PROCESSOR_PENTIUM:
- case PROCESSOR_PENTIUMPRO:
+ case PROCESSOR_I686:
case PROCESSOR_IAMCU:
case PROCESSOR_GENERIC32:
- patt = f32_patt;
break;
case PROCESSOR_NONE:
abort ();
abort ();
break;
- case PROCESSOR_I386:
- case PROCESSOR_I486:
- case PROCESSOR_PENTIUM:
- case PROCESSOR_IAMCU:
- case PROCESSOR_K6:
- case PROCESSOR_ATHLON:
- case PROCESSOR_K8:
- case PROCESSOR_AMDFAM10:
- case PROCESSOR_BD:
- case PROCESSOR_ZNVER:
- case PROCESSOR_BT:
- case PROCESSOR_GENERIC32:
+ default:
/* We use cpu_arch_isa_flags to check if we CAN optimize
with nops. */
- if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
+ if (fragP->tc_frag_data.isanop)
patt = alt_patt;
- else
- patt = f32_patt;
- break;
- case PROCESSOR_PENTIUMPRO:
- case PROCESSOR_PENTIUM4:
- case PROCESSOR_NOCONA:
- case PROCESSOR_CORE:
- case PROCESSOR_CORE2:
- case PROCESSOR_COREI7:
- if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
- patt = alt_patt;
- else
- patt = f32_patt;
- break;
- case PROCESSOR_GENERIC64:
- patt = alt_patt;
break;
+
case PROCESSOR_NONE:
abort ();
}
}
- if (patt == f32_patt)
+ if (patt != alt_patt)
{
- max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
+ max_single_nop_size = patt == f32_patt ? ARRAY_SIZE (f32_patt)
+ : ARRAY_SIZE (f64_patt);
/* Limit number of NOPs to 2 for older processors. */
max_number_of_nops = 2;
}
}
}
+/* Return whether insn template T depends on CPU feature CPU.  Features
+   which have a dedicated bitfield member in i386_cpu_attr (ones which may
+   be combined with an ISA enumerator in a template) are checked
+   individually; every other feature is encoded via the single "isa"
+   enumerator field.  */
+static INLINE bool
+is_cpu (const insn_template *t, enum i386_cpu cpu)
+{
+ switch (cpu)
+ {
+ case Cpu287: return t->cpu.bitfield.cpu287;
+ case Cpu387: return t->cpu.bitfield.cpu387;
+ case Cpu3dnow: return t->cpu.bitfield.cpu3dnow;
+ case Cpu3dnowA: return t->cpu.bitfield.cpu3dnowa;
+ case CpuAVX: return t->cpu.bitfield.cpuavx;
+ case CpuHLE: return t->cpu.bitfield.cpuhle;
+ case CpuAVX512F: return t->cpu.bitfield.cpuavx512f;
+ case CpuAVX512VL: return t->cpu.bitfield.cpuavx512vl;
+ case Cpu64: return t->cpu.bitfield.cpu64;
+ case CpuNo64: return t->cpu.bitfield.cpuno64;
+ default:
+ gas_assert (cpu < CpuAttrEnums);
+ }
+ /* The "isa" field stores the enumerator biased by 1, so that a value of
+    zero means "no ISA dependency".  */
+ return t->cpu.bitfield.isa == cpu + 1u;
+}
+
+/* Expand the compact per-template attribute representation A into the wide
+   i386_cpu_flags bitmap: the individual feature bits above the CpuIsaBits-
+   wide "isa" enumerator are relocated to start at bit position CpuAttrEnums,
+   and the enumerator itself (biased by 1; zero means "none") is converted
+   to its single corresponding flag bit.  */
+static i386_cpu_flags cpu_flags_from_attr (i386_cpu_attr a)
+{
+ const unsigned int bps = sizeof (a.array[0]) * CHAR_BIT;
+ i386_cpu_flags f = { .array[0] = 0 };
+
+ switch (ARRAY_SIZE(a.array))
+ {
+ case 1:
+ f.array[CpuAttrEnums / bps]
+ |= (a.array[0] >> CpuIsaBits) << (CpuAttrEnums % bps);
+ /* The relocated bits may straddle an array-element boundary; spill the
+    upper part into the next element when they do.  */
+ if (CpuAttrEnums % bps > CpuIsaBits)
+ f.array[CpuAttrEnums / bps + 1]
+ = (a.array[0] >> CpuIsaBits) >> (bps - CpuAttrEnums % bps);
+ break;
+ default:
+ /* Only single-element attribute arrays are handled so far.  */
+ abort ();
+ }
+
+ if (a.bitfield.isa)
+ f.array[(a.bitfield.isa - 1) / bps] |= 1u << ((a.bitfield.isa - 1) % bps);
+
+ return f;
+}
+
static INLINE int
cpu_flags_all_zero (const union i386_cpu_flags *x)
{
}
+/* Return whether insn template T is usable in the currently selected code
+   mode (non-zero when usable): templates marked CpuNo64 are rejected in
+   64-bit mode, ones marked Cpu64 outside of it.  */
static INLINE int
-cpu_flags_check_cpu64 (i386_cpu_flags f)
+cpu_flags_check_cpu64 (const insn_template *t)
{
- return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
- || (flag_code != CODE_64BIT && f.bitfield.cpu64));
+ return flag_code == CODE_64BIT
+ ? !t->cpu.bitfield.cpuno64
+ : !t->cpu.bitfield.cpu64;
}
static INLINE i386_cpu_flags
static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
+/* Return whether the insn being assembled definitely requires EVEX (rather
+   than VEX) encoding: either an {evex}/{evex512} pseudo prefix was given,
+   or a mask register is in use.  */
+static INLINE bool need_evex_encoding (void)
+{
+ return i.vec_encoding == vex_encoding_evex
+ || i.vec_encoding == vex_encoding_evex512
+ || i.mask.reg;
+}
+
#define CPU_FLAGS_ARCH_MATCH 0x1
#define CPU_FLAGS_64BIT_MATCH 0x2
static int
cpu_flags_match (const insn_template *t)
{
- i386_cpu_flags x = t->cpu_flags;
- int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
+ i386_cpu_flags x = cpu_flags_from_attr (t->cpu);
+ int match = cpu_flags_check_cpu64 (t) ? CPU_FLAGS_64BIT_MATCH : 0;
x.bitfield.cpu64 = 0;
x.bitfield.cpuno64 = 0;
/* This instruction is available only on some archs. */
i386_cpu_flags cpu = cpu_arch_flags;
+ /* Dual VEX/EVEX templates may need stripping of one of the flags. */
+ if (t->opcode_modifier.vex && t->opcode_modifier.evex)
+ {
+ /* Dual AVX/AVX512F templates need to retain AVX512F only if we already
+ know that EVEX encoding will be needed. */
+ if ((x.bitfield.cpuavx || x.bitfield.cpuavx2)
+ && x.bitfield.cpuavx512f)
+ {
+ if (need_evex_encoding ())
+ {
+ x.bitfield.cpuavx = 0;
+ x.bitfield.cpuavx2 = 0;
+ }
+ /* need_evex_encoding() isn't reliable before operands were
+ parsed. */
+ else if (i.operands)
+ {
+ x.bitfield.cpuavx512f = 0;
+ x.bitfield.cpuavx512vl = 0;
+ if (x.bitfield.cpufma && !cpu.bitfield.cpufma)
+ x.bitfield.cpuavx = 0;
+ }
+ }
+ }
+
/* AVX512VL is no standalone feature - match it and then strip it. */
if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
return match;
cpu = cpu_flags_and (x, cpu);
if (!cpu_flags_all_zero (&cpu))
{
- if (x.bitfield.cpuavx)
+ if (t->cpu.bitfield.cpuavx && t->cpu.bitfield.cpuavx512f)
+ {
+ if ((need_evex_encoding ()
+ ? cpu.bitfield.cpuavx512f
+ : cpu.bitfield.cpuavx)
+ && (!x.bitfield.cpufma || cpu.bitfield.cpufma
+ || cpu_arch_flags.bitfield.cpuavx512f)
+ && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
+ && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
+ && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
+ match |= CPU_FLAGS_ARCH_MATCH;
+ }
+ else if (x.bitfield.cpuavx)
{
/* We need to check a few extra flags with AVX. */
if (cpu.bitfield.cpuavx
|| (sse2avx && !i.prefix[DATA_PREFIX]))
&& (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
&& (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
- && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
+ && (!x.bitfield.cpupclmulqdq || cpu.bitfield.cpupclmulqdq))
match |= CPU_FLAGS_ARCH_MATCH;
}
+ else if (x.bitfield.cpuavx2 && cpu.bitfield.cpuavx2)
+ match |= CPU_FLAGS_ARCH_MATCH;
else if (x.bitfield.cpuavx512f)
{
/* We need to check a few extra flags with AVX512F. */
if (cpu.bitfield.cpuavx512f
- && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
- && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
- && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
+ && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni))
match |= CPU_FLAGS_ARCH_MATCH;
}
else
/* For FMA4 and XOP insns VEX.W controls just the first two
register operands. */
- if (t->cpu_flags.bitfield.cpufma4 || t->cpu_flags.bitfield.cpuxop)
+ if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
given = j < 2 ? 1 - j : j;
if (t->operand_types[j].bitfield.class == Reg
+/* Switch code generation to the 16-/32-/64-bit mode VALUE.  When CHECK is
+   non-zero an unsupported mode is a fatal error; otherwise it is merely
+   diagnosed via as_bad.  The mode (and stackop_size) is left unchanged if
+   the active architecture cannot support it.  */
static void
update_code_flag (int value, int check)
{
- PRINTF_LIKE ((*as_error));
+ PRINTF_LIKE ((*as_error)) = check ? as_fatal : as_bad;
- flag_code = (enum flag_code) value;
- if (flag_code == CODE_64BIT)
- {
- cpu_arch_flags.bitfield.cpu64 = 1;
- cpu_arch_flags.bitfield.cpuno64 = 0;
- }
- else
- {
- cpu_arch_flags.bitfield.cpu64 = 0;
- cpu_arch_flags.bitfield.cpuno64 = 1;
- }
- if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
+ if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpu64 )
{
- if (check)
- as_error = as_fatal;
- else
- as_error = as_bad;
- (*as_error) (_("64bit mode not supported on `%s'."),
- cpu_arch_name ? cpu_arch_name : default_arch);
+ as_error (_("64bit mode not supported on `%s'."),
+ cpu_arch_name ? cpu_arch_name : default_arch);
+ return;
}
+
if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
{
- if (check)
- as_error = as_fatal;
- else
- as_error = as_bad;
- (*as_error) (_("32bit mode not supported on `%s'."),
- cpu_arch_name ? cpu_arch_name : default_arch);
+ as_error (_("32bit mode not supported on `%s'."),
+ cpu_arch_name ? cpu_arch_name : default_arch);
+ return;
}
+
+ /* Only commit the new mode once both capability checks have passed.  */
+ flag_code = (enum flag_code) value;
+
stackop_size = '\0';
}
flag_code = (enum flag_code) new_code_flag;
if (flag_code != CODE_16BIT)
abort ();
- cpu_arch_flags.bitfield.cpu64 = 0;
- cpu_arch_flags.bitfield.cpuno64 = 1;
stackop_size = LONG_MNEM_SUFFIX;
}
}
+/* Append PFX (e.g. "." for enabling, ".no" for disabling) followed by NAME
+   to the accumulated sub-architecture name string, allocating or growing
+   cpu_sub_arch_name as needed.  */
static void
-extend_cpu_sub_arch_name (const char *name)
+extend_cpu_sub_arch_name (const char *pfx, const char *name)
{
if (cpu_sub_arch_name)
cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
- ".", name, (const char *) NULL);
+ pfx, name, (const char *) NULL);
else
- cpu_sub_arch_name = concat (".", name, (const char *) NULL);
+ cpu_sub_arch_name = concat (pfx, name, (const char *) NULL);
+}
+
+/* Enable the ISA extension at cpu_arch[IDX]: OR its enable flags into
+   cpu_arch_flags and cpu_arch_isa_flags, extending the sub-arch name only
+   when this actually changes the set of enabled features.  */
+static void isa_enable (unsigned int idx)
+{
+ i386_cpu_flags flags = cpu_flags_or (cpu_arch_flags, cpu_arch[idx].enable);
+
+ if (!cpu_flags_equal (&flags, &cpu_arch_flags))
+ {
+ extend_cpu_sub_arch_name (".", cpu_arch[idx].name);
+ cpu_arch_flags = flags;
+ }
+
+ cpu_arch_isa_flags = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[idx].enable);
+}
+
+/* Disable the ISA extension at cpu_arch[IDX]: clear its disable flags from
+   cpu_arch_flags and cpu_arch_isa_flags, extending the sub-arch name with a
+   ".no" prefix only when this actually changes the set of enabled
+   features.  */
+static void isa_disable (unsigned int idx)
+{
+ i386_cpu_flags flags
+ = cpu_flags_and_not (cpu_arch_flags, cpu_arch[idx].disable);
+
+ if (!cpu_flags_equal (&flags, &cpu_arch_flags))
+ {
+ extend_cpu_sub_arch_name (".no", cpu_arch[idx].name);
+ cpu_arch_flags = flags;
+ }
+
+ cpu_arch_isa_flags
+ = cpu_flags_and_not (cpu_arch_isa_flags, cpu_arch[idx].disable);
+}
static void
i386_cpu_flags isa_flags;
enum processor_type isa;
enum flag_code flag_code;
+ unsigned int vector_size;
char stackop_size;
bool no_cond_jump_promotion;
} arch_stack_entry;
static const arch_stack_entry *arch_stack_top;
+ char *s;
+ int e;
+ const char *string;
+ unsigned int j = 0;
SKIP_WHITESPACE ();
- if (!is_end_of_line[(unsigned char) *input_line_pointer])
+ if (is_end_of_line[(unsigned char) *input_line_pointer])
+ {
+ as_bad (_("missing cpu architecture"));
+ input_line_pointer++;
+ return;
+ }
+
+ e = get_symbol_name (&s);
+ string = s;
+
+ if (strcmp (string, "push") == 0)
+ {
+ arch_stack_entry *top = XNEW (arch_stack_entry);
+
+ top->name = cpu_arch_name;
+ if (cpu_sub_arch_name)
+ top->sub_name = xstrdup (cpu_sub_arch_name);
+ else
+ top->sub_name = NULL;
+ top->flags = cpu_arch_flags;
+ top->isa = cpu_arch_isa;
+ top->isa_flags = cpu_arch_isa_flags;
+ top->flag_code = flag_code;
+ top->vector_size = vector_size;
+ top->stackop_size = stackop_size;
+ top->no_cond_jump_promotion = no_cond_jump_promotion;
+
+ top->prev = arch_stack_top;
+ arch_stack_top = top;
+
+ (void) restore_line_pointer (e);
+ demand_empty_rest_of_line ();
+ return;
+ }
+
+ if (strcmp (string, "pop") == 0)
{
- char *s;
- int e = get_symbol_name (&s);
- const char *string = s;
- unsigned int j = 0;
- i386_cpu_flags flags;
+ const arch_stack_entry *top = arch_stack_top;
- if (strcmp (string, "default") == 0)
+ if (!top)
+ as_bad (_(".arch stack is empty"));
+ else if (top->flag_code != flag_code
+ || top->stackop_size != stackop_size)
{
- if (strcmp (default_arch, "iamcu") == 0)
- string = default_arch;
- else
- {
- static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
+ static const unsigned int bits[] = {
+ [CODE_16BIT] = 16,
+ [CODE_32BIT] = 32,
+ [CODE_64BIT] = 64,
+ };
- cpu_arch_name = NULL;
- free (cpu_sub_arch_name);
- cpu_sub_arch_name = NULL;
- cpu_arch_flags = cpu_unknown_flags;
- if (flag_code == CODE_64BIT)
- {
- cpu_arch_flags.bitfield.cpu64 = 1;
- cpu_arch_flags.bitfield.cpuno64 = 0;
- }
- else
- {
- cpu_arch_flags.bitfield.cpu64 = 0;
- cpu_arch_flags.bitfield.cpuno64 = 1;
- }
- cpu_arch_isa = PROCESSOR_UNKNOWN;
- cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
- if (!cpu_arch_tune_set)
- {
- cpu_arch_tune = cpu_arch_isa;
- cpu_arch_tune_flags = cpu_arch_isa_flags;
- }
+ as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
+ bits[top->flag_code],
+ top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
+ }
+ else
+ {
+ arch_stack_top = top->prev;
- j = ARRAY_SIZE (cpu_arch) + 1;
- }
+ cpu_arch_name = top->name;
+ free (cpu_sub_arch_name);
+ cpu_sub_arch_name = top->sub_name;
+ cpu_arch_flags = top->flags;
+ cpu_arch_isa = top->isa;
+ cpu_arch_isa_flags = top->isa_flags;
+ vector_size = top->vector_size;
+ no_cond_jump_promotion = top->no_cond_jump_promotion;
+
+ XDELETE (top);
}
- else if (strcmp (string, "push") == 0)
+
+ (void) restore_line_pointer (e);
+ demand_empty_rest_of_line ();
+ return;
+ }
+
+ if (strcmp (string, "default") == 0)
+ {
+ if (strcmp (default_arch, "iamcu") == 0)
+ string = default_arch;
+ else
{
- arch_stack_entry *top = XNEW (arch_stack_entry);
+ static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
- top->name = cpu_arch_name;
- if (cpu_sub_arch_name)
- top->sub_name = xstrdup (cpu_sub_arch_name);
- else
- top->sub_name = NULL;
- top->flags = cpu_arch_flags;
- top->isa = cpu_arch_isa;
- top->isa_flags = cpu_arch_isa_flags;
- top->flag_code = flag_code;
- top->stackop_size = stackop_size;
- top->no_cond_jump_promotion = no_cond_jump_promotion;
+ cpu_arch_name = NULL;
+ free (cpu_sub_arch_name);
+ cpu_sub_arch_name = NULL;
+ cpu_arch_flags = cpu_unknown_flags;
+ cpu_arch_isa = PROCESSOR_UNKNOWN;
+ cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
+ if (!cpu_arch_tune_set)
+ cpu_arch_tune = PROCESSOR_UNKNOWN;
- top->prev = arch_stack_top;
- arch_stack_top = top;
+ vector_size = VSZ_DEFAULT;
- (void) restore_line_pointer (e);
- demand_empty_rest_of_line ();
- return;
+ j = ARRAY_SIZE (cpu_arch) + 1;
}
- else if (strcmp (string, "pop") == 0)
- {
- const arch_stack_entry *top = arch_stack_top;
+ }
- if (!top)
- as_bad (_(".arch stack is empty"));
- else if (top->flag_code != flag_code
- || top->stackop_size != stackop_size)
+ for (; j < ARRAY_SIZE (cpu_arch); j++)
+ {
+ if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
+ && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
+ {
+ if (*string != '.')
{
- static const unsigned int bits[] = {
- [CODE_16BIT] = 16,
- [CODE_32BIT] = 32,
- [CODE_64BIT] = 64,
- };
+ check_cpu_arch_compatible (string, cpu_arch[j].enable);
- as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
- bits[top->flag_code],
- top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
- }
- else
- {
- arch_stack_top = top->prev;
+ if (flag_code == CODE_64BIT && !cpu_arch[j].enable.bitfield.cpu64 )
+ {
+ as_bad (_("64bit mode not supported on `%s'."),
+ cpu_arch[j].name);
+ (void) restore_line_pointer (e);
+ ignore_rest_of_line ();
+ return;
+ }
- cpu_arch_name = top->name;
+ if (flag_code == CODE_32BIT && !cpu_arch[j].enable.bitfield.cpui386)
+ {
+ as_bad (_("32bit mode not supported on `%s'."),
+ cpu_arch[j].name);
+ (void) restore_line_pointer (e);
+ ignore_rest_of_line ();
+ return;
+ }
+
+ cpu_arch_name = cpu_arch[j].name;
free (cpu_sub_arch_name);
- cpu_sub_arch_name = top->sub_name;
- cpu_arch_flags = top->flags;
- cpu_arch_isa = top->isa;
- cpu_arch_isa_flags = top->isa_flags;
- no_cond_jump_promotion = top->no_cond_jump_promotion;
+ cpu_sub_arch_name = NULL;
+ cpu_arch_flags = cpu_arch[j].enable;
+ cpu_arch_isa = cpu_arch[j].type;
+ cpu_arch_isa_flags = cpu_arch[j].enable;
+ if (!cpu_arch_tune_set)
+ cpu_arch_tune = cpu_arch_isa;
+
+ vector_size = VSZ_DEFAULT;
- XDELETE (top);
+ pre_386_16bit_warned = false;
+ break;
}
+ if (cpu_flags_all_zero (&cpu_arch[j].enable))
+ continue;
+
+ isa_enable (j);
+
(void) restore_line_pointer (e);
- demand_empty_rest_of_line ();
- return;
- }
- for (; j < ARRAY_SIZE (cpu_arch); j++)
- {
- if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
- && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
+ switch (cpu_arch[j].vsz)
{
- if (*string != '.')
- {
- check_cpu_arch_compatible (string, cpu_arch[j].enable);
+ default:
+ break;
- cpu_arch_name = cpu_arch[j].name;
- free (cpu_sub_arch_name);
- cpu_sub_arch_name = NULL;
- cpu_arch_flags = cpu_arch[j].enable;
- if (flag_code == CODE_64BIT)
- {
- cpu_arch_flags.bitfield.cpu64 = 1;
- cpu_arch_flags.bitfield.cpuno64 = 0;
- }
- else
- {
- cpu_arch_flags.bitfield.cpu64 = 0;
- cpu_arch_flags.bitfield.cpuno64 = 1;
- }
- cpu_arch_isa = cpu_arch[j].type;
- cpu_arch_isa_flags = cpu_arch[j].enable;
- if (!cpu_arch_tune_set)
+ case vsz_set:
+#ifdef SVR4_COMMENT_CHARS
+ if (*input_line_pointer == ':' || *input_line_pointer == '/')
+#else
+ if (*input_line_pointer == '/')
+#endif
+ {
+ ++input_line_pointer;
+ switch (get_absolute_expression ())
{
- cpu_arch_tune = cpu_arch_isa;
- cpu_arch_tune_flags = cpu_arch_isa_flags;
+ case 512: vector_size = VSZ512; break;
+ case 256: vector_size = VSZ256; break;
+ case 128: vector_size = VSZ128; break;
+ default:
+ as_bad (_("Unrecognized vector size specifier"));
+ ignore_rest_of_line ();
+ return;
}
- pre_386_16bit_warned = false;
break;
}
-
- if (cpu_flags_all_zero (&cpu_arch[j].enable))
- continue;
-
- flags = cpu_flags_or (cpu_arch_flags,
- cpu_arch[j].enable);
-
- if (!cpu_flags_equal (&flags, &cpu_arch_flags))
- {
- extend_cpu_sub_arch_name (string + 1);
- cpu_arch_flags = flags;
- cpu_arch_isa_flags = flags;
- }
- else
- cpu_arch_isa_flags
- = cpu_flags_or (cpu_arch_isa_flags,
- cpu_arch[j].enable);
- (void) restore_line_pointer (e);
- demand_empty_rest_of_line ();
- return;
+ /* Fall through. */
+ case vsz_reset:
+ vector_size = VSZ_DEFAULT;
+ break;
}
- }
- if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
- {
- /* Disable an ISA extension. */
- for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
- if (cpu_arch[j].type == PROCESSOR_NONE
- && strcmp (string + 3, cpu_arch[j].name) == 0)
- {
- flags = cpu_flags_and_not (cpu_arch_flags,
- cpu_arch[j].disable);
- if (!cpu_flags_equal (&flags, &cpu_arch_flags))
- {
- extend_cpu_sub_arch_name (string + 1);
- cpu_arch_flags = flags;
- cpu_arch_isa_flags = flags;
- }
- (void) restore_line_pointer (e);
- demand_empty_rest_of_line ();
- return;
- }
+ demand_empty_rest_of_line ();
+ return;
}
+ }
- if (j == ARRAY_SIZE (cpu_arch))
- as_bad (_("no such architecture: `%s'"), string);
+ if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
+ {
+ /* Disable an ISA extension. */
+ for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
+ if (cpu_arch[j].type == PROCESSOR_NONE
+ && strcmp (string + 3, cpu_arch[j].name) == 0)
+ {
+ isa_disable (j);
+
+ if (cpu_arch[j].vsz == vsz_set)
+ vector_size = VSZ_DEFAULT;
- *input_line_pointer = e;
+ (void) restore_line_pointer (e);
+ demand_empty_rest_of_line ();
+ return;
+ }
}
- else
- as_bad (_("missing cpu architecture"));
+
+ if (j == ARRAY_SIZE (cpu_arch))
+ as_bad (_("no such architecture: `%s'"), string);
+
+ *input_line_pointer = e;
no_cond_jump_promotion = 0;
if (*input_line_pointer == ','
&& !is_end_of_line[(unsigned char) input_line_pointer[1]])
{
- char *string;
- char e;
-
++input_line_pointer;
- e = get_symbol_name (&string);
+ e = get_symbol_name (&s);
+ string = s;
if (strcmp (string, "nojumps") == 0)
no_cond_jump_promotion = 1;
/* Fill in lexical tables: mnemonic_chars, operand_chars. */
{
int c;
- char *p;
+ const char *p;
for (c = 0; c < 256; c++)
{
register_chars[c] = mnemonic_chars[c];
operand_chars[c] = c;
}
- else if (c == '{' || c == '}')
- {
- mnemonic_chars[c] = c;
- operand_chars[c] = c;
- }
#ifdef SVR4_COMMENT_CHARS
else if (c == '\\' && strchr (i386_comment_chars, '/'))
operand_chars[c] = c;
operand_chars[c] = c;
}
-#ifdef LEX_QM
- operand_chars['?'] = '?';
-#endif
mnemonic_chars['_'] = '_';
mnemonic_chars['-'] = '-';
mnemonic_chars['.'] = '.';
+ for (p = extra_symbol_chars; *p != '\0'; p++)
+ operand_chars[(unsigned char) *p] = *p;
for (p = operand_special_chars; *p != '\0'; p++)
operand_chars[(unsigned char) *p] = *p;
}
i.tm = *t;
+ /* Dual VEX/EVEX templates need stripping one of the possible variants. */
+ if (t->opcode_modifier.vex && t->opcode_modifier.evex)
+ {
+ if ((is_cpu (t, CpuAVX) || is_cpu (t, CpuAVX2))
+ && is_cpu (t, CpuAVX512F))
+ {
+ if (need_evex_encoding ())
+ {
+ i.tm.opcode_modifier.vex = 0;
+ i.tm.cpu.bitfield.cpuavx = 0;
+ if (is_cpu (&i.tm, CpuAVX2))
+ i.tm.cpu.bitfield.isa = 0;
+ }
+ else
+ {
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.cpu.bitfield.cpuavx512f = 0;
+ }
+ }
+ }
+
/* Note that for pseudo prefixes this produces a length of 1. But for them
the length isn't interesting at all. */
for (l = 1; l < 4; ++l)
&& is_evex_encoding (&i.tm)
&& (i.vec_encoding != vex_encoding_evex
|| cpu_arch_isa_flags.bitfield.cpuavx512vl
- || i.tm.cpu_flags.bitfield.cpuavx512vl
+ || is_cpu (&i.tm, CpuAVX512VL)
|| (i.tm.operand_types[2].bitfield.zmmword
&& i.types[2].bitfield.ymmword))))
&& i.tm.opcode_space == SPACE_0F
i.tm.opcode_modifier.vex = VEX128;
i.tm.opcode_modifier.vexw = VEXW0;
i.tm.opcode_modifier.evex = 0;
+ i.vec_encoding = vex_encoding_vex;
+ i.mask.reg = NULL;
}
else if (optimize > 1)
i.tm.opcode_modifier.evex = EVEX128;
i.types[j].bitfield.disp8
= fits_in_disp8 (i.op[j].disps->X_add_number);
}
+ else if (optimize_for_space
+ && i.tm.base_opcode == 0x29
+ && i.tm.opcode_space == SPACE_0F38
+ && i.operands == i.reg_operands
+ && i.op[0].regs == i.op[1].regs
+ && (!i.tm.opcode_modifier.vex
+ || !(i.op[0].regs->reg_flags & RegRex))
+ && !is_evex_encoding (&i.tm))
+ {
+ /* Optimize: -Os:
+ pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN
+ vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
+ vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
+ */
+ i.tm.opcode_space = SPACE_0F;
+ i.tm.base_opcode = 0x76;
+ }
+ else if (((i.tm.base_opcode >= 0x64
+ && i.tm.base_opcode <= 0x66
+ && i.tm.opcode_space == SPACE_0F)
+ || (i.tm.base_opcode == 0x37
+ && i.tm.opcode_space == SPACE_0F38))
+ && i.operands == i.reg_operands
+ && i.op[0].regs == i.op[1].regs
+ && !is_evex_encoding (&i.tm))
+ {
+ /* Optimize: -O:
+ pcmpgt[bwd] %mmN, %mmN -> pxor %mmN, %mmN
+ pcmpgt[bwdq] %xmmN, %xmmN -> pxor %xmmN, %xmmN
+ vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmmN, %xmmN, %xmmM (N < 8)
+ vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmm0, %xmm0, %xmmM (N > 7)
+ vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymmN, %ymmN, %ymmM (N < 8)
+ vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymm0, %ymm0, %ymmM (N > 7)
+ */
+ i.tm.opcode_space = SPACE_0F;
+ i.tm.base_opcode = 0xef;
+ if (i.tm.opcode_modifier.vex && (i.op[0].regs->reg_flags & RegRex))
+ {
+ if (i.operands == 2)
+ {
+ gas_assert (i.tm.opcode_modifier.sse2avx);
+
+ i.operands = 3;
+ i.reg_operands = 3;
+ i.tm.operands = 3;
+
+ i.op[2].regs = i.op[0].regs;
+ i.types[2] = i.types[0];
+ i.flags[2] = i.flags[0];
+ i.tm.operand_types[2] = i.tm.operand_types[0];
+
+ i.tm.opcode_modifier.sse2avx = 0;
+ }
+ i.op[0].regs -= i.op[0].regs->reg_num + 8;
+ i.op[1].regs = i.op[0].regs;
+ }
+ }
+ else if (optimize_for_space
+ && i.tm.base_opcode == 0x59
+ && i.tm.opcode_space == SPACE_0F38
+ && i.operands == i.reg_operands
+ && i.tm.opcode_modifier.vex
+ && !(i.op[0].regs->reg_flags & RegRex)
+ && i.op[0].regs->reg_type.bitfield.xmmword
+ && i.vec_encoding != vex_encoding_vex3)
+ {
+ /* Optimize: -Os:
+ vpbroadcastq %xmmN, %xmmM -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
+ */
+ i.tm.opcode_space = SPACE_0F;
+ i.tm.base_opcode = 0x6c;
+ i.tm.opcode_modifier.vexvvvv = 1;
+
+ ++i.operands;
+ ++i.reg_operands;
+ ++i.tm.operands;
+
+ i.op[2].regs = i.op[0].regs;
+ i.types[2] = i.types[0];
+ i.flags[2] = i.flags[0];
+ i.tm.operand_types[2] = i.tm.operand_types[0];
+
+ swap_2_operands (1, 2);
+ }
}
/* Return non-zero for load instruction. */
case invalid_register_operand:
err_msg = _("invalid register operand");
break;
+ case internal_error:
+ err_msg = _("internal error");
+ break;
}
as_bad (_("%s for `%s'"), err_msg,
pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
bypass the logic below when easily possible. */
&& t->opcode_space >= SPACE_0F
&& t->opcode_space <= SPACE_0F3A
- && !i.tm.cpu_flags.bitfield.cpusse4a
+ && !is_cpu (&i.tm, CpuSSE4a)
&& !is_any_vex_encoding (t))
{
bool simd = false;
if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
as_bad (_("expecting indirect branch instruction after `notrack'"));
- if (i.tm.cpu_flags.bitfield.cpumpx)
+ if (is_cpu (&i.tm, CpuMPX))
{
if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
optimize_encoding ();
+ /* Past optimization there's no need to distinguish vex_encoding_evex and
+ vex_encoding_evex512 anymore. */
+ if (i.vec_encoding == vex_encoding_evex512)
+ i.vec_encoding = vex_encoding_evex;
+
if (use_unaligned_vector_move)
encode_with_unaligned_vector_move ();
return;
/* Check if IP-relative addressing requirements can be satisfied. */
- if (i.tm.cpu_flags.bitfield.cpuprefetchi
+ if (is_cpu (&i.tm, CpuPREFETCHI)
&& !(i.base_reg && i.base_reg->reg_num == RegIP))
as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
case RegSIMD:
if (i.tm.operand_types[j].bitfield.tmmword)
i.xstate |= xstate_tmm;
- else if (i.tm.operand_types[j].bitfield.zmmword)
+ else if (i.tm.operand_types[j].bitfield.zmmword
+ && !i.tm.opcode_modifier.vex
+ && vector_size >= VSZ512)
i.xstate |= xstate_zmm;
- else if (i.tm.operand_types[j].bitfield.ymmword)
+ else if (i.tm.operand_types[j].bitfield.ymmword
+ && vector_size >= VSZ256)
i.xstate |= xstate_ymm;
else if (i.tm.operand_types[j].bitfield.xmmword)
i.xstate |= xstate_xmm;
while (1)
{
mnem_p = mnemonic;
+ /* Pseudo-prefixes start with an opening figure brace. */
+ if ((*mnem_p = *l) == '{')
+ {
+ ++mnem_p;
+ ++l;
+ }
while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
{
if (*mnem_p == '.')
mnem_p++;
if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
{
+ too_long:
as_bad (_("no such instruction: `%s'"), token_start);
return NULL;
}
l++;
}
- if (!is_space_char (*l)
- && *l != END_OF_INSN
- && (intel_syntax
- || (*l != PREFIX_SEPARATOR
- && *l != ',')))
+ /* Pseudo-prefixes end with a closing figure brace. */
+ if (*mnemonic == '{' && *l == '}')
+ {
+ *mnem_p++ = *l++;
+ if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
+ goto too_long;
+ *mnem_p = '\0';
+
+ /* Point l at the closing brace if there's no other separator. */
+ if (*l != END_OF_INSN && !is_space_char (*l)
+ && *l != PREFIX_SEPARATOR)
+ --l;
+ }
+ else if (!is_space_char (*l)
+ && *l != END_OF_INSN
+ && (intel_syntax
+ || (*l != PREFIX_SEPARATOR && *l != ',')))
{
if (prefix_only)
break;
&& current_templates
&& current_templates->start->opcode_modifier.isprefix)
{
- if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
+ if (!cpu_flags_check_cpu64 (current_templates->start))
{
as_bad ((flag_code != CODE_64BIT
? _("`%s' is only supported in 64-bit mode")
case PREFIX_EXIST:
return NULL;
case PREFIX_DS:
- if (current_templates->start->cpu_flags.bitfield.cpuibt)
+ if (is_cpu (current_templates->start, CpuIBT))
i.notrack_prefix = insn_name (current_templates->start);
break;
case PREFIX_REP:
- if (current_templates->start->cpu_flags.bitfield.cpuhle)
+ if (is_cpu (current_templates->start, CpuHLE))
i.hle_prefix = insn_name (current_templates->start);
- else if (current_templates->start->cpu_flags.bitfield.cpumpx)
+ else if (is_cpu (current_templates->start, CpuMPX))
i.bnd_prefix = insn_name (current_templates->start);
else
i.rep_prefix = insn_name (current_templates->start);
break;
}
}
- else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
+ else if ((flag_code == CODE_16BIT)
+ ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
guess_suffix = WORD_MNEM_SUFFIX;
- else if (flag_code != CODE_64BIT || !(i.prefix[REX_PREFIX] & REX_W))
+ else if (flag_code != CODE_64BIT
+ || (!(i.prefix[REX_PREFIX] & REX_W)
+ /* A more generic (but also more involved) way of dealing
+ with the special case(s) would be to go look for
+ DefaultSize attributes on any of the templates. */
+ && current_templates->start->mnem_off != MN_push))
guess_suffix = LONG_MNEM_SUFFIX;
for (op = i.operands; --op >= 0;)
operand size is YMMword or XMMword. Since this function runs after
template matching, there's no need to check for YMMword/XMMword in
the template. */
- cpu = cpu_flags_and (t->cpu_flags, avx512);
+ cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
if (!cpu_flags_all_zero (&cpu)
- && !t->cpu_flags.bitfield.cpuavx512vl
- && !cpu_arch_flags.bitfield.cpuavx512vl)
+ && !is_cpu (t, CpuAVX512VL)
+ && !cpu_arch_flags.bitfield.cpuavx512vl
+ && (!t->opcode_modifier.vex || need_evex_encoding ()))
{
for (op = 0; op < t->operands; ++op)
{
/* Somewhat similarly, templates specifying both AVX and AVX2 are
requiring AVX2 support if the actual operand size is YMMword. */
- if (t->cpu_flags.bitfield.cpuavx
- && t->cpu_flags.bitfield.cpuavx2
+ if (is_cpu (t, CpuAVX) && is_cpu (t, CpuAVX2)
&& !cpu_arch_flags.bitfield.cpuavx2)
{
for (op = 0; op < t->operands; ++op)
type.bitfield.xmmword = 1;
break;
case 32:
+ if (vector_size < VSZ256)
+ goto bad_broadcast;
type.bitfield.ymmword = 1;
break;
case 64:
+ if (vector_size < VSZ512)
+ goto bad_broadcast;
type.bitfield.zmmword = 1;
break;
default:
/* Check if requested masking is supported. */
if (i.mask.reg)
{
- switch (t->opcode_modifier.masking)
+ if (!t->opcode_modifier.masking)
{
- case BOTH_MASKING:
- break;
- case MERGING_MASKING:
- if (i.mask.zeroing)
- {
- case 0:
- i.error = unsupported_masking;
- return 1;
- }
- break;
- case DYNAMIC_MASKING:
- /* Memory destinations allow only merging masking. */
- if (i.mask.zeroing && i.mem_operands)
- {
- /* Find memory operand. */
- for (op = 0; op < i.operands; op++)
- if (i.flags[op] & Operand_Mem)
- break;
- gas_assert (op < i.operands);
- if (op == i.operands - 1)
- {
- i.error = unsupported_masking;
- return 1;
- }
- }
- break;
- default:
- abort ();
+ i.error = unsupported_masking;
+ return 1;
+ }
+
+ /* Common rules for masking:
+ - mask register destinations permit only zeroing-masking, without
+ that actually being expressed by a {z} operand suffix or EVEX.z,
+ - memory destinations allow only merging-masking,
+ - scatter/gather insns (i.e. ones using vSIB) only allow merging-
+ masking. */
+ if (i.mask.zeroing
+ && (t->operand_types[t->operands - 1].bitfield.class == RegMask
+ || (i.flags[t->operands - 1] & Operand_Mem)
+ || t->opcode_modifier.sib))
+ {
+ i.error = unsupported_masking;
+ return 1;
}
}
}
/* Check the special Imm4 cases; must be the first operand. */
- if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
+ if (is_cpu (t, CpuXOP) && t->operands == 5)
{
if (i.op[0].imms->X_op != O_constant
|| !fits_in_imm4 (i.op[0].imms->X_add_number))
/* Check vector Disp8 operand. */
if (t->opcode_modifier.disp8memshift
+ && (!t->opcode_modifier.vex
+ || need_evex_encoding ())
&& i.disp_encoding <= disp_encoding_8bit)
{
if (i.broadcast.type || i.broadcast.bytes)
return 1;
}
- if (i.vec_encoding == vex_encoding_evex)
+ /* Vector size restrictions. */
+ if ((vector_size < VSZ512
+ && (t->opcode_modifier.evex == EVEX512
+ || t->opcode_modifier.vsz >= VSZ512))
+ || (vector_size < VSZ256
+ && (t->opcode_modifier.evex == EVEX256
+ || t->opcode_modifier.vex == VEX256
+ || t->opcode_modifier.vsz >= VSZ256)))
+ {
+ i.error = unsupported;
+ return 1;
+ }
+
+ if (i.vec_encoding == vex_encoding_evex
+ || i.vec_encoding == vex_encoding_evex512)
{
/* This instruction must be encoded with EVEX prefix. */
if (!is_evex_encoding (t))
if (!(size_match & MATCH_REVERSE))
continue;
/* Try reversing direction of operands. */
- j = t->cpu_flags.bitfield.cpufma4
- || t->cpu_flags.bitfield.cpuxop ? 1 : i.operands - 1;
+ j = is_cpu (t, CpuFMA4)
+ || is_cpu (t, CpuXOP) ? 1 : i.operands - 1;
overlap0 = operand_type_and (i.types[0], operand_types[j]);
overlap1 = operand_type_and (i.types[j], operand_types[0]);
overlap2 = operand_type_and (i.types[1], operand_types[1]);
&& (intel_syntax || intel_mnemonic))
found_reverse_match |= Opcode_FloatR;
}
- else if (t->cpu_flags.bitfield.cpufma4
- || t->cpu_flags.bitfield.cpuxop)
+ else if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
{
found_reverse_match = Opcode_VexW;
goto check_operands_345;
continue;
}
+ /* Check whether to use the shorter VEX encoding for certain insns where
+ the EVEX encoding comes first in the table. This requires the respective
+ AVX-* feature to be explicitly enabled. */
+ if (t == current_templates->start
+ && t->opcode_modifier.disp8memshift
+ && !t->opcode_modifier.vex
+ && !need_evex_encoding ()
+ && t + 1 < current_templates->end
+ && t[1].opcode_modifier.vex)
+ {
+ i386_cpu_flags cpu;
+ unsigned int memshift = i.memshift;
+
+ i.memshift = 0;
+ cpu = cpu_flags_and (cpu_flags_from_attr (t[1].cpu), cpu_arch_isa_flags);
+ if (!cpu_flags_all_zero (&cpu)
+ && (!i.types[0].bitfield.disp8
+ || !operand_type_check (i.types[0], disp)
+ || i.op[0].disps->X_op != O_constant
+ || fits_in_disp8 (i.op[0].disps->X_add_number)))
+ {
+ specific_error = progress (internal_error);
+ continue;
+ }
+ i.memshift = memshift;
+ }
+
/* We've found a match; break out of loop. */
break;
}
&& (i.tm.base_opcode | 8) == 0xbe)
|| (i.tm.opcode_space == SPACE_BASE
&& i.tm.base_opcode == 0x63
- && i.tm.cpu_flags.bitfield.cpu64);
+ && is_cpu (&i.tm, Cpu64));
/* movsx/movzx want only their source operand considered here, for the
ambiguity checking below. The suffix will be replaced afterwards
for (op = 0; op < i.tm.operands; ++op)
{
- if (is_evex_encoding (&i.tm)
- && !cpu_arch_flags.bitfield.cpuavx512vl)
+ if (vector_size < VSZ512)
+ {
+ i.tm.operand_types[op].bitfield.zmmword = 0;
+ if (vector_size < VSZ256)
+ {
+ i.tm.operand_types[op].bitfield.ymmword = 0;
+ if (i.tm.operand_types[op].bitfield.xmmword
+ && (i.tm.opcode_modifier.evex == EVEXDYN
+ || (!i.tm.opcode_modifier.evex
+ && is_evex_encoding (&i.tm))))
+ i.tm.opcode_modifier.evex = EVEX128;
+ }
+ else if (i.tm.operand_types[op].bitfield.ymmword
+ && !i.tm.operand_types[op].bitfield.xmmword
+ && (i.tm.opcode_modifier.evex == EVEXDYN
+ || (!i.tm.opcode_modifier.evex
+ && is_evex_encoding (&i.tm))))
+ i.tm.opcode_modifier.evex = EVEX256;
+ }
+ else if (is_evex_encoding (&i.tm)
+ && !cpu_arch_flags.bitfield.cpuavx512vl)
{
if (i.tm.operand_types[op].bitfield.ymmword)
i.tm.operand_types[op].bitfield.xmmword = 0;
|| operand_type_equal (&overlap, &imm16_32)
|| operand_type_equal (&overlap, &imm16_32s))
{
- if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
+ if ((flag_code == CODE_16BIT)
+ ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
overlap = imm16;
else
overlap = imm32s;
off = 0;
}
- frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
-
/* 1 possible extra opcode + 4 byte displacement go in var part.
Pass reloc in fr_var. */
frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
}
/* inc, dec without inc/dec m. */
- if ((i.tm.cpu_flags.bitfield.cpuno64
+ if ((is_cpu (&i.tm, CpuNo64)
&& (i.tm.base_opcode | 0xf) == 0x4f)
|| ((i.tm.base_opcode | 1) == 0xff
&& i.tm.extension_opcode <= 0x1))
if (!align_branch_power
|| !align_branch_prefix_size
|| now_seg == absolute_section
- || i.tm.cpu_flags.bitfield.cpupadlock
+ || is_cpu (&i.tm, CpuPadLock)
|| !cpu_arch_flags.bitfield.cpui386)
return 0;
if (IS_ELF && x86_used_note && now_seg != absolute_section)
{
if ((i.xstate & xstate_tmm) == xstate_tmm
- || i.tm.cpu_flags.bitfield.cpuamx_tile)
+ || is_cpu (&i.tm, CpuAMX_TILE))
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
- if (i.tm.cpu_flags.bitfield.cpu8087
- || i.tm.cpu_flags.bitfield.cpu287
- || i.tm.cpu_flags.bitfield.cpu387
- || i.tm.cpu_flags.bitfield.cpu687
- || i.tm.cpu_flags.bitfield.cpufisttp)
+ if (is_cpu (&i.tm, Cpu8087)
+ || is_cpu (&i.tm, Cpu287)
+ || is_cpu (&i.tm, Cpu387)
+ || is_cpu (&i.tm, Cpu687)
+ || is_cpu (&i.tm, CpuFISTTP))
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
if ((i.xstate & xstate_mmx)
}
/* vzeroall / vzeroupper */
- if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
+ if (i.tm.base_opcode == 0x77 && is_cpu (&i.tm, CpuAVX))
i.xstate |= xstate_ymm;
if ((i.xstate & xstate_xmm)
/* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
|| (i.tm.base_opcode == 0xae
- && (i.tm.cpu_flags.bitfield.cpusse
- || i.tm.cpu_flags.bitfield.cpuavx))
- || i.tm.cpu_flags.bitfield.cpuwidekl
- || i.tm.cpu_flags.bitfield.cpukl)
+ && (is_cpu (&i.tm, CpuSSE)
+ || is_cpu (&i.tm, CpuAVX)))
+ || is_cpu (&i.tm, CpuWideKL)
+ || is_cpu (&i.tm, CpuKL))
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
if ((i.xstate & xstate_ymm) == xstate_ymm)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
- if (i.tm.cpu_flags.bitfield.cpufxsr)
+ if (is_cpu (&i.tm, CpuFXSR))
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
- if (i.tm.cpu_flags.bitfield.cpuxsave)
+ if (is_cpu (&i.tm, CpuXsave))
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
- if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
+ if (is_cpu (&i.tm, CpuXsaveopt))
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
- if (i.tm.cpu_flags.bitfield.cpuxsavec)
+ if (is_cpu (&i.tm, CpuXSAVEC))
x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
if (x86_feature_2_used
- || i.tm.cpu_flags.bitfield.cpucmov
- || i.tm.cpu_flags.bitfield.cpusyscall
+ || is_cpu (&i.tm, CpuCMOV)
+ || is_cpu (&i.tm, CpuSYSCALL)
|| i.tm.mnem_off == MN_cmpxchg8b)
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
- if (i.tm.cpu_flags.bitfield.cpusse3
- || i.tm.cpu_flags.bitfield.cpussse3
- || i.tm.cpu_flags.bitfield.cpusse4_1
- || i.tm.cpu_flags.bitfield.cpusse4_2
- || i.tm.cpu_flags.bitfield.cpucx16
- || i.tm.cpu_flags.bitfield.cpupopcnt
+ if (is_cpu (&i.tm, CpuSSE3)
+ || is_cpu (&i.tm, CpuSSSE3)
+ || is_cpu (&i.tm, CpuSSE4_1)
+ || is_cpu (&i.tm, CpuSSE4_2)
+ || is_cpu (&i.tm, CpuCX16)
+ || is_cpu (&i.tm, CpuPOPCNT)
/* LAHF-SAHF insns in 64-bit mode. */
|| (flag_code == CODE_64BIT
&& (i.tm.base_opcode | 1) == 0x9f
&& i.tm.opcode_space == SPACE_BASE))
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
- if (i.tm.cpu_flags.bitfield.cpuavx
- || i.tm.cpu_flags.bitfield.cpuavx2
+ if (is_cpu (&i.tm, CpuAVX)
+ || is_cpu (&i.tm, CpuAVX2)
 /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
 XOP, FMA4, LWP, TBM, and AMX. */
|| (i.tm.opcode_modifier.vex
- && !i.tm.cpu_flags.bitfield.cpuavx512f
- && !i.tm.cpu_flags.bitfield.cpuavx512bw
- && !i.tm.cpu_flags.bitfield.cpuavx512dq
- && !i.tm.cpu_flags.bitfield.cpuxop
- && !i.tm.cpu_flags.bitfield.cpufma4
- && !i.tm.cpu_flags.bitfield.cpulwp
- && !i.tm.cpu_flags.bitfield.cputbm
+ && !is_cpu (&i.tm, CpuAVX512F)
+ && !is_cpu (&i.tm, CpuAVX512BW)
+ && !is_cpu (&i.tm, CpuAVX512DQ)
+ && !is_cpu (&i.tm, CpuXOP)
+ && !is_cpu (&i.tm, CpuFMA4)
+ && !is_cpu (&i.tm, CpuLWP)
+ && !is_cpu (&i.tm, CpuTBM)
&& !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
- || i.tm.cpu_flags.bitfield.cpuf16c
- || i.tm.cpu_flags.bitfield.cpufma
- || i.tm.cpu_flags.bitfield.cpulzcnt
- || i.tm.cpu_flags.bitfield.cpumovbe
- || i.tm.cpu_flags.bitfield.cpuxsaves
+ || is_cpu (&i.tm, CpuF16C)
+ || is_cpu (&i.tm, CpuFMA)
+ || is_cpu (&i.tm, CpuLZCNT)
+ || is_cpu (&i.tm, CpuMovbe)
+ || is_cpu (&i.tm, CpuXSAVES)
|| (x86_feature_2_used
& (GNU_PROPERTY_X86_FEATURE_2_XSAVE
| GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
| GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
- if (i.tm.cpu_flags.bitfield.cpuavx512f
- || i.tm.cpu_flags.bitfield.cpuavx512bw
- || i.tm.cpu_flags.bitfield.cpuavx512dq
- || i.tm.cpu_flags.bitfield.cpuavx512vl
+ if (is_cpu (&i.tm, CpuAVX512F)
+ || is_cpu (&i.tm, CpuAVX512BW)
+ || is_cpu (&i.tm, CpuAVX512DQ)
+ || is_cpu (&i.tm, CpuAVX512VL)
/* Any EVEX encoded insns except for AVX512ER, AVX512PF,
AVX512-4FMAPS, and AVX512-4VNNIW. */
|| (i.tm.opcode_modifier.evex
- && !i.tm.cpu_flags.bitfield.cpuavx512er
- && !i.tm.cpu_flags.bitfield.cpuavx512pf
- && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
- && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
+ && !is_cpu (&i.tm, CpuAVX512ER)
+ && !is_cpu (&i.tm, CpuAVX512PF)
+ && !is_cpu (&i.tm, CpuAVX512_4FMAPS)
+ && !is_cpu (&i.tm, CpuAVX512_4VNNIW)))
x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
}
#endif
add_prefix (0xf2);
break;
case PREFIX_0XF3:
- if (!i.tm.cpu_flags.bitfield.cpupadlock
+ if (!is_cpu (&i.tm, CpuPadLock)
|| (i.prefix[REP_PREFIX] != 0xf3))
add_prefix (0xf3);
break;
if (i.types[n].bitfield.imm32s
&& (i.suffix == QWORD_MNEM_SUFFIX
|| (!i.suffix && i.tm.opcode_modifier.no_lsuf)
+ || (i.prefix[REX_PREFIX] & REX_W)
|| dot_insn ()))
sign = 1;
else
if (line > end && i.vec_encoding == vex_encoding_default)
i.vec_encoding = evex ? vex_encoding_evex : vex_encoding_vex;
+ if (i.vec_encoding != vex_encoding_default)
+ {
+ /* Only address size and segment override prefixes are permitted with
+ VEX/XOP/EVEX encodings. */
+ const unsigned char *p = i.prefix;
+
+ for (j = 0; j < ARRAY_SIZE (i.prefix); ++j, ++p)
+ {
+ if (!*p)
+ continue;
+
+ switch (j)
+ {
+ case SEG_PREFIX:
+ case ADDR_PREFIX:
+ break;
+ default:
+ as_bad (_("illegal prefix used with VEX/XOP/EVEX"));
+ goto bad;
+ }
+ }
+ }
+
if (line > end && *line == '.')
{
/* Length specifier (VEX.L, XOP.L, EVEX.L'L). */
goto done;
}
+ /* No need to distinguish vex_encoding_evex and vex_encoding_evex512. */
+ if (i.vec_encoding == vex_encoding_evex512)
+ i.vec_encoding = vex_encoding_evex;
+
/* Are we to emit ModR/M encoding? */
if (!i.short_form
&& (i.mem_operands
? i.broadcast.type || i.broadcast.bytes
|| i.rounding.type != rc_none
|| i.mask.reg
- : (i.broadcast.type || i.broadcast.bytes)
- && i.rounding.type != rc_none))
+ : (i.mem_operands && i.rounding.type != rc_none)
+ || ((i.broadcast.type || i.broadcast.bytes)
+ && !(i.flags[i.broadcast.operand] & Operand_Mem))))
{
as_bad (_("conflicting .insn operands"));
goto done;
return NULL;
}
+ if (i.vec_encoding == vex_encoding_default)
+ i.vec_encoding = vex_encoding_evex512;
+ else if (i.vec_encoding != vex_encoding_evex
+ && i.vec_encoding != vex_encoding_evex512)
+ return NULL;
+
i.rounding.type = RC_NamesTable[j].type;
return (char *)(pstr + RC_NamesTable[j].len);
}
op_string++;
+ if (i.vec_encoding == vex_encoding_default)
+ i.vec_encoding = vex_encoding_evex;
+ else if (i.vec_encoding != vex_encoding_evex
+ && i.vec_encoding != vex_encoding_evex512)
+ goto unknown_vec_op;
+
i.broadcast.type = bcst_type;
i.broadcast.operand = this_operand;
if (i.prefix[ADDR_PREFIX])
addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
else if (flag_code == CODE_16BIT
- && current_templates->start->cpu_flags.bitfield.cpumpx
+ && is_cpu (current_templates->start, CpuMPX)
/* Avoid replacing the "16-bit addressing not allowed" diagnostic
from md_assemble() by "is not a valid base/index expression"
when there is a base and/or index. */
/* Memory operands of string insns are special in that they only allow
a single register (rDI, rSI, or rBX) as their memory address. */
const reg_entry *expected_reg;
- static const char *di_si[][2] =
+ static const char di_si[][2][4] =
{
{ "esi", "edi" },
{ "si", "di" },
{ "rsi", "rdi" }
};
- static const char *bx[] = { "ebx", "bx", "rbx" };
+ static const char bx[][4] = { "ebx", "bx", "rbx" };
kind = "string address";
else if (size == 2)
reloc_type = BFD_RELOC_16_PCREL;
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
- else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
+ else if (fragP->tc_frag_data.code == CODE_64BIT
+ && fragP->fr_offset == 0
&& need_plt32_p (fragP->fr_symbol))
reloc_type = BFD_RELOC_X86_64_PLT32;
#endif
}
}
+ if (r->reg_type.bitfield.zmmword)
+ {
+ if (vector_size < VSZ512)
+ return false;
+
+ if (i.vec_encoding == vex_encoding_default)
+ i.vec_encoding = vex_encoding_evex512;
+ else if (i.vec_encoding != vex_encoding_evex
+ && i.vec_encoding != vex_encoding_evex512)
+ i.vec_encoding = vex_encoding_error;
+ }
+
+ if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
+ return false;
+
if (r->reg_type.bitfield.tmmword
&& (!cpu_arch_flags.bitfield.cpuamx_tile
|| flag_code != CODE_64BIT))
|| flag_code != CODE_64BIT)
return false;
- if (i.vec_encoding == vex_encoding_default)
+ if (i.vec_encoding == vex_encoding_default
+ || i.vec_encoding == vex_encoding_evex512)
i.vec_encoding = vex_encoding_evex;
else if (i.vec_encoding != vex_encoding_evex)
i.vec_encoding = vex_encoding_error;
}
if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
- && (!cpu_arch_flags.bitfield.cpulm
+ && (!cpu_arch_flags.bitfield.cpu64
|| r->reg_type.bitfield.class != RegCR
|| dot_insn ())
&& flag_code != CODE_64BIT)
const reg_entry *r = NULL;
char *end = input_line_pointer;
+ /* We only know the terminating character here. It being double quote could
+ be the closing one of a quoted symbol name, or an opening one from a
+ following string (or another quoted symbol name). Since the latter can't
+ be valid syntax for anything, bailing in either case is good enough. */
+ if (*nextcharP == '"')
+ return 0;
+
*end = *nextcharP;
if (*name == REGISTER_PREFIX || allow_naked_reg)
r = parse_real_register (name, &input_line_pointer);
#endif
case OPTION_32:
- default_arch = "i386";
+ {
+ const char **list, **l;
+
+ list = bfd_target_list ();
+ for (l = list; *l != NULL; l++)
+ if (strstr (*l, "-i386")
+ || strstr (*l, "-go32"))
+ {
+ default_arch = "i386";
+ break;
+ }
+ if (*l == NULL)
+ as_fatal (_("no compiled in support for ix86"));
+ free (list);
+ }
break;
case OPTION_DIVIDE:
arch++;
do
{
+ char *vsz;
+
if (*arch == '.')
as_fatal (_("invalid -march= option: `%s'"), arg);
next = strchr (arch, '+');
if (next)
*next++ = '\0';
+ vsz = strchr (arch, '/');
+ if (vsz)
+ *vsz++ = '\0';
for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
{
+ if (vsz && cpu_arch[j].vsz != vsz_set)
+ continue;
+
if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
&& strcmp (arch, cpu_arch[j].name) == 0)
{
cpu_arch_isa = cpu_arch[j].type;
cpu_arch_isa_flags = cpu_arch[j].enable;
if (!cpu_arch_tune_set)
- {
- cpu_arch_tune = cpu_arch_isa;
- cpu_arch_tune_flags = cpu_arch_isa_flags;
- }
+ cpu_arch_tune = cpu_arch_isa;
+ vector_size = VSZ_DEFAULT;
break;
}
else if (cpu_arch[j].type == PROCESSOR_NONE
&& !cpu_flags_all_zero (&cpu_arch[j].enable))
{
/* ISA extension. */
- i386_cpu_flags flags;
+ isa_enable (j);
- flags = cpu_flags_or (cpu_arch_flags,
- cpu_arch[j].enable);
-
- if (!cpu_flags_equal (&flags, &cpu_arch_flags))
+ switch (cpu_arch[j].vsz)
{
- extend_cpu_sub_arch_name (arch);
- cpu_arch_flags = flags;
- cpu_arch_isa_flags = flags;
+ default:
+ break;
+
+ case vsz_set:
+ if (vsz)
+ {
+ char *end;
+ unsigned long val = strtoul (vsz, &end, 0);
+
+ if (*end)
+ val = 0;
+ switch (val)
+ {
+ case 512: vector_size = VSZ512; break;
+ case 256: vector_size = VSZ256; break;
+ case 128: vector_size = VSZ128; break;
+ default:
+ as_warn (_("Unrecognized vector size specifier ignored"));
+ break;
+ }
+ break;
+ }
+ /* Fall through. */
+ case vsz_reset:
+ vector_size = VSZ_DEFAULT;
+ break;
}
- else
- cpu_arch_isa_flags
- = cpu_flags_or (cpu_arch_isa_flags,
- cpu_arch[j].enable);
+
break;
}
}
if (cpu_arch[j].type == PROCESSOR_NONE
&& strcmp (arch + 2, cpu_arch[j].name) == 0)
{
- i386_cpu_flags flags;
-
- flags = cpu_flags_and_not (cpu_arch_flags,
- cpu_arch[j].disable);
- if (!cpu_flags_equal (&flags, &cpu_arch_flags))
- {
- extend_cpu_sub_arch_name (arch);
- cpu_arch_flags = flags;
- cpu_arch_isa_flags = flags;
- }
+ isa_disable (j);
+ if (cpu_arch[j].vsz == vsz_set)
+ vector_size = VSZ_DEFAULT;
break;
}
}
{
cpu_arch_tune_set = 1;
cpu_arch_tune = cpu_arch [j].type;
- cpu_arch_tune_flags = cpu_arch[j].enable;
break;
}
}
cpu_arch_isa = PROCESSOR_IAMCU;
cpu_arch_isa_flags = iamcu_flags;
if (!cpu_arch_tune_set)
- {
- cpu_arch_tune = cpu_arch_isa;
- cpu_arch_tune_flags = cpu_arch_isa_flags;
- }
+ cpu_arch_tune = PROCESSOR_IAMCU;
}
else if (cpu_arch_isa != PROCESSOR_IAMCU)
as_fatal (_("Intel MCU doesn't support `%s' architecture"),
if (cpu_flags_all_zero (&cpu_arch_isa_flags))
cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
- if (cpu_flags_all_zero (&cpu_arch_tune_flags))
- cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
switch (OUTPUT_FLAVOR)
{
return -1;
}
-bfd_vma
-x86_64_section_word (char *str, size_t len)
-{
- if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
- return SHF_X86_64_LARGE;
-
- return -1;
-}
-
static void
handle_large_common (int small ATTRIBUTE_UNUSED)
{