x86: parse VEX and alike specifiers for .insn
1 /* tc-i386.c -- Assemble code for the Intel 80386
2 Copyright (C) 1989-2023 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Intel 80386 machine specific gas.
22 Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
23 x86_64 support by Jan Hubicka (jh@suse.cz)
24 VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
25 Bugs & suggestions are completely welcome. This is free software.
26 Please help us make it better. */
27
28 #include "as.h"
29 #include "safe-ctype.h"
30 #include "subsegs.h"
31 #include "dwarf2dbg.h"
32 #include "dw2gencfi.h"
33 #include "gen-sframe.h"
34 #include "sframe.h"
35 #include "elf/x86-64.h"
36 #include "opcodes/i386-init.h"
37 #include "opcodes/i386-mnem.h"
38 #include <limits.h>
39
40 #ifndef INFER_ADDR_PREFIX
41 #define INFER_ADDR_PREFIX 1
42 #endif
43
44 #ifndef DEFAULT_ARCH
45 #define DEFAULT_ARCH "i386"
46 #endif
47
48 #ifndef INLINE
49 #if __GNUC__ >= 2
50 #define INLINE __inline__
51 #else
52 #define INLINE
53 #endif
54 #endif
55
56 /* Prefixes will be emitted in the order defined below.
57 WAIT_PREFIX must be the first prefix since FWAIT really is an
58 instruction, and so must come before any prefixes.
59 The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
60 REP_PREFIX/HLE_PREFIX, LOCK_PREFIX. */
61 #define WAIT_PREFIX 0
62 #define SEG_PREFIX 1
63 #define ADDR_PREFIX 2
64 #define DATA_PREFIX 3
65 #define REP_PREFIX 4
66 #define HLE_PREFIX REP_PREFIX
67 #define BND_PREFIX REP_PREFIX
68 #define LOCK_PREFIX 5
69 #define REX_PREFIX 6 /* must come last. */
70 #define MAX_PREFIXES 7 /* max prefixes per opcode */
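
/* Illustrative sketch (not part of the original source, assuming the
   standard 0x64 FS-override and 0xf0 LOCK prefix opcodes): for
   "lock addl $1, %fs:(%ebx)" the parser would record 0x64 in
   i.prefix[SEG_PREFIX] and 0xf0 in i.prefix[LOCK_PREFIX]; walking the
   array in slot order then emits the preferred SEG-before-LOCK byte
   order described above.  */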
71
72 /* we define the syntax here (modulo base,index,scale syntax) */
73 #define REGISTER_PREFIX '%'
74 #define IMMEDIATE_PREFIX '$'
75 #define ABSOLUTE_PREFIX '*'
76
77 /* these are the instruction mnemonic suffixes in AT&T syntax or
78 memory operand size in Intel syntax. */
79 #define WORD_MNEM_SUFFIX 'w'
80 #define BYTE_MNEM_SUFFIX 'b'
81 #define SHORT_MNEM_SUFFIX 's'
82 #define LONG_MNEM_SUFFIX 'l'
83 #define QWORD_MNEM_SUFFIX 'q'
84
85 #define END_OF_INSN '\0'
86
87 #define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }
88
89 /* This matches the C -> StaticRounding alias in the opcode table. */
90 #define commutative staticrounding
91
92 /*
93 'templates' is for grouping together 'template' structures for opcodes
94 of the same name. This is only used for storing the insns in the grand
95 ole hash table of insns.
96 The templates themselves start at START and range up to (but not including)
97 END.
98 */
99 typedef struct
100 {
101 const insn_template *start;
102 const insn_template *end;
103 }
104 templates;
105
106 /* 386 operand encoding bytes: see 386 book for details of this. */
107 typedef struct
108 {
109 unsigned int regmem; /* codes register or memory operand */
110 unsigned int reg; /* codes register operand (or extended opcode) */
111 unsigned int mode; /* how to interpret regmem & reg */
112 }
113 modrm_byte;
114
115 /* x86-64 extension prefix. */
116 typedef int rex_byte;
117
118 /* 386 opcode byte to code indirect addressing. */
119 typedef struct
120 {
121 unsigned base;
122 unsigned index;
123 unsigned scale;
124 }
125 sib_byte;
126
127 /* x86 arch names, types and features */
128 typedef struct
129 {
130 const char *name; /* arch name */
131 unsigned int len:8; /* arch string length */
132 bool skip:1; /* show_arch should skip this. */
133 enum processor_type type; /* arch type */
134 i386_cpu_flags enable; /* cpu feature enable flags */
135 i386_cpu_flags disable; /* cpu feature disable flags */
136 }
137 arch_entry;
138
139 static void update_code_flag (int, int);
140 static void s_insn (int);
141 static void set_code_flag (int);
142 static void set_16bit_gcc_code_flag (int);
143 static void set_intel_syntax (int);
144 static void set_intel_mnemonic (int);
145 static void set_allow_index_reg (int);
146 static void set_check (int);
147 static void set_cpu_arch (int);
148 #ifdef TE_PE
149 static void pe_directive_secrel (int);
150 static void pe_directive_secidx (int);
151 #endif
152 static void signed_cons (int);
153 static char *output_invalid (int c);
154 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
155 const char *);
156 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
157 const char *);
158 static int i386_att_operand (char *);
159 static int i386_intel_operand (char *, int);
160 static int i386_intel_simplify (expressionS *);
161 static int i386_intel_parse_name (const char *, expressionS *);
162 static const reg_entry *parse_register (char *, char **);
163 static const char *parse_insn (const char *, char *, bool);
164 static char *parse_operands (char *, const char *);
165 static void swap_operands (void);
166 static void swap_2_operands (unsigned int, unsigned int);
167 static enum flag_code i386_addressing_mode (void);
168 static void optimize_imm (void);
169 static bool optimize_disp (const insn_template *t);
170 static const insn_template *match_template (char);
171 static int check_string (void);
172 static int process_suffix (void);
173 static int check_byte_reg (void);
174 static int check_long_reg (void);
175 static int check_qword_reg (void);
176 static int check_word_reg (void);
177 static int finalize_imm (void);
178 static int process_operands (void);
179 static const reg_entry *build_modrm_byte (void);
180 static void output_insn (void);
181 static void output_imm (fragS *, offsetT);
182 static void output_disp (fragS *, offsetT);
183 #ifndef I386COFF
184 static void s_bss (int);
185 #endif
186 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
187 static void handle_large_common (int small ATTRIBUTE_UNUSED);
188
189 /* GNU_PROPERTY_X86_ISA_1_USED. */
190 static unsigned int x86_isa_1_used;
191 /* GNU_PROPERTY_X86_FEATURE_2_USED. */
192 static unsigned int x86_feature_2_used;
193 /* Generate x86 used ISA and feature properties. */
194 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
195 #endif
196
197 static const char *default_arch = DEFAULT_ARCH;
198
199 /* parse_register() returns this when a register alias cannot be used. */
200 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
201 { Dw2Inval, Dw2Inval } };
202
203 static const reg_entry *reg_eax;
204 static const reg_entry *reg_ds;
205 static const reg_entry *reg_es;
206 static const reg_entry *reg_ss;
207 static const reg_entry *reg_st0;
208 static const reg_entry *reg_k0;
209
210 /* VEX prefix. */
211 typedef struct
212 {
213 /* VEX prefix is either 2 byte or 3 byte. EVEX is 4 byte. */
214 unsigned char bytes[4];
215 unsigned int length;
216 /* Destination or source register specifier. */
217 const reg_entry *register_specifier;
218 } vex_prefix;
219
220 /* 'md_assemble ()' gathers together information and puts it into an
221 i386_insn. */
222
223 union i386_op
224 {
225 expressionS *disps;
226 expressionS *imms;
227 const reg_entry *regs;
228 };
229
230 enum i386_error
231 {
232 no_error, /* Must be first. */
233 operand_size_mismatch,
234 operand_type_mismatch,
235 register_type_mismatch,
236 number_of_operands_mismatch,
237 invalid_instruction_suffix,
238 bad_imm4,
239 unsupported_with_intel_mnemonic,
240 unsupported_syntax,
241 unsupported,
242 unsupported_on_arch,
243 unsupported_64bit,
244 invalid_sib_address,
245 invalid_vsib_address,
246 invalid_vector_register_set,
247 invalid_tmm_register_set,
248 invalid_dest_and_src_register_set,
249 unsupported_vector_index_register,
250 unsupported_broadcast,
251 broadcast_needed,
252 unsupported_masking,
253 mask_not_on_destination,
254 no_default_mask,
255 unsupported_rc_sae,
256 invalid_register_operand,
257 };
258
259 struct _i386_insn
260 {
261 /* TM holds the template for the insn we're currently assembling. */
262 insn_template tm;
263
264 /* SUFFIX holds the instruction size suffix for byte, word, dword
265 or qword, if given. */
266 char suffix;
267
268 /* OPCODE_LENGTH holds the number of base opcode bytes. */
269 unsigned char opcode_length;
270
271 /* OPERANDS gives the number of given operands. */
272 unsigned int operands;
273
274 /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
275 of given register, displacement, memory operands and immediate
276 operands. */
277 unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
278
279 /* TYPES [i] is the type (see above #defines) which tells us how to
280 use OP[i] for the corresponding operand. */
281 i386_operand_type types[MAX_OPERANDS];
282
283 /* Displacement expression, immediate expression, or register for each
284 operand. */
285 union i386_op op[MAX_OPERANDS];
286
287 /* Flags for operands. */
288 unsigned int flags[MAX_OPERANDS];
289 #define Operand_PCrel 1
290 #define Operand_Mem 2
291
292 /* Relocation type for operand */
293 enum bfd_reloc_code_real reloc[MAX_OPERANDS];
294
295 /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
296 the base index byte below. */
297 const reg_entry *base_reg;
298 const reg_entry *index_reg;
299 unsigned int log2_scale_factor;
300
301 /* SEG gives the seg_entries of this insn. They are zero unless
302 explicit segment overrides are given. */
303 const reg_entry *seg[2];
304
305 /* PREFIX holds all the given prefix opcodes (usually null).
306 PREFIXES is the number of prefix opcodes. */
307 unsigned int prefixes;
308 unsigned char prefix[MAX_PREFIXES];
309
310 /* .insn allows for reserved opcode spaces. */
311 unsigned char insn_opcode_space;
312
313 /* Register is in low 3 bits of opcode. */
314 bool short_form;
315
316 /* The operand to a branch insn indicates an absolute branch. */
317 bool jumpabsolute;
318
319 /* The operand to a branch insn indicates a far branch. */
320 bool far_branch;
321
322 /* There is a memory operand of (%dx) which should be only used
323 with input/output instructions. */
324 bool input_output_operand;
325
326 /* Extended states. */
327 enum
328 {
329 /* Use MMX state. */
330 xstate_mmx = 1 << 0,
331 /* Use XMM state. */
332 xstate_xmm = 1 << 1,
333 /* Use YMM state. */
334 xstate_ymm = 1 << 2 | xstate_xmm,
335 /* Use ZMM state. */
336 xstate_zmm = 1 << 3 | xstate_ymm,
337 /* Use TMM state. */
338 xstate_tmm = 1 << 4,
339 /* Use MASK state. */
340 xstate_mask = 1 << 5
341 } xstate;
342
343 /* Has GOTPC or TLS relocation. */
344 bool has_gotpc_tls_reloc;
345
346 /* RM and SIB are the modrm byte and the sib byte where the
347 addressing modes of this insn are encoded. */
348 modrm_byte rm;
349 rex_byte rex;
350 rex_byte vrex;
351 sib_byte sib;
352 vex_prefix vex;
353
354 /* Masking attributes.
355
356 The struct describes masking, applied to OPERAND in the instruction.
357 REG is a pointer to the corresponding mask register. ZEROING tells
358 whether a merging or a zeroing mask is used. */
359 struct Mask_Operation
360 {
361 const reg_entry *reg;
362 unsigned int zeroing;
363 /* The operand where this operation is associated. */
364 unsigned int operand;
365 } mask;
366
367 /* Rounding control and SAE attributes. */
368 struct RC_Operation
369 {
370 enum rc_type
371 {
372 rc_none = -1,
373 rne,
374 rd,
375 ru,
376 rz,
377 saeonly
378 } type;
379 /* In Intel syntax the operand modifier form is supposed to be used, but
380 we continue to accept the immediate forms as well. */
381 bool modifier;
382 } rounding;
383
384 /* Broadcasting attributes.
385
386 The struct describes broadcasting, applied to OPERAND. TYPE
387 expresses the broadcast factor. */
388 struct Broadcast_Operation
389 {
390 /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}. */
391 unsigned int type;
392
393 /* Index of broadcasted operand. */
394 unsigned int operand;
395
396 /* Number of bytes to broadcast. */
397 unsigned int bytes;
398 } broadcast;
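
  /* Illustrative example (AT&T syntax, standard EVEX broadcast
     notation): for "vaddps (%rax){1to16}, %zmm1, %zmm2" the parser
     would set type = 16, operand = 0 (the memory operand comes first
     in AT&T order) and, with 4-byte elements, bytes = 4.  */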
399
400 /* Compressed disp8*N attribute. */
401 unsigned int memshift;
402
403 /* Prefer load or store in encoding. */
404 enum
405 {
406 dir_encoding_default = 0,
407 dir_encoding_load,
408 dir_encoding_store,
409 dir_encoding_swap
410 } dir_encoding;
411
412 /* Prefer 8bit, 16bit, 32bit displacement in encoding. */
413 enum
414 {
415 disp_encoding_default = 0,
416 disp_encoding_8bit,
417 disp_encoding_16bit,
418 disp_encoding_32bit
419 } disp_encoding;
420
421 /* Prefer the REX byte in encoding. */
422 bool rex_encoding;
423
424 /* Disable instruction size optimization. */
425 bool no_optimize;
426
427 /* How to encode vector instructions. */
428 enum
429 {
430 vex_encoding_default = 0,
431 vex_encoding_vex,
432 vex_encoding_vex3,
433 vex_encoding_evex,
434 vex_encoding_error
435 } vec_encoding;
436
437 /* REP prefix. */
438 const char *rep_prefix;
439
440 /* HLE prefix. */
441 const char *hle_prefix;
442
443 /* Have BND prefix. */
444 const char *bnd_prefix;
445
446 /* Have NOTRACK prefix. */
447 const char *notrack_prefix;
448
449 /* Error message. */
450 enum i386_error error;
451 };
452
453 typedef struct _i386_insn i386_insn;
454
455 /* Link RC type with corresponding string, that'll be looked for in
456 asm. */
457 struct RC_name
458 {
459 enum rc_type type;
460 const char *name;
461 unsigned int len;
462 };
463
464 static const struct RC_name RC_NamesTable[] =
465 {
466 { rne, STRING_COMMA_LEN ("rn-sae") },
467 { rd, STRING_COMMA_LEN ("rd-sae") },
468 { ru, STRING_COMMA_LEN ("ru-sae") },
469 { rz, STRING_COMMA_LEN ("rz-sae") },
470 { saeonly, STRING_COMMA_LEN ("sae") },
471 };
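
/* Illustrative example (AT&T syntax): these strings appear in braces
   as a pseudo-operand, e.g. "vaddpd {rn-sae}, %zmm0, %zmm1, %zmm2"
   selects round-to-nearest with suppress-all-exceptions.  */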
472
473 /* To be indexed by segment register number. */
474 static const unsigned char i386_seg_prefixes[] = {
475 ES_PREFIX_OPCODE,
476 CS_PREFIX_OPCODE,
477 SS_PREFIX_OPCODE,
478 DS_PREFIX_OPCODE,
479 FS_PREFIX_OPCODE,
480 GS_PREFIX_OPCODE
481 };
482
483 /* List of chars besides those in app.c:symbol_chars that can start an
484 operand. Used to prevent the scrubber eating vital white-space. */
485 const char extra_symbol_chars[] = "*%-([{}"
486 #ifdef LEX_AT
487 "@"
488 #endif
489 #ifdef LEX_QM
490 "?"
491 #endif
492 ;
493
494 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
495 && !defined (TE_GNU) \
496 && !defined (TE_LINUX) \
497 && !defined (TE_Haiku) \
498 && !defined (TE_FreeBSD) \
499 && !defined (TE_DragonFly) \
500 && !defined (TE_NetBSD))
501 /* This array holds the chars that always start a comment. If the
502 pre-processor is disabled, these aren't very useful. The option
503 --divide will remove '/' from this list. */
504 const char *i386_comment_chars = "#/";
505 #define SVR4_COMMENT_CHARS 1
506 #define PREFIX_SEPARATOR '\\'
507
508 #else
509 const char *i386_comment_chars = "#";
510 #define PREFIX_SEPARATOR '/'
511 #endif
512
513 /* This array holds the chars that only start a comment at the beginning of
514 a line. If the line seems to have the form '# 123 filename'
515 .line and .file directives will appear in the pre-processed output.
516 Note that input_file.c hand checks for '#' at the beginning of the
517 first line of the input file. This is because the compiler outputs
518 #NO_APP at the beginning of its output.
519 Also note that comments started like this one will always work if
520 '/' isn't otherwise defined. */
521 const char line_comment_chars[] = "#/";
522
523 const char line_separator_chars[] = ";";
524
525 /* Chars that can be used to separate the mantissa from the exponent
526 in floating point numbers. */
527 const char EXP_CHARS[] = "eE";
528
529 /* Chars that mean this number is a floating point constant
530 As in 0f12.456
531 or 0d1.2345e12. */
532 const char FLT_CHARS[] = "fFdDxXhHbB";
533
534 /* Tables for lexical analysis. */
535 static char mnemonic_chars[256];
536 static char register_chars[256];
537 static char operand_chars[256];
538
539 /* Lexical macros. */
540 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
541 #define is_operand_char(x) (operand_chars[(unsigned char) x])
542 #define is_register_char(x) (register_chars[(unsigned char) x])
543 #define is_space_char(x) ((x) == ' ')
544
545 /* All non-digit non-letter characters that may occur in an operand. */
546 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
547
548 /* md_assemble() always leaves the strings it's passed unaltered. To
549 effect this we maintain a stack of saved characters that we've smashed
550 with '\0's (indicating end of strings for various sub-fields of the
551 assembler instruction). */
552 static char save_stack[32];
553 static char *save_stack_p;
554 #define END_STRING_AND_SAVE(s) \
555 do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
556 #define RESTORE_END_STRING(s) \
557 do { *(s) = *--save_stack_p; } while (0)
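
/* A minimal usage sketch (the helper name is hypothetical): saves must
   be undone in strict LIFO order, since both macros share one stack.  */
#if 0
static void
parse_one_field (char *field_end)
{
  END_STRING_AND_SAVE (field_end);	/* smash *field_end to '\0' */
  /* ... work on the now NUL-terminated substring ... */
  RESTORE_END_STRING (field_end);	/* undo, in LIFO order */
}
#endif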
558
559 /* The instruction we're assembling. */
560 static i386_insn i;
561
562 /* Possible templates for current insn. */
563 static const templates *current_templates;
564
565 /* Per instruction expressionS buffers: max displacements & immediates. */
566 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
567 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
568
569 /* Current operand we are working on. */
570 static int this_operand = -1;
571
572 /* Are we processing a .insn directive? */
573 #define dot_insn() (i.tm.mnem_off == MN__insn)
574
575 /* We support four different modes. FLAG_CODE variable is used to distinguish
576 these. */
577
578 enum flag_code {
579 CODE_32BIT,
580 CODE_16BIT,
581 CODE_64BIT };
582
583 static enum flag_code flag_code;
584 static unsigned int object_64bit;
585 static unsigned int disallow_64bit_reloc;
586 static int use_rela_relocations = 0;
587 /* __tls_get_addr/___tls_get_addr symbol for TLS. */
588 static const char *tls_get_addr;
589
590 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
591 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
592 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
593
594 /* The ELF ABI to use. */
595 enum x86_elf_abi
596 {
597 I386_ABI,
598 X86_64_ABI,
599 X86_64_X32_ABI
600 };
601
602 static enum x86_elf_abi x86_elf_abi = I386_ABI;
603 #endif
604
605 #if defined (TE_PE) || defined (TE_PEP)
606 /* Use big object file format. */
607 static int use_big_obj = 0;
608 #endif
609
610 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
611 /* 1 if generating code for a shared library. */
612 static int shared = 0;
613
614 unsigned int x86_sframe_cfa_sp_reg;
615 /* The other CFA base register for SFrame stack trace info. */
616 unsigned int x86_sframe_cfa_fp_reg;
617 unsigned int x86_sframe_cfa_ra_reg;
618
619 #endif
620
621 /* 1 for intel syntax,
622 0 if att syntax. */
623 static int intel_syntax = 0;
624
625 static enum x86_64_isa
626 {
627 amd64 = 1, /* AMD64 ISA. */
628 intel64 /* Intel64 ISA. */
629 } isa64;
630
631 /* 1 for intel mnemonic,
632 0 if att mnemonic. */
633 static int intel_mnemonic = !SYSV386_COMPAT;
634
635 /* 1 if pseudo registers are permitted. */
636 static int allow_pseudo_reg = 0;
637
638 /* 1 if register prefix % not required. */
639 static int allow_naked_reg = 0;
640
641 /* 1 if the assembler should add BND prefix for all control-transferring
642 instructions supporting it, even if this prefix wasn't specified
643 explicitly. */
644 static int add_bnd_prefix = 0;
645
646 /* 1 if pseudo index register, eiz/riz, is allowed. */
647 static int allow_index_reg = 0;
648
649 /* 1 if the assembler should ignore LOCK prefix, even if it was
650 specified explicitly. */
651 static int omit_lock_prefix = 0;
652
653 /* 1 if the assembler should encode lfence, mfence, and sfence as
654 "lock addl $0, (%{re}sp)". */
655 static int avoid_fence = 0;
656
657 /* 1 if lfence should be inserted after every load. */
658 static int lfence_after_load = 0;
659
660 /* Non-zero if lfence should be inserted before indirect branch. */
661 static enum lfence_before_indirect_branch_kind
662 {
663 lfence_branch_none = 0,
664 lfence_branch_register,
665 lfence_branch_memory,
666 lfence_branch_all
667 }
668 lfence_before_indirect_branch;
669
670 /* Non-zero if lfence should be inserted before ret. */
671 static enum lfence_before_ret_kind
672 {
673 lfence_before_ret_none = 0,
674 lfence_before_ret_not,
675 lfence_before_ret_or,
676 lfence_before_ret_shl
677 }
678 lfence_before_ret;
679
680 /* Record of the previous instruction, noting whether it was a directive (e.g. .byte) or a prefix. */
681 static struct
682 {
683 segT seg;
684 const char *file;
685 const char *name;
686 unsigned int line;
687 enum last_insn_kind
688 {
689 last_insn_other = 0,
690 last_insn_directive,
691 last_insn_prefix
692 } kind;
693 } last_insn;
694
695 /* 1 if the assembler should generate relax relocations. */
696
697 static int generate_relax_relocations
698 = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
699
700 static enum check_kind
701 {
702 check_none = 0,
703 check_warning,
704 check_error
705 }
706 sse_check, operand_check = check_warning;
707
708 /* Non-zero if branches should be aligned within power of 2 boundary. */
709 static int align_branch_power = 0;
710
711 /* Types of branches to align. */
712 enum align_branch_kind
713 {
714 align_branch_none = 0,
715 align_branch_jcc = 1,
716 align_branch_fused = 2,
717 align_branch_jmp = 3,
718 align_branch_call = 4,
719 align_branch_indirect = 5,
720 align_branch_ret = 6
721 };
722
723 /* Type bits of branches to align. */
724 enum align_branch_bit
725 {
726 align_branch_jcc_bit = 1 << align_branch_jcc,
727 align_branch_fused_bit = 1 << align_branch_fused,
728 align_branch_jmp_bit = 1 << align_branch_jmp,
729 align_branch_call_bit = 1 << align_branch_call,
730 align_branch_indirect_bit = 1 << align_branch_indirect,
731 align_branch_ret_bit = 1 << align_branch_ret
732 };
733
734 static unsigned int align_branch = (align_branch_jcc_bit
735 | align_branch_fused_bit
736 | align_branch_jmp_bit);
737
738 /* Types of condition jump used by macro-fusion. */
739 enum mf_jcc_kind
740 {
741 mf_jcc_jo = 0, /* base opcode 0x70 */
742 mf_jcc_jc, /* base opcode 0x72 */
743 mf_jcc_je, /* base opcode 0x74 */
744 mf_jcc_jna, /* base opcode 0x76 */
745 mf_jcc_js, /* base opcode 0x78 */
746 mf_jcc_jp, /* base opcode 0x7a */
747 mf_jcc_jl, /* base opcode 0x7c */
748 mf_jcc_jle, /* base opcode 0x7e */
749 };
750
751 /* Types of compare flag-modifying instructions used by macro-fusion. */
752 enum mf_cmp_kind
753 {
754 mf_cmp_test_and, /* test/cmp */
755 mf_cmp_alu_cmp, /* add/sub/cmp */
756 mf_cmp_incdec /* inc/dec */
757 };
758
759 /* The maximum padding size for fused jcc. A CMP-like instruction can
760 be 9 bytes and jcc can be 6 bytes. Leave room just in case for
761 prefixes. */
762 #define MAX_FUSED_JCC_PADDING_SIZE 20
763
764 /* The maximum number of prefixes added for an instruction. */
765 static unsigned int align_branch_prefix_size = 5;
766
767 /* Optimization:
768 1. Clear the REX_W bit with register operand if possible.
769 2. Above plus use 128bit vector instruction to clear the full vector
770 register.
771 */
772 static int optimize = 0;
773
774 /* Optimization:
775 1. Clear the REX_W bit with register operand if possible.
776 2. Above plus use 128bit vector instruction to clear the full vector
777 register.
778 3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
779 "testb $imm7,%r8".
780 */
781 static int optimize_for_space = 0;
782
783 /* Register prefix used for error message. */
784 static const char *register_prefix = "%";
785
786 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
787 leave, push, and pop instructions so that gcc has the same stack
788 frame as in 32 bit mode. */
789 static char stackop_size = '\0';
790
791 /* Non-zero to optimize code alignment. */
792 int optimize_align_code = 1;
793
794 /* Non-zero to quieten some warnings. */
795 static int quiet_warnings = 0;
796
797 /* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs. */
798 static bool pre_386_16bit_warned;
799
800 /* CPU name. */
801 static const char *cpu_arch_name = NULL;
802 static char *cpu_sub_arch_name = NULL;
803
804 /* CPU feature flags. */
805 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
806
807 /* If we have selected a cpu we are generating instructions for. */
808 static int cpu_arch_tune_set = 0;
809
810 /* Cpu we are generating instructions for. */
811 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
812
813 /* CPU feature flags of cpu we are generating instructions for. */
814 static i386_cpu_flags cpu_arch_tune_flags;
815
816 /* CPU instruction set architecture used. */
817 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
818
819 /* CPU feature flags of instruction set architecture used. */
820 i386_cpu_flags cpu_arch_isa_flags;
821
822 /* If set, conditional jumps are not automatically promoted to handle
823 larger than a byte offset. */
824 static bool no_cond_jump_promotion = false;
825
826 /* Encode SSE instructions with VEX prefix. */
827 static unsigned int sse2avx;
828
829 /* Encode aligned vector move as unaligned vector move. */
830 static unsigned int use_unaligned_vector_move;
831
832 /* Encode scalar AVX instructions with specific vector length. */
833 static enum
834 {
835 vex128 = 0,
836 vex256
837 } avxscalar;
838
839 /* Encode VEX WIG instructions with specific vex.w. */
840 static enum
841 {
842 vexw0 = 0,
843 vexw1
844 } vexwig;
845
846 /* Encode scalar EVEX LIG instructions with specific vector length. */
847 static enum
848 {
849 evexl128 = 0,
850 evexl256,
851 evexl512
852 } evexlig;
853
854 /* Encode EVEX WIG instructions with specific evex.w. */
855 static enum
856 {
857 evexw0 = 0,
858 evexw1
859 } evexwig;
860
861 /* Value to encode in EVEX RC bits, for SAE-only instructions. */
862 static enum rc_type evexrcig = rne;
863
864 /* Pre-defined "_GLOBAL_OFFSET_TABLE_". */
865 static symbolS *GOT_symbol;
866
867 /* The dwarf2 return column, adjusted for 32 or 64 bit. */
868 unsigned int x86_dwarf2_return_column;
869
870 /* The dwarf2 data alignment, adjusted for 32 or 64 bit. */
871 int x86_cie_data_alignment;
872
873 /* Interface to relax_segment.
874 There are 3 major relax states for 386 jump insns because the
875 different types of jumps add different sizes to frags when we're
876 figuring out what sort of jump to choose to reach a given label.
877
878 BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
879 branches which are handled by md_estimate_size_before_relax() and
880 i386_generic_table_relax_frag(). */
881
882 /* Types. */
883 #define UNCOND_JUMP 0
884 #define COND_JUMP 1
885 #define COND_JUMP86 2
886 #define BRANCH_PADDING 3
887 #define BRANCH_PREFIX 4
888 #define FUSED_JCC_PADDING 5
889
890 /* Sizes. */
891 #define CODE16 1
892 #define SMALL 0
893 #define SMALL16 (SMALL | CODE16)
894 #define BIG 2
895 #define BIG16 (BIG | CODE16)
896
897 #ifndef INLINE
898 #ifdef __GNUC__
899 #define INLINE __inline__
900 #else
901 #define INLINE
902 #endif
903 #endif
904
905 #define ENCODE_RELAX_STATE(type, size) \
906 ((relax_substateT) (((type) << 2) | (size)))
907 #define TYPE_FROM_RELAX_STATE(s) \
908 ((s) >> 2)
909 #define DISP_SIZE_FROM_RELAX_STATE(s) \
910 ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
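
/* Worked example of the packing, using the values defined above:
   ENCODE_RELAX_STATE (COND_JUMP, SMALL16) == (1 << 2) | 1 == 5;
   TYPE_FROM_RELAX_STATE (5) == COND_JUMP; and since (5 & 3) is neither
   BIG nor BIG16, DISP_SIZE_FROM_RELAX_STATE (5) == 1 (byte).  */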
911
912 /* This table is used by relax_frag to promote short jumps to long
913 ones where necessary. SMALL (short) jumps may be promoted to BIG
914 (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long). We
915 don't allow a short jump in a 32 bit code segment to be promoted to
916 a 16 bit offset jump because it's slower (requires data size
917 prefix), and doesn't work unless the destination is in the bottom
918 64k of the code segment (The top 16 bits of eip are zeroed). */
919
920 const relax_typeS md_relax_table[] =
921 {
922 /* The fields are:
923 1) most positive reach of this state,
924 2) most negative reach of this state,
925 3) how many bytes this mode will have in the variable part of the frag
926 4) which index into the table to try if we can't fit into this one. */
927
928 /* UNCOND_JUMP states. */
929 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
930 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
931 /* dword jmp adds 4 bytes to frag:
932 0 extra opcode bytes, 4 displacement bytes. */
933 {0, 0, 4, 0},
934 /* word jmp adds 2 bytes to frag:
935 0 extra opcode bytes, 2 displacement bytes. */
936 {0, 0, 2, 0},
937
938 /* COND_JUMP states. */
939 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
940 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
941 /* dword conditionals add 5 bytes to frag:
942 1 extra opcode byte, 4 displacement bytes. */
943 {0, 0, 5, 0},
944 /* word conditionals add 3 bytes to frag:
945 1 extra opcode byte, 2 displacement bytes. */
946 {0, 0, 3, 0},
947
948 /* COND_JUMP86 states. */
949 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
950 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
951 /* dword conditionals add 5 bytes to frag:
952 1 extra opcode byte, 4 displacement bytes. */
953 {0, 0, 5, 0},
954 /* word conditionals add 4 bytes to frag:
955 1 displacement byte and a 3 byte long branch insn. */
956 {0, 0, 4, 0}
957 };
958
959 #define ARCH(n, t, f, s) \
960 { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
961 CPU_NONE_FLAGS }
962 #define SUBARCH(n, e, d, s) \
963 { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
964 CPU_ ## d ## _FLAGS }
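
/* For instance, the first table entry below,
   ARCH (generic32, GENERIC32, GENERIC32, false), expands to
   { "generic32", 9, false, PROCESSOR_GENERIC32,
     CPU_GENERIC32_FLAGS, CPU_NONE_FLAGS }
   via STRING_COMMA_LEN, which emits the string literal and its length.  */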
965
966 static const arch_entry cpu_arch[] =
967 {
968 /* Do not replace the first two entries - i386_target_format() and
969 set_cpu_arch() rely on them being there in this order. */
970 ARCH (generic32, GENERIC32, GENERIC32, false),
971 ARCH (generic64, GENERIC64, GENERIC64, false),
972 ARCH (i8086, UNKNOWN, NONE, false),
973 ARCH (i186, UNKNOWN, 186, false),
974 ARCH (i286, UNKNOWN, 286, false),
975 ARCH (i386, I386, 386, false),
976 ARCH (i486, I486, 486, false),
977 ARCH (i586, PENTIUM, 586, false),
978 ARCH (i686, PENTIUMPRO, 686, false),
979 ARCH (pentium, PENTIUM, 586, false),
980 ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
981 ARCH (pentiumii, PENTIUMPRO, P2, false),
982 ARCH (pentiumiii, PENTIUMPRO, P3, false),
983 ARCH (pentium4, PENTIUM4, P4, false),
984 ARCH (prescott, NOCONA, CORE, false),
985 ARCH (nocona, NOCONA, NOCONA, false),
986 ARCH (yonah, CORE, CORE, true),
987 ARCH (core, CORE, CORE, false),
988 ARCH (merom, CORE2, CORE2, true),
989 ARCH (core2, CORE2, CORE2, false),
990 ARCH (corei7, COREI7, COREI7, false),
991 ARCH (iamcu, IAMCU, IAMCU, false),
992 ARCH (k6, K6, K6, false),
993 ARCH (k6_2, K6, K6_2, false),
994 ARCH (athlon, ATHLON, ATHLON, false),
995 ARCH (sledgehammer, K8, K8, true),
996 ARCH (opteron, K8, K8, false),
997 ARCH (k8, K8, K8, false),
998 ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
999 ARCH (bdver1, BD, BDVER1, false),
1000 ARCH (bdver2, BD, BDVER2, false),
1001 ARCH (bdver3, BD, BDVER3, false),
1002 ARCH (bdver4, BD, BDVER4, false),
1003 ARCH (znver1, ZNVER, ZNVER1, false),
1004 ARCH (znver2, ZNVER, ZNVER2, false),
1005 ARCH (znver3, ZNVER, ZNVER3, false),
1006 ARCH (znver4, ZNVER, ZNVER4, false),
1007 ARCH (btver1, BT, BTVER1, false),
1008 ARCH (btver2, BT, BTVER2, false),
1009
1010 SUBARCH (8087, 8087, ANY_8087, false),
1011 SUBARCH (87, NONE, ANY_8087, false), /* Disable only! */
1012 SUBARCH (287, 287, ANY_287, false),
1013 SUBARCH (387, 387, ANY_387, false),
1014 SUBARCH (687, 687, ANY_687, false),
1015 SUBARCH (cmov, CMOV, CMOV, false),
1016 SUBARCH (fxsr, FXSR, ANY_FXSR, false),
1017 SUBARCH (mmx, MMX, ANY_MMX, false),
1018 SUBARCH (sse, SSE, ANY_SSE, false),
1019 SUBARCH (sse2, SSE2, ANY_SSE2, false),
1020 SUBARCH (sse3, SSE3, ANY_SSE3, false),
1021 SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
1022 SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
1023 SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
1024 SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
1025 SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
1026 SUBARCH (avx, AVX, ANY_AVX, false),
1027 SUBARCH (avx2, AVX2, ANY_AVX2, false),
1028 SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
1029 SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
1030 SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
1031 SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
1032 SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
1033 SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
1034 SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
1035 SUBARCH (monitor, MONITOR, MONITOR, false),
1036 SUBARCH (vmx, VMX, ANY_VMX, false),
1037 SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
1038 SUBARCH (smx, SMX, SMX, false),
1039 SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
1040 SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
1041 SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
1042 SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
1043 SUBARCH (aes, AES, ANY_AES, false),
1044 SUBARCH (pclmul, PCLMUL, ANY_PCLMUL, false),
1045 SUBARCH (clmul, PCLMUL, ANY_PCLMUL, true),
1046 SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
1047 SUBARCH (rdrnd, RDRND, RDRND, false),
1048 SUBARCH (f16c, F16C, ANY_F16C, false),
1049 SUBARCH (bmi2, BMI2, BMI2, false),
1050 SUBARCH (fma, FMA, ANY_FMA, false),
1051 SUBARCH (fma4, FMA4, ANY_FMA4, false),
1052 SUBARCH (xop, XOP, ANY_XOP, false),
1053 SUBARCH (lwp, LWP, ANY_LWP, false),
1054 SUBARCH (movbe, MOVBE, MOVBE, false),
1055 SUBARCH (cx16, CX16, CX16, false),
1056 SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
1057 SUBARCH (ept, EPT, ANY_EPT, false),
1058 SUBARCH (lzcnt, LZCNT, LZCNT, false),
1059 SUBARCH (popcnt, POPCNT, POPCNT, false),
1060 SUBARCH (hle, HLE, HLE, false),
1061 SUBARCH (rtm, RTM, ANY_RTM, false),
1062 SUBARCH (tsx, TSX, TSX, false),
1063 SUBARCH (invpcid, INVPCID, INVPCID, false),
1064 SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
1065 SUBARCH (nop, NOP, NOP, false),
1066 SUBARCH (syscall, SYSCALL, SYSCALL, false),
1067 SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
1068 SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
1069 SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
1070 SUBARCH (padlock, PADLOCK, PADLOCK, false),
1071 SUBARCH (pacifica, SVME, ANY_SVME, true),
1072 SUBARCH (svme, SVME, ANY_SVME, false),
1073 SUBARCH (abm, ABM, ABM, false),
1074 SUBARCH (bmi, BMI, BMI, false),
1075 SUBARCH (tbm, TBM, TBM, false),
1076 SUBARCH (adx, ADX, ADX, false),
1077 SUBARCH (rdseed, RDSEED, RDSEED, false),
1078 SUBARCH (prfchw, PRFCHW, PRFCHW, false),
1079 SUBARCH (smap, SMAP, SMAP, false),
1080 SUBARCH (mpx, MPX, ANY_MPX, false),
1081 SUBARCH (sha, SHA, ANY_SHA, false),
1082 SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
1083 SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
1084 SUBARCH (se1, SE1, SE1, false),
1085 SUBARCH (clwb, CLWB, CLWB, false),
1086 SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
1087 SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
1088 SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
1089 SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
1090 SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
1091 SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
1092 SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
1093 SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
1094 SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
1095 SUBARCH (clzero, CLZERO, CLZERO, false),
1096 SUBARCH (mwaitx, MWAITX, MWAITX, false),
1097 SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
1098 SUBARCH (rdpid, RDPID, RDPID, false),
1099 SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
1100 SUBARCH (ibt, IBT, IBT, false),
1101 SUBARCH (shstk, SHSTK, SHSTK, false),
1102 SUBARCH (gfni, GFNI, ANY_GFNI, false),
1103 SUBARCH (vaes, VAES, ANY_VAES, false),
1104 SUBARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, false),
1105 SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
1106 SUBARCH (pconfig, PCONFIG, PCONFIG, false),
1107 SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
1108 SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
1109 SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
1110 SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
1111 SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
1112 SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
1113 SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
1114 SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
1115 SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
1116 SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
1117 ANY_AVX512_VP2INTERSECT, false),
1118 SUBARCH (tdx, TDX, TDX, false),
1119 SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
1120 SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
1121 SUBARCH (rdpru, RDPRU, RDPRU, false),
1122 SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
1123 SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
1124 SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
1125 SUBARCH (kl, KL, ANY_KL, false),
1126 SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
1127 SUBARCH (uintr, UINTR, UINTR, false),
1128 SUBARCH (hreset, HRESET, HRESET, false),
1129 SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
1130 SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
1131 SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
1132 SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
1133 SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
1134 SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
1135 SUBARCH (msrlist, MSRLIST, MSRLIST, false),
1136 SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
1137 SUBARCH (rao_int, RAO_INT, RAO_INT, false),
1138 SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
1139 };
1140
1141 #undef SUBARCH
1142 #undef ARCH
1143
1144 #ifdef I386COFF
1145 /* Like s_lcomm_internal in gas/read.c but the alignment string
1146 is allowed to be optional. */
1147
1148 static symbolS *
1149 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1150 {
1151 addressT align = 0;
1152
1153 SKIP_WHITESPACE ();
1154
1155 if (needs_align
1156 && *input_line_pointer == ',')
1157 {
1158 align = parse_align (needs_align - 1);
1159
1160 if (align == (addressT) -1)
1161 return NULL;
1162 }
1163 else
1164 {
1165 if (size >= 8)
1166 align = 3;
1167 else if (size >= 4)
1168 align = 2;
1169 else if (size >= 2)
1170 align = 1;
1171 else
1172 align = 0;
1173 }
1174
1175 bss_alloc (symbolP, size, align);
1176 return symbolP;
1177 }
1178
1179 static void
1180 pe_lcomm (int needs_align)
1181 {
1182 s_comm_internal (needs_align * 2, pe_lcomm_internal);
1183 }
1184 #endif
1185
1186 const pseudo_typeS md_pseudo_table[] =
1187 {
1188 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1189 {"align", s_align_bytes, 0},
1190 #else
1191 {"align", s_align_ptwo, 0},
1192 #endif
1193 {"arch", set_cpu_arch, 0},
1194 #ifndef I386COFF
1195 {"bss", s_bss, 0},
1196 #else
1197 {"lcomm", pe_lcomm, 1},
1198 #endif
1199 {"ffloat", float_cons, 'f'},
1200 {"dfloat", float_cons, 'd'},
1201 {"tfloat", float_cons, 'x'},
1202 {"hfloat", float_cons, 'h'},
1203 {"bfloat16", float_cons, 'b'},
1204 {"value", cons, 2},
1205 {"slong", signed_cons, 4},
1206 {"insn", s_insn, 0},
1207 {"noopt", s_ignore, 0},
1208 {"optim", s_ignore, 0},
1209 {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1210 {"code16", set_code_flag, CODE_16BIT},
1211 {"code32", set_code_flag, CODE_32BIT},
1212 #ifdef BFD64
1213 {"code64", set_code_flag, CODE_64BIT},
1214 #endif
1215 {"intel_syntax", set_intel_syntax, 1},
1216 {"att_syntax", set_intel_syntax, 0},
1217 {"intel_mnemonic", set_intel_mnemonic, 1},
1218 {"att_mnemonic", set_intel_mnemonic, 0},
1219 {"allow_index_reg", set_allow_index_reg, 1},
1220 {"disallow_index_reg", set_allow_index_reg, 0},
1221 {"sse_check", set_check, 0},
1222 {"operand_check", set_check, 1},
1223 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1224 {"largecomm", handle_large_common, 0},
1225 #else
1226 {"file", dwarf2_directive_file, 0},
1227 {"loc", dwarf2_directive_loc, 0},
1228 {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1229 #endif
1230 #ifdef TE_PE
1231 {"secrel32", pe_directive_secrel, 0},
1232 {"secidx", pe_directive_secidx, 0},
1233 #endif
1234 {0, 0, 0}
1235 };
1236
1237 /* For interface with expression (). */
1238 extern char *input_line_pointer;
1239
1240 /* Hash table for instruction mnemonic lookup. */
1241 static htab_t op_hash;
1242
1243 /* Hash table for register lookup. */
1244 static htab_t reg_hash;
1245 \f
1246 /* Various efficient no-op patterns for aligning code labels.
1247 Note: Don't try to assemble the instructions in the comments.
1248 0L and 0w are not legal. */
1249 static const unsigned char f32_1[] =
1250 {0x90}; /* nop */
1251 static const unsigned char f32_2[] =
1252 {0x66,0x90}; /* xchg %ax,%ax */
1253 static const unsigned char f32_3[] =
1254 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
1255 static const unsigned char f32_4[] =
1256 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
1257 static const unsigned char f32_6[] =
1258 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
1259 static const unsigned char f32_7[] =
1260 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1261 static const unsigned char f16_3[] =
1262 {0x8d,0x74,0x00}; /* lea 0(%si),%si */
1263 static const unsigned char f16_4[] =
1264 {0x8d,0xb4,0x00,0x00}; /* lea 0W(%si),%si */
1265 static const unsigned char jump_disp8[] =
1266 {0xeb}; /* jmp disp8 */
1267 static const unsigned char jump32_disp32[] =
1268 {0xe9}; /* jmp disp32 */
1269 static const unsigned char jump16_disp32[] =
1270 {0x66,0xe9}; /* jmp disp32 */
1271 /* 32-bit NOPs patterns. */
1272 static const unsigned char *const f32_patt[] = {
1273 f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1274 };
1275 /* 16-bit NOPs patterns. */
1276 static const unsigned char *const f16_patt[] = {
1277 f32_1, f32_2, f16_3, f16_4
1278 };
1279 /* nopl (%[re]ax) */
1280 static const unsigned char alt_3[] =
1281 {0x0f,0x1f,0x00};
1282 /* nopl 0(%[re]ax) */
1283 static const unsigned char alt_4[] =
1284 {0x0f,0x1f,0x40,0x00};
1285 /* nopl 0(%[re]ax,%[re]ax,1) */
1286 static const unsigned char alt_5[] =
1287 {0x0f,0x1f,0x44,0x00,0x00};
1288 /* nopw 0(%[re]ax,%[re]ax,1) */
1289 static const unsigned char alt_6[] =
1290 {0x66,0x0f,0x1f,0x44,0x00,0x00};
1291 /* nopl 0L(%[re]ax) */
1292 static const unsigned char alt_7[] =
1293 {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1294 /* nopl 0L(%[re]ax,%[re]ax,1) */
1295 static const unsigned char alt_8[] =
1296 {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1297 /* nopw 0L(%[re]ax,%[re]ax,1) */
1298 static const unsigned char alt_9[] =
1299 {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1300 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1301 static const unsigned char alt_10[] =
1302 {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1303 /* data16 nopw %cs:0L(%eax,%eax,1) */
1304 static const unsigned char alt_11[] =
1305 {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1306 /* 32-bit and 64-bit NOPs patterns. */
1307 static const unsigned char *const alt_patt[] = {
1308 f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1309 alt_9, alt_10, alt_11
1310 };
1311
1312 /* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
1313 size of a single NOP instruction MAX_SINGLE_NOP_SIZE. */
1314
1315 static void
1316 i386_output_nops (char *where, const unsigned char *const *patt,
1317 int count, int max_single_nop_size)
1318
1319 {
1320 /* Place the longer NOP first. */
1321 int last;
1322 int offset;
1323 const unsigned char *nops;
1324
1325 if (max_single_nop_size < 1)
1326 {
1327 as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
1328 max_single_nop_size);
1329 return;
1330 }
1331
1332 nops = patt[max_single_nop_size - 1];
1333
1334 /* Use the smaller one if the requested one isn't available. */
1335 if (nops == NULL)
1336 {
1337 max_single_nop_size--;
1338 nops = patt[max_single_nop_size - 1];
1339 }
1340
1341 last = count % max_single_nop_size;
1342
1343 count -= last;
1344 for (offset = 0; offset < count; offset += max_single_nop_size)
1345 memcpy (where + offset, nops, max_single_nop_size);
1346
1347 if (last)
1348 {
1349 nops = patt[last - 1];
1350 if (nops == NULL)
1351 {
1352 /* Use the smaller one plus one-byte NOP if the needed one
1353 isn't available. */
1354 last--;
1355 nops = patt[last - 1];
1356 memcpy (where + offset, nops, last);
1357 where[offset + last] = *patt[0];
1358 }
1359 else
1360 memcpy (where + offset, nops, last);
1361 }
1362 }
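
/* Illustrative call with hypothetical values: requesting 10 bytes from
   alt_patt with a 7-byte single-NOP limit emits the 7-byte alt_7
   followed by the 3-byte alt_3, since 10 % 7 == 3.  */
#if 0
  char buf[10];
  i386_output_nops (buf, alt_patt, 10, 7);
#endif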
1363
1364 static INLINE int
1365 fits_in_imm7 (offsetT num)
1366 {
1367 return (num & 0x7f) == num;
1368 }
1369
1370 static INLINE int
1371 fits_in_imm31 (offsetT num)
1372 {
1373 return (num & 0x7fffffff) == num;
1374 }
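
/* These deliberately reject negative values: for num == -1 the masked
   value 0x7f differs from num, so fits_in_imm7 (-1) is 0, while
   fits_in_imm7 (127) is 1.  */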
1375
1376 /* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
1377 single NOP instruction LIMIT. */
1378
1379 void
1380 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1381 {
1382 const unsigned char *const *patt = NULL;
1383 int max_single_nop_size;
1384 /* Maximum number of NOPs before switching to jump over NOPs. */
1385 int max_number_of_nops;
1386
1387 switch (fragP->fr_type)
1388 {
1389 case rs_fill_nop:
1390 case rs_align_code:
1391 break;
1392 case rs_machine_dependent:
1393 /* Allow NOP padding for jumps and calls. */
1394 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1395 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1396 break;
1397 /* Fall through. */
1398 default:
1399 return;
1400 }
1401
1402 /* We need to decide which NOP sequence to use for 32bit and
1403 64bit. When -mtune= is used:
1404
1405 1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1406 PROCESSOR_GENERIC32, f32_patt will be used.
1407 2. For the rest, alt_patt will be used.
1408
1409 When -mtune= isn't used, alt_patt will be used if
1410 cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will
1411 be used.
1412
1413 When -march= or .arch is used, we can't use anything beyond
1414 cpu_arch_isa_flags. */
1415
1416 if (flag_code == CODE_16BIT)
1417 {
1418 patt = f16_patt;
1419 max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1420 /* Limit number of NOPs to 2 in 16-bit mode. */
1421 max_number_of_nops = 2;
1422 }
1423 else
1424 {
1425 if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1426 {
1427 /* PROCESSOR_UNKNOWN means that all ISAs may be used. */
1428 switch (cpu_arch_tune)
1429 {
1430 case PROCESSOR_UNKNOWN:
1431 /* We use cpu_arch_isa_flags to check if we SHOULD
1432 optimize with nops. */
1433 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1434 patt = alt_patt;
1435 else
1436 patt = f32_patt;
1437 break;
1438 case PROCESSOR_PENTIUM4:
1439 case PROCESSOR_NOCONA:
1440 case PROCESSOR_CORE:
1441 case PROCESSOR_CORE2:
1442 case PROCESSOR_COREI7:
1443 case PROCESSOR_GENERIC64:
1444 case PROCESSOR_K6:
1445 case PROCESSOR_ATHLON:
1446 case PROCESSOR_K8:
1447 case PROCESSOR_AMDFAM10:
1448 case PROCESSOR_BD:
1449 case PROCESSOR_ZNVER:
1450 case PROCESSOR_BT:
1451 patt = alt_patt;
1452 break;
1453 case PROCESSOR_I386:
1454 case PROCESSOR_I486:
1455 case PROCESSOR_PENTIUM:
1456 case PROCESSOR_PENTIUMPRO:
1457 case PROCESSOR_IAMCU:
1458 case PROCESSOR_GENERIC32:
1459 patt = f32_patt;
1460 break;
1461 case PROCESSOR_NONE:
1462 abort ();
1463 }
1464 }
1465 else
1466 {
1467 switch (fragP->tc_frag_data.tune)
1468 {
1469 case PROCESSOR_UNKNOWN:
1470 /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1471 PROCESSOR_UNKNOWN. */
1472 abort ();
1473 break;
1474
1475 case PROCESSOR_I386:
1476 case PROCESSOR_I486:
1477 case PROCESSOR_PENTIUM:
1478 case PROCESSOR_IAMCU:
1479 case PROCESSOR_K6:
1480 case PROCESSOR_ATHLON:
1481 case PROCESSOR_K8:
1482 case PROCESSOR_AMDFAM10:
1483 case PROCESSOR_BD:
1484 case PROCESSOR_ZNVER:
1485 case PROCESSOR_BT:
1486 case PROCESSOR_GENERIC32:
1487 /* We use cpu_arch_isa_flags to check if we CAN optimize
1488 with nops. */
1489 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1490 patt = alt_patt;
1491 else
1492 patt = f32_patt;
1493 break;
1494 case PROCESSOR_PENTIUMPRO:
1495 case PROCESSOR_PENTIUM4:
1496 case PROCESSOR_NOCONA:
1497 case PROCESSOR_CORE:
1498 case PROCESSOR_CORE2:
1499 case PROCESSOR_COREI7:
1500 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1501 patt = alt_patt;
1502 else
1503 patt = f32_patt;
1504 break;
1505 case PROCESSOR_GENERIC64:
1506 patt = alt_patt;
1507 break;
1508 case PROCESSOR_NONE:
1509 abort ();
1510 }
1511 }
1512
1513 if (patt == f32_patt)
1514 {
1515 max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1516 /* Limit number of NOPs to 2 for older processors. */
1517 max_number_of_nops = 2;
1518 }
1519 else
1520 {
1521 max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1522 /* Limit number of NOPs to 7 for newer processors. */
1523 max_number_of_nops = 7;
1524 }
1525 }
1526
1527 if (limit == 0)
1528 limit = max_single_nop_size;
1529
1530 if (fragP->fr_type == rs_fill_nop)
1531 {
1532 /* Output NOPs for .nop directive. */
1533 if (limit > max_single_nop_size)
1534 {
1535 as_bad_where (fragP->fr_file, fragP->fr_line,
1536 _("invalid single nop size: %d "
1537 "(expect within [0, %d])"),
1538 limit, max_single_nop_size);
1539 return;
1540 }
1541 }
1542 else if (fragP->fr_type != rs_machine_dependent)
1543 fragP->fr_var = count;
1544
1545 if ((count / max_single_nop_size) > max_number_of_nops)
1546 {
1547 /* Generate jump over NOPs. */
1548 offsetT disp = count - 2;
1549 if (fits_in_imm7 (disp))
1550 {
1551 /* Use "jmp disp8" if possible. */
1552 count = disp;
1553 where[0] = jump_disp8[0];
1554 where[1] = count;
1555 where += 2;
1556 }
1557 else
1558 {
1559 unsigned int size_of_jump;
1560
1561 if (flag_code == CODE_16BIT)
1562 {
1563 where[0] = jump16_disp32[0];
1564 where[1] = jump16_disp32[1];
1565 size_of_jump = 2;
1566 }
1567 else
1568 {
1569 where[0] = jump32_disp32[0];
1570 size_of_jump = 1;
1571 }
1572
1573 count -= size_of_jump + 4;
1574 if (!fits_in_imm31 (count))
1575 {
1576 as_bad_where (fragP->fr_file, fragP->fr_line,
1577 _("jump over nop padding out of range"));
1578 return;
1579 }
1580
1581 md_number_to_chars (where + size_of_jump, count, 4);
1582 where += size_of_jump + 4;
1583 }
1584 }
1585
1586 /* Generate multiple NOPs. */
1587 i386_output_nops (where, patt, count, limit);
1588 }
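
/* Worked example of the jump-over path (hypothetical numbers): padding
   100 bytes in 32-bit mode with alt_patt gives max_single_nop_size == 11
   and max_number_of_nops == 7; 100 / 11 == 9 exceeds 7, so a two-byte
   "jmp .+100" (0xeb 0x62) is emitted and the remaining 98 bytes are
   filled with NOPs that execution skips over.  */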
1589
1590 static INLINE int
1591 operand_type_all_zero (const union i386_operand_type *x)
1592 {
1593 switch (ARRAY_SIZE(x->array))
1594 {
1595 case 3:
1596 if (x->array[2])
1597 return 0;
1598 /* Fall through. */
1599 case 2:
1600 if (x->array[1])
1601 return 0;
1602 /* Fall through. */
1603 case 1:
1604 return !x->array[0];
1605 default:
1606 abort ();
1607 }
1608 }
1609
1610 static INLINE void
1611 operand_type_set (union i386_operand_type *x, unsigned int v)
1612 {
1613 switch (ARRAY_SIZE(x->array))
1614 {
1615 case 3:
1616 x->array[2] = v;
1617 /* Fall through. */
1618 case 2:
1619 x->array[1] = v;
1620 /* Fall through. */
1621 case 1:
1622 x->array[0] = v;
1623 /* Fall through. */
1624 break;
1625 default:
1626 abort ();
1627 }
1628
1629 x->bitfield.class = ClassNone;
1630 x->bitfield.instance = InstanceNone;
1631 }
1632
1633 static INLINE int
1634 operand_type_equal (const union i386_operand_type *x,
1635 const union i386_operand_type *y)
1636 {
1637 switch (ARRAY_SIZE(x->array))
1638 {
1639 case 3:
1640 if (x->array[2] != y->array[2])
1641 return 0;
1642 /* Fall through. */
1643 case 2:
1644 if (x->array[1] != y->array[1])
1645 return 0;
1646 /* Fall through. */
1647 case 1:
1648 return x->array[0] == y->array[0];
1649 break;
1650 default:
1651 abort ();
1652 }
1653 }
1654
1655 static INLINE int
1656 cpu_flags_all_zero (const union i386_cpu_flags *x)
1657 {
1658 switch (ARRAY_SIZE(x->array))
1659 {
1660 case 5:
1661 if (x->array[4])
1662 return 0;
1663 /* Fall through. */
1664 case 4:
1665 if (x->array[3])
1666 return 0;
1667 /* Fall through. */
1668 case 3:
1669 if (x->array[2])
1670 return 0;
1671 /* Fall through. */
1672 case 2:
1673 if (x->array[1])
1674 return 0;
1675 /* Fall through. */
1676 case 1:
1677 return !x->array[0];
1678 default:
1679 abort ();
1680 }
1681 }
1682
1683 static INLINE int
1684 cpu_flags_equal (const union i386_cpu_flags *x,
1685 const union i386_cpu_flags *y)
1686 {
1687 switch (ARRAY_SIZE(x->array))
1688 {
1689 case 5:
1690 if (x->array[4] != y->array[4])
1691 return 0;
1692 /* Fall through. */
1693 case 4:
1694 if (x->array[3] != y->array[3])
1695 return 0;
1696 /* Fall through. */
1697 case 3:
1698 if (x->array[2] != y->array[2])
1699 return 0;
1700 /* Fall through. */
1701 case 2:
1702 if (x->array[1] != y->array[1])
1703 return 0;
1704 /* Fall through. */
1705 case 1:
1706 return x->array[0] == y->array[0];
1707 break;
1708 default:
1709 abort ();
1710 }
1711 }
1712
1713 static INLINE int
1714 cpu_flags_check_cpu64 (i386_cpu_flags f)
1715 {
1716 return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1717 || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1718 }
1719
1720 static INLINE i386_cpu_flags
1721 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1722 {
1723 switch (ARRAY_SIZE (x.array))
1724 {
1725 case 5:
1726 x.array [4] &= y.array [4];
1727 /* Fall through. */
1728 case 4:
1729 x.array [3] &= y.array [3];
1730 /* Fall through. */
1731 case 3:
1732 x.array [2] &= y.array [2];
1733 /* Fall through. */
1734 case 2:
1735 x.array [1] &= y.array [1];
1736 /* Fall through. */
1737 case 1:
1738 x.array [0] &= y.array [0];
1739 break;
1740 default:
1741 abort ();
1742 }
1743 return x;
1744 }
1745
1746 static INLINE i386_cpu_flags
1747 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1748 {
1749 switch (ARRAY_SIZE (x.array))
1750 {
1751 case 5:
1752 x.array [4] |= y.array [4];
1753 /* Fall through. */
1754 case 4:
1755 x.array [3] |= y.array [3];
1756 /* Fall through. */
1757 case 3:
1758 x.array [2] |= y.array [2];
1759 /* Fall through. */
1760 case 2:
1761 x.array [1] |= y.array [1];
1762 /* Fall through. */
1763 case 1:
1764 x.array [0] |= y.array [0];
1765 break;
1766 default:
1767 abort ();
1768 }
1769 return x;
1770 }
1771
1772 static INLINE i386_cpu_flags
1773 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1774 {
1775 switch (ARRAY_SIZE (x.array))
1776 {
1777 case 5:
1778 x.array [4] &= ~y.array [4];
1779 /* Fall through. */
1780 case 4:
1781 x.array [3] &= ~y.array [3];
1782 /* Fall through. */
1783 case 3:
1784 x.array [2] &= ~y.array [2];
1785 /* Fall through. */
1786 case 2:
1787 x.array [1] &= ~y.array [1];
1788 /* Fall through. */
1789 case 1:
1790 x.array [0] &= ~y.array [0];
1791 break;
1792 default:
1793 abort ();
1794 }
1795 return x;
1796 }
1797
1798 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1799
1800 #define CPU_FLAGS_ARCH_MATCH 0x1
1801 #define CPU_FLAGS_64BIT_MATCH 0x2
1802
1803 #define CPU_FLAGS_PERFECT_MATCH \
1804 (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1805
1806 /* Return CPU flags match bits. */
1807
1808 static int
1809 cpu_flags_match (const insn_template *t)
1810 {
1811 i386_cpu_flags x = t->cpu_flags;
1812 int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1813
1814 x.bitfield.cpu64 = 0;
1815 x.bitfield.cpuno64 = 0;
1816
1817 if (cpu_flags_all_zero (&x))
1818 {
1819 /* This instruction is available on all archs. */
1820 match |= CPU_FLAGS_ARCH_MATCH;
1821 }
1822 else
1823 {
1824 /* This instruction is available only on some archs. */
1825 i386_cpu_flags cpu = cpu_arch_flags;
1826
1827 /* AVX512VL is not a standalone feature - match it and then strip it. */
1828 if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1829 return match;
1830 x.bitfield.cpuavx512vl = 0;
1831
1832 /* AVX and AVX2 present at the same time express an operand size
1833 dependency - strip AVX2 for the purposes here. The operand size
1834 dependent check occurs in check_vecOperands(). */
1835 if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1836 x.bitfield.cpuavx2 = 0;
1837
1838 cpu = cpu_flags_and (x, cpu);
1839 if (!cpu_flags_all_zero (&cpu))
1840 {
1841 if (x.bitfield.cpuavx)
1842 {
1843 /* We need to check a few extra flags with AVX. */
1844 if (cpu.bitfield.cpuavx
1845 && (!t->opcode_modifier.sse2avx
1846 || (sse2avx && !i.prefix[DATA_PREFIX]))
1847 && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1848 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1849 && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1850 match |= CPU_FLAGS_ARCH_MATCH;
1851 }
1852 else if (x.bitfield.cpuavx512f)
1853 {
1854 /* We need to check a few extra flags with AVX512F. */
1855 if (cpu.bitfield.cpuavx512f
1856 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1857 && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1858 && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1859 match |= CPU_FLAGS_ARCH_MATCH;
1860 }
1861 else
1862 match |= CPU_FLAGS_ARCH_MATCH;
1863 }
1864 }
1865 return match;
1866 }
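/* Worked example (illustration only, not part of the assembler): for a
   template carrying just CpuAVX2, assembling in 64-bit mode with AVX2
   enabled leaves the cpu_flags_and () intersection non-zero, neither of
   the special AVX/AVX512F cases applies, and CPU_FLAGS_PERFECT_MATCH is
   returned; with ".arch .noavx2" in effect the intersection is empty and
   only CPU_FLAGS_64BIT_MATCH remains.  */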
1867
1868 static INLINE i386_operand_type
1869 operand_type_and (i386_operand_type x, i386_operand_type y)
1870 {
1871 if (x.bitfield.class != y.bitfield.class)
1872 x.bitfield.class = ClassNone;
1873 if (x.bitfield.instance != y.bitfield.instance)
1874 x.bitfield.instance = InstanceNone;
1875
1876 switch (ARRAY_SIZE (x.array))
1877 {
1878 case 3:
1879 x.array [2] &= y.array [2];
1880 /* Fall through. */
1881 case 2:
1882 x.array [1] &= y.array [1];
1883 /* Fall through. */
1884 case 1:
1885 x.array [0] &= y.array [0];
1886 break;
1887 default:
1888 abort ();
1889 }
1890 return x;
1891 }
1892
1893 static INLINE i386_operand_type
1894 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1895 {
1896 gas_assert (y.bitfield.class == ClassNone);
1897 gas_assert (y.bitfield.instance == InstanceNone);
1898
1899 switch (ARRAY_SIZE (x.array))
1900 {
1901 case 3:
1902 x.array [2] &= ~y.array [2];
1903 /* Fall through. */
1904 case 2:
1905 x.array [1] &= ~y.array [1];
1906 /* Fall through. */
1907 case 1:
1908 x.array [0] &= ~y.array [0];
1909 break;
1910 default:
1911 abort ();
1912 }
1913 return x;
1914 }
1915
1916 static INLINE i386_operand_type
1917 operand_type_or (i386_operand_type x, i386_operand_type y)
1918 {
1919 gas_assert (x.bitfield.class == ClassNone ||
1920 y.bitfield.class == ClassNone ||
1921 x.bitfield.class == y.bitfield.class);
1922 gas_assert (x.bitfield.instance == InstanceNone ||
1923 y.bitfield.instance == InstanceNone ||
1924 x.bitfield.instance == y.bitfield.instance);
1925
1926 switch (ARRAY_SIZE (x.array))
1927 {
1928 case 3:
1929 x.array [2] |= y.array [2];
1930 /* Fall through. */
1931 case 2:
1932 x.array [1] |= y.array [1];
1933 /* Fall through. */
1934 case 1:
1935 x.array [0] |= y.array [0];
1936 break;
1937 default:
1938 abort ();
1939 }
1940 return x;
1941 }
1942
1943 static INLINE i386_operand_type
1944 operand_type_xor (i386_operand_type x, i386_operand_type y)
1945 {
1946 gas_assert (y.bitfield.class == ClassNone);
1947 gas_assert (y.bitfield.instance == InstanceNone);
1948
1949 switch (ARRAY_SIZE (x.array))
1950 {
1951 case 3:
1952 x.array [2] ^= y.array [2];
1953 /* Fall through. */
1954 case 2:
1955 x.array [1] ^= y.array [1];
1956 /* Fall through. */
1957 case 1:
1958 x.array [0] ^= y.array [0];
1959 break;
1960 default:
1961 abort ();
1962 }
1963 return x;
1964 }
1965
1966 static const i386_operand_type anydisp = {
1967 .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
1968 };
1969
1970 enum operand_type
1971 {
1972 reg,
1973 imm,
1974 disp,
1975 anymem
1976 };
1977
1978 static INLINE int
1979 operand_type_check (i386_operand_type t, enum operand_type c)
1980 {
1981 switch (c)
1982 {
1983 case reg:
1984 return t.bitfield.class == Reg;
1985
1986 case imm:
1987 return (t.bitfield.imm8
1988 || t.bitfield.imm8s
1989 || t.bitfield.imm16
1990 || t.bitfield.imm32
1991 || t.bitfield.imm32s
1992 || t.bitfield.imm64);
1993
1994 case disp:
1995 return (t.bitfield.disp8
1996 || t.bitfield.disp16
1997 || t.bitfield.disp32
1998 || t.bitfield.disp64);
1999
2000 case anymem:
2001 return (t.bitfield.disp8
2002 || t.bitfield.disp16
2003 || t.bitfield.disp32
2004 || t.bitfield.disp64
2005 || t.bitfield.baseindex);
2006
2007 default:
2008 abort ();
2009 }
2010
2011 return 0;
2012 }
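/* Illustration (not part of the assembler): a parsed memory operand such
   as 8(%ebx) has displacement and baseindex bits set, so the disp and
   anymem checks above return non-zero while the reg and imm checks
   return 0.  */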
2013
2014 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2015 between operand GIVEN and operand WANTED for instruction template T. */
2016
2017 static INLINE int
2018 match_operand_size (const insn_template *t, unsigned int wanted,
2019 unsigned int given)
2020 {
2021 return !((i.types[given].bitfield.byte
2022 && !t->operand_types[wanted].bitfield.byte)
2023 || (i.types[given].bitfield.word
2024 && !t->operand_types[wanted].bitfield.word)
2025 || (i.types[given].bitfield.dword
2026 && !t->operand_types[wanted].bitfield.dword)
2027 || (i.types[given].bitfield.qword
2028 && (!t->operand_types[wanted].bitfield.qword
2029 /* Don't allow 64-bit (memory) operands outside of 64-bit
2030 mode, when they're used where a 64-bit GPR could also
2031 be used. Checking is needed for Intel Syntax only. */
2032 || (intel_syntax
2033 && flag_code != CODE_64BIT
2034 && (t->operand_types[wanted].bitfield.class == Reg
2035 || t->operand_types[wanted].bitfield.class == Accum
2036 || t->opcode_modifier.isstring))))
2037 || (i.types[given].bitfield.tbyte
2038 && !t->operand_types[wanted].bitfield.tbyte));
2039 }
2040
2041 /* Return 1 if there is no conflict in SIMD register between operand
2042 GIVEN and operand WANTED for instruction template T. */
2043
2044 static INLINE int
2045 match_simd_size (const insn_template *t, unsigned int wanted,
2046 unsigned int given)
2047 {
2048 return !((i.types[given].bitfield.xmmword
2049 && !t->operand_types[wanted].bitfield.xmmword)
2050 || (i.types[given].bitfield.ymmword
2051 && !t->operand_types[wanted].bitfield.ymmword)
2052 || (i.types[given].bitfield.zmmword
2053 && !t->operand_types[wanted].bitfield.zmmword)
2054 || (i.types[given].bitfield.tmmword
2055 && !t->operand_types[wanted].bitfield.tmmword));
2056 }
2057
2058 /* Return 1 if there is no conflict in any size between operand GIVEN
2059 and operand WANTED for instruction template T. */
2060
2061 static INLINE int
2062 match_mem_size (const insn_template *t, unsigned int wanted,
2063 unsigned int given)
2064 {
2065 return (match_operand_size (t, wanted, given)
2066 && !((i.types[given].bitfield.unspecified
2067 && !i.broadcast.type
2068 && !i.broadcast.bytes
2069 && !t->operand_types[wanted].bitfield.unspecified)
2070 || (i.types[given].bitfield.fword
2071 && !t->operand_types[wanted].bitfield.fword)
2072 /* For scalar opcode templates to allow register and memory
2073 operands at the same time, some special casing is needed
2074 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2075 down-conversion vpmov*. */
2076 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2077 && t->operand_types[wanted].bitfield.byte
2078 + t->operand_types[wanted].bitfield.word
2079 + t->operand_types[wanted].bitfield.dword
2080 + t->operand_types[wanted].bitfield.qword
2081 > !!t->opcode_modifier.broadcast)
2082 ? (i.types[given].bitfield.xmmword
2083 || i.types[given].bitfield.ymmword
2084 || i.types[given].bitfield.zmmword)
2085 : !match_simd_size (t, wanted, given))));
2086 }
2087
2088 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2089 operands for instruction template T, and it has MATCH_REVERSE set if there
2090 is no size conflict on any operands for the template with operands reversed
2091 (and the template allows for reversing in the first place). */
2092
2093 #define MATCH_STRAIGHT 1
2094 #define MATCH_REVERSE 2
2095
2096 static INLINE unsigned int
2097 operand_size_match (const insn_template *t)
2098 {
2099 unsigned int j, match = MATCH_STRAIGHT;
2100
2101 /* Don't check non-absolute jump instructions. */
2102 if (t->opcode_modifier.jump
2103 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2104 return match;
2105
2106 /* Check memory and accumulator operand size. */
2107 for (j = 0; j < i.operands; j++)
2108 {
2109 if (i.types[j].bitfield.class != Reg
2110 && i.types[j].bitfield.class != RegSIMD
2111 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2112 continue;
2113
2114 if (t->operand_types[j].bitfield.class == Reg
2115 && !match_operand_size (t, j, j))
2116 {
2117 match = 0;
2118 break;
2119 }
2120
2121 if (t->operand_types[j].bitfield.class == RegSIMD
2122 && !match_simd_size (t, j, j))
2123 {
2124 match = 0;
2125 break;
2126 }
2127
2128 if (t->operand_types[j].bitfield.instance == Accum
2129 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2130 {
2131 match = 0;
2132 break;
2133 }
2134
2135 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2136 {
2137 match = 0;
2138 break;
2139 }
2140 }
2141
2142 if (!t->opcode_modifier.d)
2143 return match;
2144
2145 /* Check reverse. */
2146 gas_assert (i.operands >= 2);
2147
2148 for (j = 0; j < i.operands; j++)
2149 {
2150 unsigned int given = i.operands - j - 1;
2151
2152 /* For FMA4 and XOP insns VEX.W controls just the first two
2153 register operands. */
2154 if (t->cpu_flags.bitfield.cpufma4 || t->cpu_flags.bitfield.cpuxop)
2155 given = j < 2 ? 1 - j : j;
2156
2157 if (t->operand_types[j].bitfield.class == Reg
2158 && !match_operand_size (t, j, given))
2159 return match;
2160
2161 if (t->operand_types[j].bitfield.class == RegSIMD
2162 && !match_simd_size (t, j, given))
2163 return match;
2164
2165 if (t->operand_types[j].bitfield.instance == Accum
2166 && (!match_operand_size (t, j, given)
2167 || !match_simd_size (t, j, given)))
2168 return match;
2169
2170 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2171 return match;
2172 }
2173
2174 return match | MATCH_REVERSE;
2175 }
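/* Illustration: for a D-marked template such as the register-to-register
   form of "mov %eax, %ebx" both operand orders fit, so the function
   returns MATCH_STRAIGHT | MATCH_REVERSE (3); a size conflict on the
   straight order yields 0, one on the reversed order just
   MATCH_STRAIGHT.  */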
2176
2177 static INLINE int
2178 operand_type_match (i386_operand_type overlap,
2179 i386_operand_type given)
2180 {
2181 i386_operand_type temp = overlap;
2182
2183 temp.bitfield.unspecified = 0;
2184 temp.bitfield.byte = 0;
2185 temp.bitfield.word = 0;
2186 temp.bitfield.dword = 0;
2187 temp.bitfield.fword = 0;
2188 temp.bitfield.qword = 0;
2189 temp.bitfield.tbyte = 0;
2190 temp.bitfield.xmmword = 0;
2191 temp.bitfield.ymmword = 0;
2192 temp.bitfield.zmmword = 0;
2193 temp.bitfield.tmmword = 0;
2194 if (operand_type_all_zero (&temp))
2195 goto mismatch;
2196
2197 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2198 return 1;
2199
2200 mismatch:
2201 i.error = operand_type_mismatch;
2202 return 0;
2203 }
2204
2205 /* If given types g0 and g1 are registers, they must be of the same type
2206 unless the expected operand type register overlap is null.
2207 Intel syntax sized memory operands are also checked here. */
2208
2209 static INLINE int
2210 operand_type_register_match (i386_operand_type g0,
2211 i386_operand_type t0,
2212 i386_operand_type g1,
2213 i386_operand_type t1)
2214 {
2215 if (g0.bitfield.class != Reg
2216 && g0.bitfield.class != RegSIMD
2217 && (g0.bitfield.unspecified
2218 || !operand_type_check (g0, anymem)))
2219 return 1;
2220
2221 if (g1.bitfield.class != Reg
2222 && g1.bitfield.class != RegSIMD
2223 && (g1.bitfield.unspecified
2224 || !operand_type_check (g1, anymem)))
2225 return 1;
2226
2227 if (g0.bitfield.byte == g1.bitfield.byte
2228 && g0.bitfield.word == g1.bitfield.word
2229 && g0.bitfield.dword == g1.bitfield.dword
2230 && g0.bitfield.qword == g1.bitfield.qword
2231 && g0.bitfield.xmmword == g1.bitfield.xmmword
2232 && g0.bitfield.ymmword == g1.bitfield.ymmword
2233 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2234 return 1;
2235
2236 /* If expectations overlap in no more than a single size, all is fine. */
2237 g0 = operand_type_and (t0, t1);
2238 if (g0.bitfield.byte
2239 + g0.bitfield.word
2240 + g0.bitfield.dword
2241 + g0.bitfield.qword
2242 + g0.bitfield.xmmword
2243 + g0.bitfield.ymmword
2244 + g0.bitfield.zmmword <= 1)
2245 return 1;
2246
2247 i.error = register_type_mismatch;
2248
2249 return 0;
2250 }
2251
2252 static INLINE unsigned int
2253 register_number (const reg_entry *r)
2254 {
2255 unsigned int nr = r->reg_num;
2256
2257 if (r->reg_flags & RegRex)
2258 nr += 8;
2259
2260 if (r->reg_flags & RegVRex)
2261 nr += 16;
2262
2263 return nr;
2264 }
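/* Examples (illustrative): %eax is plain reg_num 0 -> 0; %r8d carries
   RegRex, so 0 + 8 == 8; %xmm25 carries both RegRex and RegVRex on top
   of reg_num 1, giving 1 + 8 + 16 == 25.  */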
2265
2266 static INLINE unsigned int
2267 mode_from_disp_size (i386_operand_type t)
2268 {
2269 if (t.bitfield.disp8)
2270 return 1;
2271 else if (t.bitfield.disp16
2272 || t.bitfield.disp32)
2273 return 2;
2274 else
2275 return 0;
2276 }
2277
2278 static INLINE int
2279 fits_in_signed_byte (addressT num)
2280 {
2281 return num + 0x80 <= 0xff;
2282 }
2283
2284 static INLINE int
2285 fits_in_unsigned_byte (addressT num)
2286 {
2287 return num <= 0xff;
2288 }
2289
2290 static INLINE int
2291 fits_in_unsigned_word (addressT num)
2292 {
2293 return num <= 0xffff;
2294 }
2295
2296 static INLINE int
2297 fits_in_signed_word (addressT num)
2298 {
2299 return num + 0x8000 <= 0xffff;
2300 }
2301
2302 static INLINE int
2303 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2304 {
2305 #ifndef BFD64
2306 return 1;
2307 #else
2308 return num + 0x80000000 <= 0xffffffff;
2309 #endif
2310 } /* fits_in_signed_long() */
2311
2312 static INLINE int
2313 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2314 {
2315 #ifndef BFD64
2316 return 1;
2317 #else
2318 return num <= 0xffffffff;
2319 #endif
2320 } /* fits_in_unsigned_long() */
2321
2322 static INLINE valueT extend_to_32bit_address (addressT num)
2323 {
2324 #ifdef BFD64
2325 if (fits_in_unsigned_long (num))
2326 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2327
2328 if (!fits_in_signed_long (num))
2329 return num & 0xffffffff;
2330 #endif
2331
2332 return num;
2333 }
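/* Illustration: 0x80000000 fits in an unsigned 32-bit value, so the
   xor/subtract trick sign-extends it to 0xffffffff80000000; 0x123456789
   fits neither a signed nor an unsigned 32-bit value and is truncated
   to 0x23456789.  */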
2334
2335 static INLINE int
2336 fits_in_disp8 (offsetT num)
2337 {
2338 int shift = i.memshift;
2339 unsigned int mask;
2340
2341 if (shift == -1)
2342 abort ();
2343
2344 mask = (1 << shift) - 1;
2345
2346 /* Return 0 if NUM isn't properly aligned. */
2347 if ((num & mask))
2348 return 0;
2349
2350 /* Check if NUM will fit in 8bit after shift. */
2351 return fits_in_signed_byte (num >> shift);
2352 }
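/* Illustration of EVEX compressed displacement: with i.memshift == 6, as
   for a full 64-byte zmm memory operand, fits_in_disp8 (0x80) is non-zero
   (0x80 is 64-byte aligned and 0x80 >> 6 == 2 fits in a signed byte),
   while fits_in_disp8 (0x44) is 0 because of the misalignment.  */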
2353
2354 static INLINE int
2355 fits_in_imm4 (offsetT num)
2356 {
2357 return (num & 0xf) == num;
2358 }
2359
2360 static i386_operand_type
2361 smallest_imm_type (offsetT num)
2362 {
2363 i386_operand_type t;
2364
2365 operand_type_set (&t, 0);
2366 t.bitfield.imm64 = 1;
2367
2368 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2369 {
2370 /* This code is disabled on the 486 because all the Imm1 forms
2371 in the opcode table are slower on the i486. They're the
2372 versions with the implicitly specified single-position
2373 displacement, which has another syntax if you really want to
2374 use that form. */
2375 t.bitfield.imm1 = 1;
2376 t.bitfield.imm8 = 1;
2377 t.bitfield.imm8s = 1;
2378 t.bitfield.imm16 = 1;
2379 t.bitfield.imm32 = 1;
2380 t.bitfield.imm32s = 1;
2381 }
2382 else if (fits_in_signed_byte (num))
2383 {
2384 if (fits_in_unsigned_byte (num))
2385 t.bitfield.imm8 = 1;
2386 t.bitfield.imm8s = 1;
2387 t.bitfield.imm16 = 1;
2388 t.bitfield.imm32 = 1;
2389 t.bitfield.imm32s = 1;
2390 }
2391 else if (fits_in_unsigned_byte (num))
2392 {
2393 t.bitfield.imm8 = 1;
2394 t.bitfield.imm16 = 1;
2395 t.bitfield.imm32 = 1;
2396 t.bitfield.imm32s = 1;
2397 }
2398 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2399 {
2400 t.bitfield.imm16 = 1;
2401 t.bitfield.imm32 = 1;
2402 t.bitfield.imm32s = 1;
2403 }
2404 else if (fits_in_signed_long (num))
2405 {
2406 t.bitfield.imm32 = 1;
2407 t.bitfield.imm32s = 1;
2408 }
2409 else if (fits_in_unsigned_long (num))
2410 t.bitfield.imm32 = 1;
2411
2412 return t;
2413 }
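/* Worked examples (illustration only): num == 1 additionally matches the
   Imm1 templates (unless tuning for the i486); num == 0x80 fits an
   unsigned but not a signed byte and thus gets imm8, imm16, imm32,
   imm32s, plus the always-set imm64.  */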
2414
2415 static offsetT
2416 offset_in_range (offsetT val, int size)
2417 {
2418 addressT mask;
2419
2420 switch (size)
2421 {
2422 case 1: mask = ((addressT) 1 << 8) - 1; break;
2423 case 2: mask = ((addressT) 1 << 16) - 1; break;
2424 #ifdef BFD64
2425 case 4: mask = ((addressT) 1 << 32) - 1; break;
2426 #endif
2427 case sizeof (val): return val;
2428 default: abort ();
2429 }
2430
2431 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2432 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2433 (uint64_t) val, (uint64_t) (val & mask));
2434
2435 return val & mask;
2436 }
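/* Illustration: offset_in_range (0x123456789, 4) warns "0x123456789
   shortened to 0x23456789" and returns the masked value, while
   offset_in_range (-1, 2) stays silent and returns 0xffff, since -1
   sign-extends cleanly.  */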
2437
2438 static INLINE const char *insn_name (const insn_template *t)
2439 {
2440 return &i386_mnemonics[t->mnem_off];
2441 }
2442
2443 enum PREFIX_GROUP
2444 {
2445 PREFIX_EXIST = 0,
2446 PREFIX_LOCK,
2447 PREFIX_REP,
2448 PREFIX_DS,
2449 PREFIX_OTHER
2450 };
2451
2452 /* Returns
2453 a. PREFIX_EXIST if attempting to add a prefix where one from the
2454 same class already exists.
2455 b. PREFIX_LOCK if lock prefix is added.
2456 c. PREFIX_REP if rep/repne prefix is added.
2457 d. PREFIX_DS if ds prefix is added.
2458 e. PREFIX_OTHER if other prefix is added.
2459 */
2460
2461 static enum PREFIX_GROUP
2462 add_prefix (unsigned int prefix)
2463 {
2464 enum PREFIX_GROUP ret = PREFIX_OTHER;
2465 unsigned int q;
2466
2467 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2468 && flag_code == CODE_64BIT)
2469 {
2470 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2471 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2472 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2473 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2474 ret = PREFIX_EXIST;
2475 q = REX_PREFIX;
2476 }
2477 else
2478 {
2479 switch (prefix)
2480 {
2481 default:
2482 abort ();
2483
2484 case DS_PREFIX_OPCODE:
2485 ret = PREFIX_DS;
2486 /* Fall through. */
2487 case CS_PREFIX_OPCODE:
2488 case ES_PREFIX_OPCODE:
2489 case FS_PREFIX_OPCODE:
2490 case GS_PREFIX_OPCODE:
2491 case SS_PREFIX_OPCODE:
2492 q = SEG_PREFIX;
2493 break;
2494
2495 case REPNE_PREFIX_OPCODE:
2496 case REPE_PREFIX_OPCODE:
2497 q = REP_PREFIX;
2498 ret = PREFIX_REP;
2499 break;
2500
2501 case LOCK_PREFIX_OPCODE:
2502 q = LOCK_PREFIX;
2503 ret = PREFIX_LOCK;
2504 break;
2505
2506 case FWAIT_OPCODE:
2507 q = WAIT_PREFIX;
2508 break;
2509
2510 case ADDR_PREFIX_OPCODE:
2511 q = ADDR_PREFIX;
2512 break;
2513
2514 case DATA_PREFIX_OPCODE:
2515 q = DATA_PREFIX;
2516 break;
2517 }
2518 if (i.prefix[q] != 0)
2519 ret = PREFIX_EXIST;
2520 }
2521
2522 if (ret)
2523 {
2524 if (!i.prefix[q])
2525 ++i.prefixes;
2526 i.prefix[q] |= prefix;
2527 }
2528 else
2529 as_bad (_("same type of prefix used twice"));
2530
2531 return ret;
2532 }
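/* Usage sketch (illustrative): parsing "lock" calls
   add_prefix (LOCK_PREFIX_OPCODE) and gets PREFIX_LOCK back; a second
   "lock" on the same insn finds i.prefix[LOCK_PREFIX] already set, so
   PREFIX_EXIST (0) is returned and "same type of prefix used twice" is
   diagnosed.  */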
2533
2534 static void
2535 update_code_flag (int value, int check)
2536 {
2537 PRINTF_LIKE ((*as_error));
2538
2539 flag_code = (enum flag_code) value;
2540 if (flag_code == CODE_64BIT)
2541 {
2542 cpu_arch_flags.bitfield.cpu64 = 1;
2543 cpu_arch_flags.bitfield.cpuno64 = 0;
2544 }
2545 else
2546 {
2547 cpu_arch_flags.bitfield.cpu64 = 0;
2548 cpu_arch_flags.bitfield.cpuno64 = 1;
2549 }
2550 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm)
2551 {
2552 if (check)
2553 as_error = as_fatal;
2554 else
2555 as_error = as_bad;
2556 (*as_error) (_("64bit mode not supported on `%s'."),
2557 cpu_arch_name ? cpu_arch_name : default_arch);
2558 }
2559 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2560 {
2561 if (check)
2562 as_error = as_fatal;
2563 else
2564 as_error = as_bad;
2565 (*as_error) (_("32bit mode not supported on `%s'."),
2566 cpu_arch_name ? cpu_arch_name : default_arch);
2567 }
2568 stackop_size = '\0';
2569 }
2570
2571 static void
2572 set_code_flag (int value)
2573 {
2574 update_code_flag (value, 0);
2575 }
2576
2577 static void
2578 set_16bit_gcc_code_flag (int new_code_flag)
2579 {
2580 flag_code = (enum flag_code) new_code_flag;
2581 if (flag_code != CODE_16BIT)
2582 abort ();
2583 cpu_arch_flags.bitfield.cpu64 = 0;
2584 cpu_arch_flags.bitfield.cpuno64 = 1;
2585 stackop_size = LONG_MNEM_SUFFIX;
2586 }
2587
2588 static void
2589 set_intel_syntax (int syntax_flag)
2590 {
2591 /* Find out if register prefixing is specified. */
2592 int ask_naked_reg = 0;
2593
2594 SKIP_WHITESPACE ();
2595 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2596 {
2597 char *string;
2598 int e = get_symbol_name (&string);
2599
2600 if (strcmp (string, "prefix") == 0)
2601 ask_naked_reg = 1;
2602 else if (strcmp (string, "noprefix") == 0)
2603 ask_naked_reg = -1;
2604 else
2605 as_bad (_("bad argument to syntax directive."));
2606 (void) restore_line_pointer (e);
2607 }
2608 demand_empty_rest_of_line ();
2609
2610 intel_syntax = syntax_flag;
2611
2612 if (ask_naked_reg == 0)
2613 allow_naked_reg = (intel_syntax
2614 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2615 else
2616 allow_naked_reg = (ask_naked_reg < 0);
2617
2618 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2619
2620 register_prefix = allow_naked_reg ? "" : "%";
2621 }
2622
2623 static void
2624 set_intel_mnemonic (int mnemonic_flag)
2625 {
2626 intel_mnemonic = mnemonic_flag;
2627 }
2628
2629 static void
2630 set_allow_index_reg (int flag)
2631 {
2632 allow_index_reg = flag;
2633 }
2634
2635 static void
2636 set_check (int what)
2637 {
2638 enum check_kind *kind;
2639 const char *str;
2640
2641 if (what)
2642 {
2643 kind = &operand_check;
2644 str = "operand";
2645 }
2646 else
2647 {
2648 kind = &sse_check;
2649 str = "sse";
2650 }
2651
2652 SKIP_WHITESPACE ();
2653
2654 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2655 {
2656 char *string;
2657 int e = get_symbol_name (&string);
2658
2659 if (strcmp (string, "none") == 0)
2660 *kind = check_none;
2661 else if (strcmp (string, "warning") == 0)
2662 *kind = check_warning;
2663 else if (strcmp (string, "error") == 0)
2664 *kind = check_error;
2665 else
2666 as_bad (_("bad argument to %s_check directive."), str);
2667 (void) restore_line_pointer (e);
2668 }
2669 else
2670 as_bad (_("missing argument for %s_check directive"), str);
2671
2672 demand_empty_rest_of_line ();
2673 }
2674
2675 static void
2676 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2677 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2678 {
2679 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2680 static const char *arch;
2681
2682 /* Intel MCU is only supported on ELF. */
2683 if (!IS_ELF)
2684 return;
2685
2686 if (!arch)
2687 {
2688 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2689 use default_arch. */
2690 arch = cpu_arch_name;
2691 if (!arch)
2692 arch = default_arch;
2693 }
2694
2695 /* If we are targeting Intel MCU, we must enable it. */
2696 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2697 == new_flag.bitfield.cpuiamcu)
2698 return;
2699
2700 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2701 #endif
2702 }
2703
2704 static void
2705 extend_cpu_sub_arch_name (const char *name)
2706 {
2707 if (cpu_sub_arch_name)
2708 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2709 ".", name, (const char *) NULL);
2710 else
2711 cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2712 }
2713
2714 static void
2715 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2716 {
2717 typedef struct arch_stack_entry
2718 {
2719 const struct arch_stack_entry *prev;
2720 const char *name;
2721 char *sub_name;
2722 i386_cpu_flags flags;
2723 i386_cpu_flags isa_flags;
2724 enum processor_type isa;
2725 enum flag_code flag_code;
2726 char stackop_size;
2727 bool no_cond_jump_promotion;
2728 } arch_stack_entry;
2729 static const arch_stack_entry *arch_stack_top;
2730
2731 SKIP_WHITESPACE ();
2732
2733 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2734 {
2735 char *s;
2736 int e = get_symbol_name (&s);
2737 const char *string = s;
2738 unsigned int j = 0;
2739 i386_cpu_flags flags;
2740
2741 if (strcmp (string, "default") == 0)
2742 {
2743 if (strcmp (default_arch, "iamcu") == 0)
2744 string = default_arch;
2745 else
2746 {
2747 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2748
2749 cpu_arch_name = NULL;
2750 free (cpu_sub_arch_name);
2751 cpu_sub_arch_name = NULL;
2752 cpu_arch_flags = cpu_unknown_flags;
2753 if (flag_code == CODE_64BIT)
2754 {
2755 cpu_arch_flags.bitfield.cpu64 = 1;
2756 cpu_arch_flags.bitfield.cpuno64 = 0;
2757 }
2758 else
2759 {
2760 cpu_arch_flags.bitfield.cpu64 = 0;
2761 cpu_arch_flags.bitfield.cpuno64 = 1;
2762 }
2763 cpu_arch_isa = PROCESSOR_UNKNOWN;
2764 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2765 if (!cpu_arch_tune_set)
2766 {
2767 cpu_arch_tune = cpu_arch_isa;
2768 cpu_arch_tune_flags = cpu_arch_isa_flags;
2769 }
2770
2771 j = ARRAY_SIZE (cpu_arch) + 1;
2772 }
2773 }
2774 else if (strcmp (string, "push") == 0)
2775 {
2776 arch_stack_entry *top = XNEW (arch_stack_entry);
2777
2778 top->name = cpu_arch_name;
2779 if (cpu_sub_arch_name)
2780 top->sub_name = xstrdup (cpu_sub_arch_name);
2781 else
2782 top->sub_name = NULL;
2783 top->flags = cpu_arch_flags;
2784 top->isa = cpu_arch_isa;
2785 top->isa_flags = cpu_arch_isa_flags;
2786 top->flag_code = flag_code;
2787 top->stackop_size = stackop_size;
2788 top->no_cond_jump_promotion = no_cond_jump_promotion;
2789
2790 top->prev = arch_stack_top;
2791 arch_stack_top = top;
2792
2793 (void) restore_line_pointer (e);
2794 demand_empty_rest_of_line ();
2795 return;
2796 }
2797 else if (strcmp (string, "pop") == 0)
2798 {
2799 const arch_stack_entry *top = arch_stack_top;
2800
2801 if (!top)
2802 as_bad (_(".arch stack is empty"));
2803 else if (top->flag_code != flag_code
2804 || top->stackop_size != stackop_size)
2805 {
2806 static const unsigned int bits[] = {
2807 [CODE_16BIT] = 16,
2808 [CODE_32BIT] = 32,
2809 [CODE_64BIT] = 64,
2810 };
2811
2812 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2813 bits[top->flag_code],
2814 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2815 }
2816 else
2817 {
2818 arch_stack_top = top->prev;
2819
2820 cpu_arch_name = top->name;
2821 free (cpu_sub_arch_name);
2822 cpu_sub_arch_name = top->sub_name;
2823 cpu_arch_flags = top->flags;
2824 cpu_arch_isa = top->isa;
2825 cpu_arch_isa_flags = top->isa_flags;
2826 no_cond_jump_promotion = top->no_cond_jump_promotion;
2827
2828 XDELETE (top);
2829 }
2830
2831 (void) restore_line_pointer (e);
2832 demand_empty_rest_of_line ();
2833 return;
2834 }
2835
2836 for (; j < ARRAY_SIZE (cpu_arch); j++)
2837 {
2838 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2839 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2840 {
2841 if (*string != '.')
2842 {
2843 check_cpu_arch_compatible (string, cpu_arch[j].enable);
2844
2845 cpu_arch_name = cpu_arch[j].name;
2846 free (cpu_sub_arch_name);
2847 cpu_sub_arch_name = NULL;
2848 cpu_arch_flags = cpu_arch[j].enable;
2849 if (flag_code == CODE_64BIT)
2850 {
2851 cpu_arch_flags.bitfield.cpu64 = 1;
2852 cpu_arch_flags.bitfield.cpuno64 = 0;
2853 }
2854 else
2855 {
2856 cpu_arch_flags.bitfield.cpu64 = 0;
2857 cpu_arch_flags.bitfield.cpuno64 = 1;
2858 }
2859 cpu_arch_isa = cpu_arch[j].type;
2860 cpu_arch_isa_flags = cpu_arch[j].enable;
2861 if (!cpu_arch_tune_set)
2862 {
2863 cpu_arch_tune = cpu_arch_isa;
2864 cpu_arch_tune_flags = cpu_arch_isa_flags;
2865 }
2866 pre_386_16bit_warned = false;
2867 break;
2868 }
2869
2870 if (cpu_flags_all_zero (&cpu_arch[j].enable))
2871 continue;
2872
2873 flags = cpu_flags_or (cpu_arch_flags,
2874 cpu_arch[j].enable);
2875
2876 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2877 {
2878 extend_cpu_sub_arch_name (string + 1);
2879 cpu_arch_flags = flags;
2880 cpu_arch_isa_flags = flags;
2881 }
2882 else
2883 cpu_arch_isa_flags
2884 = cpu_flags_or (cpu_arch_isa_flags,
2885 cpu_arch[j].enable);
2886 (void) restore_line_pointer (e);
2887 demand_empty_rest_of_line ();
2888 return;
2889 }
2890 }
2891
2892 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
2893 {
2894 /* Disable an ISA extension. */
2895 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2896 if (cpu_arch[j].type == PROCESSOR_NONE
2897 && strcmp (string + 3, cpu_arch[j].name) == 0)
2898 {
2899 flags = cpu_flags_and_not (cpu_arch_flags,
2900 cpu_arch[j].disable);
2901 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2902 {
2903 extend_cpu_sub_arch_name (string + 1);
2904 cpu_arch_flags = flags;
2905 cpu_arch_isa_flags = flags;
2906 }
2907 (void) restore_line_pointer (e);
2908 demand_empty_rest_of_line ();
2909 return;
2910 }
2911 }
2912
2913 if (j == ARRAY_SIZE (cpu_arch))
2914 as_bad (_("no such architecture: `%s'"), string);
2915
2916 *input_line_pointer = e;
2917 }
2918 else
2919 as_bad (_("missing cpu architecture"));
2920
2921 no_cond_jump_promotion = 0;
2922 if (*input_line_pointer == ','
2923 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2924 {
2925 char *string;
2926 char e;
2927
2928 ++input_line_pointer;
2929 e = get_symbol_name (&string);
2930
2931 if (strcmp (string, "nojumps") == 0)
2932 no_cond_jump_promotion = 1;
2933 else if (strcmp (string, "jumps") == 0)
2934 ;
2935 else
2936 as_bad (_("no such architecture modifier: `%s'"), string);
2937
2938 (void) restore_line_pointer (e);
2939 }
2940
2941 demand_empty_rest_of_line ();
2942 }
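/* Usage sketch (illustrative): ".arch .noavx512f" takes the disable loop
   above and clears the whole AVX512 dependency closure via
   cpu_flags_and_not (), while ".arch push" / ".arch pop" bracket such
   changes so they can be undone.  */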
2943
2944 enum bfd_architecture
2945 i386_arch (void)
2946 {
2947 if (cpu_arch_isa == PROCESSOR_IAMCU)
2948 {
2949 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2950 || flag_code == CODE_64BIT)
2951 as_fatal (_("Intel MCU is 32bit ELF only"));
2952 return bfd_arch_iamcu;
2953 }
2954 else
2955 return bfd_arch_i386;
2956 }
2957
2958 unsigned long
2959 i386_mach (void)
2960 {
2961 if (startswith (default_arch, "x86_64"))
2962 {
2963 if (default_arch[6] == '\0')
2964 return bfd_mach_x86_64;
2965 else
2966 return bfd_mach_x64_32;
2967 }
2968 else if (!strcmp (default_arch, "i386")
2969 || !strcmp (default_arch, "iamcu"))
2970 {
2971 if (cpu_arch_isa == PROCESSOR_IAMCU)
2972 {
2973 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2974 as_fatal (_("Intel MCU is 32bit ELF only"));
2975 return bfd_mach_i386_iamcu;
2976 }
2977 else
2978 return bfd_mach_i386_i386;
2979 }
2980 else
2981 as_fatal (_("unknown architecture"));
2982 }
2983 \f
2984 #include "opcodes/i386-tbl.h"
2985
2986 void
2987 md_begin (void)
2988 {
2989 /* Support pseudo prefixes like {disp32}. */
2990 lex_type ['{'] = LEX_BEGIN_NAME;
2991
2992 /* Initialize op_hash hash table. */
2993 op_hash = str_htab_create ();
2994
2995 {
2996 const insn_template *const *sets = i386_op_sets;
2997 const insn_template *const *end = sets + ARRAY_SIZE (i386_op_sets) - 1;
2998
2999 /* Type checks to compensate for the conversion through void * which
3000 occurs during hash table insertion / lookup. */
3001 (void) sizeof (sets == &current_templates->start);
3002 (void) sizeof (end == &current_templates->end);
3003 for (; sets < end; ++sets)
3004 if (str_hash_insert (op_hash, insn_name (*sets), sets, 0))
3005 as_fatal (_("duplicate %s"), insn_name (*sets));
3006 }
3007
3008 /* Initialize reg_hash hash table. */
3009 reg_hash = str_htab_create ();
3010 {
3011 const reg_entry *regtab;
3012 unsigned int regtab_size = i386_regtab_size;
3013
3014 for (regtab = i386_regtab; regtab_size--; regtab++)
3015 {
3016 switch (regtab->reg_type.bitfield.class)
3017 {
3018 case Reg:
3019 if (regtab->reg_type.bitfield.dword)
3020 {
3021 if (regtab->reg_type.bitfield.instance == Accum)
3022 reg_eax = regtab;
3023 }
3024 else if (regtab->reg_type.bitfield.tbyte)
3025 {
3026 /* There's no point inserting st(<N>) in the hash table, as
3027 parentheses aren't included in register_chars[] anyway. */
3028 if (regtab->reg_type.bitfield.instance != Accum)
3029 continue;
3030 reg_st0 = regtab;
3031 }
3032 break;
3033
3034 case SReg:
3035 switch (regtab->reg_num)
3036 {
3037 case 0: reg_es = regtab; break;
3038 case 2: reg_ss = regtab; break;
3039 case 3: reg_ds = regtab; break;
3040 }
3041 break;
3042
3043 case RegMask:
3044 if (!regtab->reg_num)
3045 reg_k0 = regtab;
3046 break;
3047 }
3048
3049 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3050 as_fatal (_("duplicate %s"), regtab->reg_name);
3051 }
3052 }
3053
3054 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3055 {
3056 int c;
3057 char *p;
3058
3059 for (c = 0; c < 256; c++)
3060 {
3061 if (ISDIGIT (c) || ISLOWER (c))
3062 {
3063 mnemonic_chars[c] = c;
3064 register_chars[c] = c;
3065 operand_chars[c] = c;
3066 }
3067 else if (ISUPPER (c))
3068 {
3069 mnemonic_chars[c] = TOLOWER (c);
3070 register_chars[c] = mnemonic_chars[c];
3071 operand_chars[c] = c;
3072 }
3073 else if (c == '{' || c == '}')
3074 {
3075 mnemonic_chars[c] = c;
3076 operand_chars[c] = c;
3077 }
3078 #ifdef SVR4_COMMENT_CHARS
3079 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3080 operand_chars[c] = c;
3081 #endif
3082
3083 if (c >= 128)
3084 operand_chars[c] = c;
3085 }
3086
3087 #ifdef LEX_QM
3088 operand_chars['?'] = '?';
3089 #endif
3090 mnemonic_chars['_'] = '_';
3091 mnemonic_chars['-'] = '-';
3092 mnemonic_chars['.'] = '.';
3093
3094 for (p = operand_special_chars; *p != '\0'; p++)
3095 operand_chars[(unsigned char) *p] = *p;
3096 }
3097
3098 if (flag_code == CODE_64BIT)
3099 {
3100 #if defined (OBJ_COFF) && defined (TE_PE)
3101 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3102 ? 32 : 16);
3103 #else
3104 x86_dwarf2_return_column = 16;
3105 #endif
3106 x86_cie_data_alignment = -8;
3107 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3108 x86_sframe_cfa_sp_reg = 7;
3109 x86_sframe_cfa_fp_reg = 6;
3110 #endif
3111 }
3112 else
3113 {
3114 x86_dwarf2_return_column = 8;
3115 x86_cie_data_alignment = -4;
3116 }
3117
3118 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3119 can be turned into BRANCH_PREFIX frag. */
3120 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3121 abort ();
3122 }
3123
3124 void
3125 i386_print_statistics (FILE *file)
3126 {
3127 htab_print_statistics (file, "i386 opcode", op_hash);
3128 htab_print_statistics (file, "i386 register", reg_hash);
3129 }
3130
3131 void
3132 i386_md_end (void)
3133 {
3134 htab_delete (op_hash);
3135 htab_delete (reg_hash);
3136 }
3137 \f
3138 #ifdef DEBUG386
3139
3140 /* Debugging routines for md_assemble. */
3141 static void pte (insn_template *);
3142 static void pt (i386_operand_type);
3143 static void pe (expressionS *);
3144 static void ps (symbolS *);
3145
3146 static void
3147 pi (const char *line, i386_insn *x)
3148 {
3149 unsigned int j;
3150
3151 fprintf (stdout, "%s: template ", line);
3152 pte (&x->tm);
3153 fprintf (stdout, " address: base %s index %s scale %x\n",
3154 x->base_reg ? x->base_reg->reg_name : "none",
3155 x->index_reg ? x->index_reg->reg_name : "none",
3156 x->log2_scale_factor);
3157 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3158 x->rm.mode, x->rm.reg, x->rm.regmem);
3159 fprintf (stdout, " sib: base %x index %x scale %x\n",
3160 x->sib.base, x->sib.index, x->sib.scale);
3161 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3162 (x->rex & REX_W) != 0,
3163 (x->rex & REX_R) != 0,
3164 (x->rex & REX_X) != 0,
3165 (x->rex & REX_B) != 0);
3166 for (j = 0; j < x->operands; j++)
3167 {
3168 fprintf (stdout, " #%d: ", j + 1);
3169 pt (x->types[j]);
3170 fprintf (stdout, "\n");
3171 if (x->types[j].bitfield.class == Reg
3172 || x->types[j].bitfield.class == RegMMX
3173 || x->types[j].bitfield.class == RegSIMD
3174 || x->types[j].bitfield.class == RegMask
3175 || x->types[j].bitfield.class == SReg
3176 || x->types[j].bitfield.class == RegCR
3177 || x->types[j].bitfield.class == RegDR
3178 || x->types[j].bitfield.class == RegTR
3179 || x->types[j].bitfield.class == RegBND)
3180 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3181 if (operand_type_check (x->types[j], imm))
3182 pe (x->op[j].imms);
3183 if (operand_type_check (x->types[j], disp))
3184 pe (x->op[j].disps);
3185 }
3186 }
3187
3188 static void
3189 pte (insn_template *t)
3190 {
3191 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3192 static const char *const opc_spc[] = {
3193 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3194 "XOP08", "XOP09", "XOP0A",
3195 };
3196 unsigned int j;
3197
3198 fprintf (stdout, " %d operands ", t->operands);
3199 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3200 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3201 if (opc_spc[t->opcode_space])
3202 fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
3203 fprintf (stdout, "opcode %x ", t->base_opcode);
3204 if (t->extension_opcode != None)
3205 fprintf (stdout, "ext %x ", t->extension_opcode);
3206 if (t->opcode_modifier.d)
3207 fprintf (stdout, "D");
3208 if (t->opcode_modifier.w)
3209 fprintf (stdout, "W");
3210 fprintf (stdout, "\n");
3211 for (j = 0; j < t->operands; j++)
3212 {
3213 fprintf (stdout, " #%d type ", j + 1);
3214 pt (t->operand_types[j]);
3215 fprintf (stdout, "\n");
3216 }
3217 }
3218
3219 static void
3220 pe (expressionS *e)
3221 {
3222 fprintf (stdout, " operation %d\n", e->X_op);
3223 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3224 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3225 if (e->X_add_symbol)
3226 {
3227 fprintf (stdout, " add_symbol ");
3228 ps (e->X_add_symbol);
3229 fprintf (stdout, "\n");
3230 }
3231 if (e->X_op_symbol)
3232 {
3233 fprintf (stdout, " op_symbol ");
3234 ps (e->X_op_symbol);
3235 fprintf (stdout, "\n");
3236 }
3237 }
3238
3239 static void
3240 ps (symbolS *s)
3241 {
3242 fprintf (stdout, "%s type %s%s",
3243 S_GET_NAME (s),
3244 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3245 segment_name (S_GET_SEGMENT (s)));
3246 }
3247
3248 static struct type_name
3249 {
3250 i386_operand_type mask;
3251 const char *name;
3252 }
3253 const type_names[] =
3254 {
3255 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3256 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3257 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3258 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3259 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3260 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3261 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3262 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3263 { { .bitfield = { .imm8 = 1 } }, "i8" },
3264 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3265 { { .bitfield = { .imm16 = 1 } }, "i16" },
3266 { { .bitfield = { .imm32 = 1 } }, "i32" },
3267 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3268 { { .bitfield = { .imm64 = 1 } }, "i64" },
3269 { { .bitfield = { .imm1 = 1 } }, "i1" },
3270 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3271 { { .bitfield = { .disp8 = 1 } }, "d8" },
3272 { { .bitfield = { .disp16 = 1 } }, "d16" },
3273 { { .bitfield = { .disp32 = 1 } }, "d32" },
3274 { { .bitfield = { .disp64 = 1 } }, "d64" },
3275 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3276 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3277 { { .bitfield = { .class = RegCR } }, "control reg" },
3278 { { .bitfield = { .class = RegTR } }, "test reg" },
3279 { { .bitfield = { .class = RegDR } }, "debug reg" },
3280 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3281 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3282 { { .bitfield = { .class = SReg } }, "SReg" },
3283 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3284 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3285 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3286 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3287 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3288 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3289 };
3290
3291 static void
3292 pt (i386_operand_type t)
3293 {
3294 unsigned int j;
3295 i386_operand_type a;
3296
3297 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3298 {
3299 a = operand_type_and (t, type_names[j].mask);
3300 if (operand_type_equal (&a, &type_names[j].mask))
3301 fprintf (stdout, "%s, ", type_names[j].name);
3302 }
3303 fflush (stdout);
3304 }
3305
3306 #endif /* DEBUG386 */
3307 \f
3308 static bfd_reloc_code_real_type
3309 reloc (unsigned int size,
3310 int pcrel,
3311 int sign,
3312 bfd_reloc_code_real_type other)
3313 {
3314 if (other != NO_RELOC)
3315 {
3316 reloc_howto_type *rel;
3317
3318 if (size == 8)
3319 switch (other)
3320 {
3321 case BFD_RELOC_X86_64_GOT32:
3322 return BFD_RELOC_X86_64_GOT64;
3323 break;
3324 case BFD_RELOC_X86_64_GOTPLT64:
3325 return BFD_RELOC_X86_64_GOTPLT64;
3326 break;
3327 case BFD_RELOC_X86_64_PLTOFF64:
3328 return BFD_RELOC_X86_64_PLTOFF64;
3329 break;
3330 case BFD_RELOC_X86_64_GOTPC32:
3331 other = BFD_RELOC_X86_64_GOTPC64;
3332 break;
3333 case BFD_RELOC_X86_64_GOTPCREL:
3334 other = BFD_RELOC_X86_64_GOTPCREL64;
3335 break;
3336 case BFD_RELOC_X86_64_TPOFF32:
3337 other = BFD_RELOC_X86_64_TPOFF64;
3338 break;
3339 case BFD_RELOC_X86_64_DTPOFF32:
3340 other = BFD_RELOC_X86_64_DTPOFF64;
3341 break;
3342 default:
3343 break;
3344 }
3345
3346 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3347 if (other == BFD_RELOC_SIZE32)
3348 {
3349 if (size == 8)
3350 other = BFD_RELOC_SIZE64;
3351 if (pcrel)
3352 {
3353 as_bad (_("there are no pc-relative size relocations"));
3354 return NO_RELOC;
3355 }
3356 }
3357 #endif
3358
3359 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3360 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3361 sign = -1;
3362
3363 rel = bfd_reloc_type_lookup (stdoutput, other);
3364 if (!rel)
3365 as_bad (_("unknown relocation (%u)"), other);
3366 else if (size != bfd_get_reloc_size (rel))
3367 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3368 bfd_get_reloc_size (rel),
3369 size);
3370 else if (pcrel && !rel->pc_relative)
3371 as_bad (_("non-pc-relative relocation for pc-relative field"));
3372 else if ((rel->complain_on_overflow == complain_overflow_signed
3373 && !sign)
3374 || (rel->complain_on_overflow == complain_overflow_unsigned
3375 && sign > 0))
3376 as_bad (_("relocated field and relocation type differ in signedness"));
3377 else
3378 return other;
3379 return NO_RELOC;
3380 }
3381
3382 if (pcrel)
3383 {
3384 if (!sign)
3385 as_bad (_("there are no unsigned pc-relative relocations"));
3386 switch (size)
3387 {
3388 case 1: return BFD_RELOC_8_PCREL;
3389 case 2: return BFD_RELOC_16_PCREL;
3390 case 4: return BFD_RELOC_32_PCREL;
3391 case 8: return BFD_RELOC_64_PCREL;
3392 }
3393 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3394 }
3395 else
3396 {
3397 if (sign > 0)
3398 switch (size)
3399 {
3400 case 4: return BFD_RELOC_X86_64_32S;
3401 }
3402 else
3403 switch (size)
3404 {
3405 case 1: return BFD_RELOC_8;
3406 case 2: return BFD_RELOC_16;
3407 case 4: return BFD_RELOC_32;
3408 case 8: return BFD_RELOC_64;
3409 }
3410 as_bad (_("cannot do %s %u byte relocation"),
3411 sign > 0 ? "signed" : "unsigned", size);
3412 }
3413
3414 return NO_RELOC;
3415 }
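/* Examples (illustration only): reloc (4, 1, 1, NO_RELOC) yields
   BFD_RELOC_32_PCREL; reloc (4, 0, 1, NO_RELOC) yields
   BFD_RELOC_X86_64_32S; and with OTHER == BFD_RELOC_X86_64_GOT32 but
   SIZE == 8 the wider BFD_RELOC_X86_64_GOT64 is chosen up front.  */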
3416
3417 /* Here we decide which fixups can be adjusted to make them relative to
3418 the beginning of the section instead of the symbol. Basically we need
3419 to make sure that the dynamic relocations are done correctly, so in
3420 some cases we force the original symbol to be used. */
3421
3422 int
3423 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3424 {
3425 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3426 if (!IS_ELF)
3427 return 1;
3428
3429 /* Don't adjust pc-relative references to merge sections in 64-bit
3430 mode. */
3431 if (use_rela_relocations
3432 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3433 && fixP->fx_pcrel)
3434 return 0;
3435
3436 /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3437 and changed later by validate_fix. */
3438 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3439 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3440 return 0;
3441
3442 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3443 for size relocations. */
3444 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3445 || fixP->fx_r_type == BFD_RELOC_SIZE64
3446 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3447 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3448 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3449 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3450 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3451 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3452 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3453 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3454 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3455 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3456 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3457 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3458 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3459 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3460 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3461 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3462 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3463 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3464 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3465 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3466 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3467 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3468 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3469 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3470 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3471 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3472 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3473 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3474 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3475 return 0;
3476 #endif
3477 return 1;
3478 }
3479
3480 static INLINE bool
3481 want_disp32 (const insn_template *t)
3482 {
3483 return flag_code != CODE_64BIT
3484 || i.prefix[ADDR_PREFIX]
3485 || (t->mnem_off == MN_lea
3486 && (!i.types[1].bitfield.qword
3487 || t->opcode_modifier.size == SIZE32));
3488 }
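/* Illustration: want_disp32 () is true outside 64-bit mode, whenever an
   0x67 address-size prefix is in effect, and for "lea" with a non-qword
   destination such as "lea (%rax), %eax".  */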
3489
3490 static int
3491 intel_float_operand (const char *mnemonic)
3492 {
3493 /* Note that the value returned is meaningful only for opcodes with (memory)
3494 operands, hence the code here is free to improperly handle opcodes that
3495 have no operands (for better performance and smaller code). */
3496
3497 if (mnemonic[0] != 'f')
3498 return 0; /* non-math */
3499
3500 switch (mnemonic[1])
3501 {
3502 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3503 the fs segment override prefix are not currently handled, because no
3504 call path can make opcodes without operands get here. */
3505 case 'i':
3506 return 2 /* integer op */;
3507 case 'l':
3508 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3509 return 3; /* fldcw/fldenv */
3510 break;
3511 case 'n':
3512 if (mnemonic[2] != 'o' /* fnop */)
3513 return 3; /* non-waiting control op */
3514 break;
3515 case 'r':
3516 if (mnemonic[2] == 's')
3517 return 3; /* frstor/frstpm */
3518 break;
3519 case 's':
3520 if (mnemonic[2] == 'a')
3521 return 3; /* fsave */
3522 if (mnemonic[2] == 't')
3523 {
3524 switch (mnemonic[3])
3525 {
3526 case 'c': /* fstcw */
3527 case 'd': /* fstdw */
3528 case 'e': /* fstenv */
3529 case 's': /* fsts[gw] */
3530 return 3;
3531 }
3532 }
3533 break;
3534 case 'x':
3535 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3536 return 0; /* fxsave/fxrstor are not really math ops */
3537 break;
3538 }
3539
3540 return 1;
3541 }
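/* Illustration: intel_float_operand ("fild") is 2 (integer op),
   intel_float_operand ("fldcw") is 3 (control op), "fadd" falls through
   to the final return 1, and a non-FPU mnemonic like "mov" is 0.  */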
3542
3543 static INLINE void
3544 install_template (const insn_template *t)
3545 {
3546 unsigned int l;
3547
3548 i.tm = *t;
3549
3550 /* Note that for pseudo prefixes this produces a length of 1. But for them
3551 the length isn't interesting at all. */
3552 for (l = 1; l < 4; ++l)
3553 if (!(t->base_opcode >> (8 * l)))
3554 break;
3555
3556 i.opcode_length = l;
3557 }
3558
3559 /* Build the VEX prefix. */
3560
3561 static void
3562 build_vex_prefix (const insn_template *t)
3563 {
3564 unsigned int register_specifier;
3565 unsigned int vector_length;
3566 unsigned int w;
3567
3568 /* Check register specifier. */
3569 if (i.vex.register_specifier)
3570 {
3571 register_specifier =
3572 ~register_number (i.vex.register_specifier) & 0xf;
3573 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3574 }
3575 else
3576 register_specifier = 0xf;
3577
3578 /* Use 2-byte VEX prefix by swapping destination and source operand
3579 if there is more than one register operand. */
3580 if (i.reg_operands > 1
3581 && i.vec_encoding != vex_encoding_vex3
3582 && i.dir_encoding == dir_encoding_default
3583 && i.operands == i.reg_operands
3584 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3585 && i.tm.opcode_space == SPACE_0F
3586 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3587 && i.rex == REX_B)
3588 {
3589 unsigned int xchg;
3590
3591 swap_2_operands (0, i.operands - 1);
3592
3593 gas_assert (i.rm.mode == 3);
3594
3595 i.rex = REX_R;
3596 xchg = i.rm.regmem;
3597 i.rm.regmem = i.rm.reg;
3598 i.rm.reg = xchg;
3599
3600 if (i.tm.opcode_modifier.d)
3601 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3602 ? Opcode_ExtD : Opcode_SIMD_IntD;
3603 else /* Use the next insn. */
3604 install_template (&t[1]);
3605 }
3606
3607 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3608 are no memory operands and at least 3 register ones. */
3609 if (i.reg_operands >= 3
3610 && i.vec_encoding != vex_encoding_vex3
3611 && i.reg_operands == i.operands - i.imm_operands
3612 && i.tm.opcode_modifier.vex
3613 && i.tm.opcode_modifier.commutative
3614 && (i.tm.opcode_modifier.sse2avx
3615 || (optimize > 1 && !i.no_optimize))
3616 && i.rex == REX_B
3617 && i.vex.register_specifier
3618 && !(i.vex.register_specifier->reg_flags & RegRex))
3619 {
3620 unsigned int xchg = i.operands - i.reg_operands;
3621
3622 gas_assert (i.tm.opcode_space == SPACE_0F);
3623 gas_assert (!i.tm.opcode_modifier.sae);
3624 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3625 &i.types[i.operands - 3]));
3626 gas_assert (i.rm.mode == 3);
3627
3628 swap_2_operands (xchg, xchg + 1);
3629
3630 i.rex = 0;
3631 xchg = i.rm.regmem | 8;
3632 i.rm.regmem = ~register_specifier & 0xf;
3633 gas_assert (!(i.rm.regmem & 8));
3634 i.vex.register_specifier += xchg - i.rm.regmem;
3635 register_specifier = ~xchg & 0xf;
3636 }
3637
3638 if (i.tm.opcode_modifier.vex == VEXScalar)
3639 vector_length = avxscalar;
3640 else if (i.tm.opcode_modifier.vex == VEX256)
3641 vector_length = 1;
3642 else if (dot_insn () && i.tm.opcode_modifier.vex == VEX128)
3643 vector_length = 0;
3644 else
3645 {
3646 unsigned int op;
3647
3648 /* Determine vector length from the last multi-length vector
3649 operand. */
3650 vector_length = 0;
3651 for (op = t->operands; op--;)
3652 if (t->operand_types[op].bitfield.xmmword
3653 && t->operand_types[op].bitfield.ymmword
3654 && i.types[op].bitfield.ymmword)
3655 {
3656 vector_length = 1;
3657 break;
3658 }
3659 }
3660
3661 /* Check the REX.W bit and VEXW. */
3662 if (i.tm.opcode_modifier.vexw == VEXWIG)
3663 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3664 else if (i.tm.opcode_modifier.vexw)
3665 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3666 else
3667 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3668
3669 /* Use 2-byte VEX prefix if possible. */
3670 if (w == 0
3671 && i.vec_encoding != vex_encoding_vex3
3672 && i.tm.opcode_space == SPACE_0F
3673 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3674 {
3675 /* 2-byte VEX prefix. */
3676 unsigned int r;
3677
3678 i.vex.length = 2;
3679 i.vex.bytes[0] = 0xc5;
3680
3681 /* Check the REX.R bit. */
3682 r = (i.rex & REX_R) ? 0 : 1;
3683 i.vex.bytes[1] = (r << 7
3684 | register_specifier << 3
3685 | vector_length << 2
3686 | i.tm.opcode_modifier.opcodeprefix);
3687 }
3688 else
3689 {
3690 /* 3-byte VEX prefix. */
3691 i.vex.length = 3;
3692
3693 switch (i.tm.opcode_space)
3694 {
3695 case SPACE_0F:
3696 case SPACE_0F38:
3697 case SPACE_0F3A:
3698 i.vex.bytes[0] = 0xc4;
3699 break;
3700 case SPACE_XOP08:
3701 case SPACE_XOP09:
3702 case SPACE_XOP0A:
3703 i.vex.bytes[0] = 0x8f;
3704 break;
3705 default:
3706 abort ();
3707 }
3708
3709 /* The high 3 bits of the second VEX byte are 1's complement
3710 of RXB bits from REX. */
3711 i.vex.bytes[1] = ((~i.rex & 7) << 5)
3712 | (!dot_insn () ? i.tm.opcode_space
3713 : i.insn_opcode_space);
3714
3715 i.vex.bytes[2] = (w << 7
3716 | register_specifier << 3
3717 | vector_length << 2
3718 | i.tm.opcode_modifier.opcodeprefix);
3719 }
3720 }
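/* Encoding sketch (illustration only): "vaddps %ymm2, %ymm1, %ymm0" needs
   no REX.W/X/B bits and lives in opcode space 0F, so the 2-byte form
   applies: 0xc5, then R=1, vvvv=~1, L=1, pp=00 gives 0xf4, and together
   with the opcode and ModRM bytes the insn encodes as c5 f4 58 c2.  */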
3721
3722 static INLINE bool
3723 is_evex_encoding (const insn_template *t)
3724 {
3725 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3726 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3727 || t->opcode_modifier.sae;
3728 }
3729
3730 static INLINE bool
3731 is_any_vex_encoding (const insn_template *t)
3732 {
3733 return t->opcode_modifier.vex || is_evex_encoding (t);
3734 }
3735
3736 static unsigned int
3737 get_broadcast_bytes (const insn_template *t, bool diag)
3738 {
3739 unsigned int op, bytes;
3740 const i386_operand_type *types;
3741
3742 if (i.broadcast.type)
3743 return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
3744
3745 gas_assert (intel_syntax);
3746
3747 for (op = 0; op < t->operands; ++op)
3748 if (t->operand_types[op].bitfield.baseindex)
3749 break;
3750
3751 gas_assert (op < t->operands);
3752
3753 if (t->opcode_modifier.evex
3754 && t->opcode_modifier.evex != EVEXDYN)
3755 switch (i.broadcast.bytes)
3756 {
3757 case 1:
3758 if (t->operand_types[op].bitfield.word)
3759 return 2;
3760 /* Fall through. */
3761 case 2:
3762 if (t->operand_types[op].bitfield.dword)
3763 return 4;
3764 /* Fall through. */
3765 case 4:
3766 if (t->operand_types[op].bitfield.qword)
3767 return 8;
3768 /* Fall through. */
3769 case 8:
3770 if (t->operand_types[op].bitfield.xmmword)
3771 return 16;
3772 if (t->operand_types[op].bitfield.ymmword)
3773 return 32;
3774 if (t->operand_types[op].bitfield.zmmword)
3775 return 64;
3776 /* Fall through. */
3777 default:
3778 abort ();
3779 }
3780
3781 gas_assert (op + 1 < t->operands);
3782
3783 if (t->operand_types[op + 1].bitfield.xmmword
3784 + t->operand_types[op + 1].bitfield.ymmword
3785 + t->operand_types[op + 1].bitfield.zmmword > 1)
3786 {
3787 types = &i.types[op + 1];
3788 diag = false;
3789 }
3790 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
3791 types = &t->operand_types[op];
3792
3793 if (types->bitfield.zmmword)
3794 bytes = 64;
3795 else if (types->bitfield.ymmword)
3796 bytes = 32;
3797 else
3798 bytes = 16;
3799
3800 if (diag)
3801 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3802 insn_name (t), bytes * 8);
3803
3804 return bytes;
3805 }
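/* Illustration: for AT&T "vaddps 8(%rax){1to16}, %zmm1, %zmm2" the parser
   sets i.broadcast.type to 16 and the template's broadcast width is
   dword, so the early-return path computes 4 * 16 == 64 bytes.  */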
3806
3807 /* Build the EVEX prefix. */
3808
3809 static void
3810 build_evex_prefix (void)
3811 {
3812 unsigned int register_specifier, w;
3813 rex_byte vrex_used = 0;
3814
3815 /* Check register specifier. */
3816 if (i.vex.register_specifier)
3817 {
3818 gas_assert ((i.vrex & REX_X) == 0);
3819
3820 register_specifier = i.vex.register_specifier->reg_num;
3821 if ((i.vex.register_specifier->reg_flags & RegRex))
3822 register_specifier += 8;
3823 /* The upper 16 registers are encoded in the fourth byte of the
3824 EVEX prefix. */
3825 if (!(i.vex.register_specifier->reg_flags & RegVRex))
3826 i.vex.bytes[3] = 0x8;
3827 register_specifier = ~register_specifier & 0xf;
3828 }
3829 else
3830 {
3831 register_specifier = 0xf;
3832
3833 /* Encode upper 16 vector index register in the fourth byte of
3834 the EVEX prefix. */
3835 if (!(i.vrex & REX_X))
3836 i.vex.bytes[3] = 0x8;
3837 else
3838 vrex_used |= REX_X;
3839 }
3840
3841 /* 4 byte EVEX prefix. */
3842 i.vex.length = 4;
3843 i.vex.bytes[0] = 0x62;
3844
3845 /* The high 3 bits of the second EVEX byte are 1's complement of RXB
3846 bits from REX. */
3847 gas_assert (i.tm.opcode_space >= SPACE_0F);
3848 gas_assert (i.tm.opcode_space <= SPACE_EVEXMAP6);
3849 i.vex.bytes[1] = ((~i.rex & 7) << 5)
3850 | (!dot_insn () ? i.tm.opcode_space
3851 : i.insn_opcode_space);
3852
3853 /* The fifth bit of the second EVEX byte is 1's complement of the
3854 REX_R bit in VREX. */
3855 if (!(i.vrex & REX_R))
3856 i.vex.bytes[1] |= 0x10;
3857 else
3858 vrex_used |= REX_R;
3859
3860 if ((i.reg_operands + i.imm_operands) == i.operands)
3861 {
3862 /* When all operands are registers, the REX_X bit in REX is not
3863 used. We reuse it to encode the upper 16 registers, which is
3864 indicated by the REX_B bit in VREX. The REX_X bit is encoded
3865 as 1's complement. */
3866 if ((i.vrex & REX_B))
3867 {
3868 vrex_used |= REX_B;
3869 i.vex.bytes[1] &= ~0x40;
3870 }
3871 }
3872
3873 /* EVEX instructions shouldn't need the REX prefix. */
3874 i.vrex &= ~vrex_used;
3875 gas_assert (i.vrex == 0);
3876
3877 /* Check the REX.W bit and VEXW. */
3878 if (i.tm.opcode_modifier.vexw == VEXWIG)
3879 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3880 else if (i.tm.opcode_modifier.vexw)
3881 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3882 else
3883 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3884
3885 /* The third byte of the EVEX prefix. */
3886 i.vex.bytes[2] = ((w << 7)
3887 | (register_specifier << 3)
3888 | 4 /* Encode the U bit. */
3889 | i.tm.opcode_modifier.opcodeprefix);
3890
3891 /* The fourth byte of the EVEX prefix. */
3892 /* The zeroing-masking bit. */
3893 if (i.mask.reg && i.mask.zeroing)
3894 i.vex.bytes[3] |= 0x80;
3895
3896 /* Don't always set the broadcast bit if there is no RC. */
3897 if (i.rounding.type == rc_none)
3898 {
3899 /* Encode the vector length. */
3900 unsigned int vec_length;
3901
3902 if (!i.tm.opcode_modifier.evex
3903 || i.tm.opcode_modifier.evex == EVEXDYN)
3904 {
3905 unsigned int op;
3906
3907 /* Determine vector length from the last multi-length vector
3908 operand. */
3909 for (op = i.operands; op--;)
3910 if (i.tm.operand_types[op].bitfield.xmmword
3911 + i.tm.operand_types[op].bitfield.ymmword
3912 + i.tm.operand_types[op].bitfield.zmmword > 1)
3913 {
3914 if (i.types[op].bitfield.zmmword)
3915 {
3916 i.tm.opcode_modifier.evex = EVEX512;
3917 break;
3918 }
3919 else if (i.types[op].bitfield.ymmword)
3920 {
3921 i.tm.opcode_modifier.evex = EVEX256;
3922 break;
3923 }
3924 else if (i.types[op].bitfield.xmmword)
3925 {
3926 i.tm.opcode_modifier.evex = EVEX128;
3927 break;
3928 }
3929 else if ((i.broadcast.type || i.broadcast.bytes)
3930 && op == i.broadcast.operand)
3931 {
3932 switch (get_broadcast_bytes (&i.tm, true))
3933 {
3934 case 64:
3935 i.tm.opcode_modifier.evex = EVEX512;
3936 break;
3937 case 32:
3938 i.tm.opcode_modifier.evex = EVEX256;
3939 break;
3940 case 16:
3941 i.tm.opcode_modifier.evex = EVEX128;
3942 break;
3943 default:
3944 abort ();
3945 }
3946 break;
3947 }
3948 }
3949
3950 if (op >= MAX_OPERANDS)
3951 abort ();
3952 }
3953
3954 switch (i.tm.opcode_modifier.evex)
3955 {
3956 case EVEXLIG: /* LL' is ignored */
3957 vec_length = evexlig << 5;
3958 break;
3959 case EVEX128:
3960 vec_length = 0 << 5;
3961 break;
3962 case EVEX256:
3963 vec_length = 1 << 5;
3964 break;
3965 case EVEX512:
3966 vec_length = 2 << 5;
3967 break;
3968 case EVEX_L3:
3969 if (dot_insn ())
3970 {
3971 vec_length = 3 << 5;
3972 break;
3973 }
3974 /* Fall through. */
3975 default:
3976 abort ();
3977 break;
3978 }
3979 i.vex.bytes[3] |= vec_length;
3980 /* Encode the broadcast bit. */
3981 if (i.broadcast.type || i.broadcast.bytes)
3982 i.vex.bytes[3] |= 0x10;
3983 }
3984 else if (i.rounding.type != saeonly)
3985 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3986 else
3987 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3988
3989 if (i.mask.reg)
3990 i.vex.bytes[3] |= i.mask.reg->reg_num;
3991 }
3992
3993 static void
3994 process_immext (void)
3995 {
3996 expressionS *exp;
3997
3998 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
3999 which is coded in the same place as an 8-bit immediate field
4000 would be. Here we fake an 8-bit immediate operand from the
4001 opcode suffix stored in tm.extension_opcode.
4002
4003      Some 3-operand AVX instructions also use this
4004      encoding.  */
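  /* For instance, 3DNow! pfadd is encoded as 0F 0F /r with opcode suffix
     0x9E; that suffix byte becomes the fake imm8 operand built below.  */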
4005
4006 gas_assert (i.imm_operands <= 1
4007 && (i.operands <= 2
4008 || (is_any_vex_encoding (&i.tm)
4009 && i.operands <= 4)));
4010
4011 exp = &im_expressions[i.imm_operands++];
4012 i.op[i.operands].imms = exp;
4013 i.types[i.operands].bitfield.imm8 = 1;
4014 i.operands++;
4015 exp->X_op = O_constant;
4016 exp->X_add_number = i.tm.extension_opcode;
4017 i.tm.extension_opcode = None;
4018 }
4019
4020
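/* Check whether the active HLE prefix (xacquire/xrelease) is permitted with
   the current instruction; return 1 if so, 0 after issuing a diagnostic.  */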
4021 static int
4022 check_hle (void)
4023 {
4024 switch (i.tm.opcode_modifier.prefixok)
4025 {
4026 default:
4027 abort ();
4028 case PrefixLock:
4029 case PrefixNone:
4030 case PrefixNoTrack:
4031 case PrefixRep:
4032 as_bad (_("invalid instruction `%s' after `%s'"),
4033 insn_name (&i.tm), i.hle_prefix);
4034 return 0;
4035 case PrefixHLELock:
4036 if (i.prefix[LOCK_PREFIX])
4037 return 1;
4038 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4039 return 0;
4040 case PrefixHLEAny:
4041 return 1;
4042 case PrefixHLERelease:
4043 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4044 {
4045 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4046 insn_name (&i.tm));
4047 return 0;
4048 }
4049 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4050 {
4051 as_bad (_("memory destination needed for instruction `%s'"
4052 " after `xrelease'"), insn_name (&i.tm));
4053 return 0;
4054 }
4055 return 1;
4056 }
4057 }
4058
4059 /* Encode aligned vector move as unaligned vector move. */
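/* For example, movaps (0F 28/29) becomes movups (0F 10/11), while movdqa
   (66 0F 6F/7F) becomes movdqu (F3 0F 6F/7F).  */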
4060
4061 static void
4062 encode_with_unaligned_vector_move (void)
4063 {
4064 switch (i.tm.base_opcode)
4065 {
4066 case 0x28: /* Load instructions. */
4067 case 0x29: /* Store instructions. */
4068 /* movaps/movapd/vmovaps/vmovapd. */
4069 if (i.tm.opcode_space == SPACE_0F
4070 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4071 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4072 break;
4073 case 0x6f: /* Load instructions. */
4074 case 0x7f: /* Store instructions. */
4075 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4076 if (i.tm.opcode_space == SPACE_0F
4077 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4078 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4079 break;
4080 default:
4081 break;
4082 }
4083 }
4084
4085 /* Try the shortest encoding by shortening operand size. */
4086
4087 static void
4088 optimize_encoding (void)
4089 {
4090 unsigned int j;
4091
4092 if (i.tm.mnem_off == MN_lea)
4093 {
4094 /* Optimize: -O:
4095 lea symbol, %rN -> mov $symbol, %rN
4096 lea (%rM), %rN -> mov %rM, %rN
4097 lea (,%rM,1), %rN -> mov %rM, %rN
4098
4099 and in 32-bit mode for 16-bit addressing
4100
4101 lea (%rM), %rN -> movzx %rM, %rN
4102
4103 and in 64-bit mode zap 32-bit addressing in favor of using a
4104 32-bit (or less) destination.
4105 */
4106 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4107 {
4108 if (!i.op[1].regs->reg_type.bitfield.word)
4109 i.tm.opcode_modifier.size = SIZE32;
4110 i.prefix[ADDR_PREFIX] = 0;
4111 }
4112
4113 if (!i.index_reg && !i.base_reg)
4114 {
4115 /* Handle:
4116 lea symbol, %rN -> mov $symbol, %rN
4117 */
4118 if (flag_code == CODE_64BIT)
4119 {
4120 /* Don't transform a relocation to a 16-bit one. */
4121 if (i.op[0].disps
4122 && i.op[0].disps->X_op != O_constant
4123 && i.op[1].regs->reg_type.bitfield.word)
4124 return;
4125
4126 if (!i.op[1].regs->reg_type.bitfield.qword
4127 || i.tm.opcode_modifier.size == SIZE32)
4128 {
4129 i.tm.base_opcode = 0xb8;
4130 i.tm.opcode_modifier.modrm = 0;
4131 if (!i.op[1].regs->reg_type.bitfield.word)
4132 i.types[0].bitfield.imm32 = 1;
4133 else
4134 {
4135 i.tm.opcode_modifier.size = SIZE16;
4136 i.types[0].bitfield.imm16 = 1;
4137 }
4138 }
4139 else
4140 {
4141 /* Subject to further optimization below. */
4142 i.tm.base_opcode = 0xc7;
4143 i.tm.extension_opcode = 0;
4144 i.types[0].bitfield.imm32s = 1;
4145 i.types[0].bitfield.baseindex = 0;
4146 }
4147 }
4148 /* Outside of 64-bit mode address and operand sizes have to match if
4149 a relocation is involved, as otherwise we wouldn't (currently) or
4150 even couldn't express the relocation correctly. */
4151 else if (i.op[0].disps
4152 && i.op[0].disps->X_op != O_constant
4153 && ((!i.prefix[ADDR_PREFIX])
4154 != (flag_code == CODE_32BIT
4155 ? i.op[1].regs->reg_type.bitfield.dword
4156 : i.op[1].regs->reg_type.bitfield.word)))
4157 return;
4158 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4159 destination is going to grow encoding size. */
4160 else if (flag_code == CODE_16BIT
4161 && (optimize <= 1 || optimize_for_space)
4162 && !i.prefix[ADDR_PREFIX]
4163 && i.op[1].regs->reg_type.bitfield.dword)
4164 return;
4165 else
4166 {
4167 i.tm.base_opcode = 0xb8;
4168 i.tm.opcode_modifier.modrm = 0;
4169 if (i.op[1].regs->reg_type.bitfield.dword)
4170 i.types[0].bitfield.imm32 = 1;
4171 else
4172 i.types[0].bitfield.imm16 = 1;
4173
4174 if (i.op[0].disps
4175 && i.op[0].disps->X_op == O_constant
4176 && i.op[1].regs->reg_type.bitfield.dword
4177 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4178 GCC 5. */
4179 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4180 i.op[0].disps->X_add_number &= 0xffff;
4181 }
4182
4183 i.tm.operand_types[0] = i.types[0];
4184 i.imm_operands = 1;
4185 if (!i.op[0].imms)
4186 {
4187 i.op[0].imms = &im_expressions[0];
4188 i.op[0].imms->X_op = O_absent;
4189 }
4190 }
4191 else if (i.op[0].disps
4192 && (i.op[0].disps->X_op != O_constant
4193 || i.op[0].disps->X_add_number))
4194 return;
4195 else
4196 {
4197 /* Handle:
4198 lea (%rM), %rN -> mov %rM, %rN
4199 lea (,%rM,1), %rN -> mov %rM, %rN
4200 lea (%rM), %rN -> movzx %rM, %rN
4201 */
4202 const reg_entry *addr_reg;
4203
4204 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4205 addr_reg = i.base_reg;
4206 else if (!i.base_reg
4207 && i.index_reg->reg_num != RegIZ
4208 && !i.log2_scale_factor)
4209 addr_reg = i.index_reg;
4210 else
4211 return;
4212
4213 if (addr_reg->reg_type.bitfield.word
4214 && i.op[1].regs->reg_type.bitfield.dword)
4215 {
4216 if (flag_code != CODE_32BIT)
4217 return;
4218 i.tm.opcode_space = SPACE_0F;
4219 i.tm.base_opcode = 0xb7;
4220 }
4221 else
4222 i.tm.base_opcode = 0x8b;
4223
4224 if (addr_reg->reg_type.bitfield.dword
4225 && i.op[1].regs->reg_type.bitfield.qword)
4226 i.tm.opcode_modifier.size = SIZE32;
4227
4228 i.op[0].regs = addr_reg;
4229 i.reg_operands = 2;
4230 }
4231
4232 i.mem_operands = 0;
4233 i.disp_operands = 0;
4234 i.prefix[ADDR_PREFIX] = 0;
4235 i.prefix[SEG_PREFIX] = 0;
4236 i.seg[0] = NULL;
4237 }
4238
4239 if (optimize_for_space
4240 && i.tm.mnem_off == MN_test
4241 && i.reg_operands == 1
4242 && i.imm_operands == 1
4243 && !i.types[1].bitfield.byte
4244 && i.op[0].imms->X_op == O_constant
4245 && fits_in_imm7 (i.op[0].imms->X_add_number))
4246 {
4247 /* Optimize: -Os:
4248 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4249 */
4250 unsigned int base_regnum = i.op[1].regs->reg_num;
4251 if (flag_code == CODE_64BIT || base_regnum < 4)
4252 {
4253 i.types[1].bitfield.byte = 1;
4254 /* Ignore the suffix. */
4255 i.suffix = 0;
4256 /* Convert to byte registers. */
4257 if (i.types[1].bitfield.word)
4258 j = 16;
4259 else if (i.types[1].bitfield.dword)
4260 j = 32;
4261 else
4262 j = 48;
4263 if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4264 j += 8;
4265 i.op[1].regs -= j;
4266 }
4267 }
4268 else if (flag_code == CODE_64BIT
4269 && i.tm.opcode_space == SPACE_BASE
4270 && ((i.types[1].bitfield.qword
4271 && i.reg_operands == 1
4272 && i.imm_operands == 1
4273 && i.op[0].imms->X_op == O_constant
4274 && ((i.tm.base_opcode == 0xb8
4275 && i.tm.extension_opcode == None
4276 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4277 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4278 && (i.tm.base_opcode == 0x24
4279 || (i.tm.base_opcode == 0x80
4280 && i.tm.extension_opcode == 0x4)
4281 || i.tm.mnem_off == MN_test
4282 || ((i.tm.base_opcode | 1) == 0xc7
4283 && i.tm.extension_opcode == 0x0)))
4284 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4285 && i.tm.base_opcode == 0x83
4286 && i.tm.extension_opcode == 0x4)))
4287 || (i.types[0].bitfield.qword
4288 && ((i.reg_operands == 2
4289 && i.op[0].regs == i.op[1].regs
4290 && (i.tm.mnem_off == MN_xor
4291 || i.tm.mnem_off == MN_sub))
4292 || i.tm.mnem_off == MN_clr))))
4293 {
4294 /* Optimize: -O:
4295 andq $imm31, %r64 -> andl $imm31, %r32
4296 andq $imm7, %r64 -> andl $imm7, %r32
4297 testq $imm31, %r64 -> testl $imm31, %r32
4298 xorq %r64, %r64 -> xorl %r32, %r32
4299 subq %r64, %r64 -> subl %r32, %r32
4300 movq $imm31, %r64 -> movl $imm31, %r32
4301 movq $imm32, %r64 -> movl $imm32, %r32
4302 */
4303 i.tm.opcode_modifier.size = SIZE32;
4304 if (i.imm_operands)
4305 {
4306 i.types[0].bitfield.imm32 = 1;
4307 i.types[0].bitfield.imm32s = 0;
4308 i.types[0].bitfield.imm64 = 0;
4309 }
4310 else
4311 {
4312 i.types[0].bitfield.dword = 1;
4313 i.types[0].bitfield.qword = 0;
4314 }
4315 i.types[1].bitfield.dword = 1;
4316 i.types[1].bitfield.qword = 0;
4317 if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
4318 {
4319 /* Handle
4320 movq $imm31, %r64 -> movl $imm31, %r32
4321 movq $imm32, %r64 -> movl $imm32, %r32
4322 */
4323 i.tm.operand_types[0].bitfield.imm32 = 1;
4324 i.tm.operand_types[0].bitfield.imm32s = 0;
4325 i.tm.operand_types[0].bitfield.imm64 = 0;
4326 if ((i.tm.base_opcode | 1) == 0xc7)
4327 {
4328 /* Handle
4329 movq $imm31, %r64 -> movl $imm31, %r32
4330 */
4331 i.tm.base_opcode = 0xb8;
4332 i.tm.extension_opcode = None;
4333 i.tm.opcode_modifier.w = 0;
4334 i.tm.opcode_modifier.modrm = 0;
4335 }
4336 }
4337 }
4338 else if (optimize > 1
4339 && !optimize_for_space
4340 && i.reg_operands == 2
4341 && i.op[0].regs == i.op[1].regs
4342 && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
4343 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4344 {
4345 /* Optimize: -O2:
4346 andb %rN, %rN -> testb %rN, %rN
4347 andw %rN, %rN -> testw %rN, %rN
4348 andq %rN, %rN -> testq %rN, %rN
4349 orb %rN, %rN -> testb %rN, %rN
4350 orw %rN, %rN -> testw %rN, %rN
4351 orq %rN, %rN -> testq %rN, %rN
4352
4353 and outside of 64-bit mode
4354
4355 andl %rN, %rN -> testl %rN, %rN
4356 orl %rN, %rN -> testl %rN, %rN
4357 */
4358 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4359 }
4360 else if (i.tm.base_opcode == 0xba
4361 && i.tm.opcode_space == SPACE_0F
4362 && i.reg_operands == 1
4363 && i.op[0].imms->X_op == O_constant
4364 && i.op[0].imms->X_add_number >= 0)
4365 {
4366 /* Optimize: -O:
4367 btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
4368 btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
4369 btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4370
4371	 With <BT> one of bts, btr, and btc also:
4372 <BT>w $n, %rN -> btl $n, %rN (in 32-bit mode, n < 16)
4373 <BT>l $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4374 */
4375 switch (flag_code)
4376 {
4377 case CODE_64BIT:
4378 if (i.tm.extension_opcode != 4)
4379 break;
4380 if (i.types[1].bitfield.qword
4381 && i.op[0].imms->X_add_number < 32
4382 && !(i.op[1].regs->reg_flags & RegRex))
4383 i.tm.opcode_modifier.size = SIZE32;
4384 /* Fall through. */
4385 case CODE_32BIT:
4386 if (i.types[1].bitfield.word
4387 && i.op[0].imms->X_add_number < 16)
4388 i.tm.opcode_modifier.size = SIZE32;
4389 break;
4390 case CODE_16BIT:
4391 if (i.op[0].imms->X_add_number < 16)
4392 i.tm.opcode_modifier.size = SIZE16;
4393 break;
4394 }
4395 }
4396 else if (i.reg_operands == 3
4397 && i.op[0].regs == i.op[1].regs
4398 && !i.types[2].bitfield.xmmword
4399 && (i.tm.opcode_modifier.vex
4400 || ((!i.mask.reg || i.mask.zeroing)
4401 && is_evex_encoding (&i.tm)
4402 && (i.vec_encoding != vex_encoding_evex
4403 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4404 || i.tm.cpu_flags.bitfield.cpuavx512vl
4405 || (i.tm.operand_types[2].bitfield.zmmword
4406 && i.types[2].bitfield.ymmword))))
4407 && i.tm.opcode_space == SPACE_0F
4408 && ((i.tm.base_opcode | 2) == 0x57
4409 || i.tm.base_opcode == 0xdf
4410 || i.tm.base_opcode == 0xef
4411 || (i.tm.base_opcode | 3) == 0xfb
4412 || i.tm.base_opcode == 0x42
4413 || i.tm.base_opcode == 0x47))
4414 {
4415 /* Optimize: -O1:
4416 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4417 vpsubq and vpsubw:
4418 EVEX VOP %zmmM, %zmmM, %zmmN
4419 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4420 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4421 EVEX VOP %ymmM, %ymmM, %ymmN
4422 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4423 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4424 VEX VOP %ymmM, %ymmM, %ymmN
4425 -> VEX VOP %xmmM, %xmmM, %xmmN
4426 VOP, one of vpandn and vpxor:
4427 VEX VOP %ymmM, %ymmM, %ymmN
4428 -> VEX VOP %xmmM, %xmmM, %xmmN
4429 VOP, one of vpandnd and vpandnq:
4430 EVEX VOP %zmmM, %zmmM, %zmmN
4431 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4432 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4433 EVEX VOP %ymmM, %ymmM, %ymmN
4434 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4435 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4436 VOP, one of vpxord and vpxorq:
4437 EVEX VOP %zmmM, %zmmM, %zmmN
4438 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4439 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4440 EVEX VOP %ymmM, %ymmM, %ymmN
4441 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4442 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4443 VOP, one of kxord and kxorq:
4444 VEX VOP %kM, %kM, %kN
4445 -> VEX kxorw %kM, %kM, %kN
4446 VOP, one of kandnd and kandnq:
4447 VEX VOP %kM, %kM, %kN
4448 -> VEX kandnw %kM, %kM, %kN
4449 */
4450 if (is_evex_encoding (&i.tm))
4451 {
4452 if (i.vec_encoding != vex_encoding_evex)
4453 {
4454 i.tm.opcode_modifier.vex = VEX128;
4455 i.tm.opcode_modifier.vexw = VEXW0;
4456 i.tm.opcode_modifier.evex = 0;
4457 }
4458 else if (optimize > 1)
4459 i.tm.opcode_modifier.evex = EVEX128;
4460 else
4461 return;
4462 }
4463 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4464 {
4465 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4466 i.tm.opcode_modifier.vexw = VEXW0;
4467 }
4468 else
4469 i.tm.opcode_modifier.vex = VEX128;
4470
4471 if (i.tm.opcode_modifier.vex)
4472 for (j = 0; j < 3; j++)
4473 {
4474 i.types[j].bitfield.xmmword = 1;
4475 i.types[j].bitfield.ymmword = 0;
4476 }
4477 }
4478 else if (i.vec_encoding != vex_encoding_evex
4479 && !i.types[0].bitfield.zmmword
4480 && !i.types[1].bitfield.zmmword
4481 && !i.mask.reg
4482 && !i.broadcast.type
4483 && !i.broadcast.bytes
4484 && is_evex_encoding (&i.tm)
4485 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4486 || (i.tm.base_opcode & ~4) == 0xdb
4487 || (i.tm.base_opcode & ~4) == 0xeb)
4488 && i.tm.extension_opcode == None)
4489 {
4490 /* Optimize: -O1:
4491 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4492 vmovdqu32 and vmovdqu64:
4493 EVEX VOP %xmmM, %xmmN
4494 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4495 EVEX VOP %ymmM, %ymmN
4496 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4497 EVEX VOP %xmmM, mem
4498 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4499 EVEX VOP %ymmM, mem
4500 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4501 EVEX VOP mem, %xmmN
4502	   -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4503 EVEX VOP mem, %ymmN
4504 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4505 VOP, one of vpand, vpandn, vpor, vpxor:
4506 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4507 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4508 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4509 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4510 EVEX VOP{d,q} mem, %xmmM, %xmmN
4511 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4512 EVEX VOP{d,q} mem, %ymmM, %ymmN
4513 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4514 */
4515 for (j = 0; j < i.operands; j++)
4516 if (operand_type_check (i.types[j], disp)
4517 && i.op[j].disps->X_op == O_constant)
4518 {
4519 /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4520 has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4521 bytes, we choose EVEX Disp8 over VEX Disp32. */
4522 int evex_disp8, vex_disp8;
4523 unsigned int memshift = i.memshift;
4524 offsetT n = i.op[j].disps->X_add_number;
4525
4526 evex_disp8 = fits_in_disp8 (n);
4527 i.memshift = 0;
4528 vex_disp8 = fits_in_disp8 (n);
4529 if (evex_disp8 != vex_disp8)
4530 {
4531 i.memshift = memshift;
4532 return;
4533 }
4534
4535 i.types[j].bitfield.disp8 = vex_disp8;
4536 break;
4537 }
4538 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4539 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4540 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4541 i.tm.opcode_modifier.vex
4542 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4543 i.tm.opcode_modifier.vexw = VEXW0;
4544 /* VPAND, VPOR, and VPXOR are commutative. */
4545 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4546 i.tm.opcode_modifier.commutative = 1;
4547 i.tm.opcode_modifier.evex = 0;
4548 i.tm.opcode_modifier.masking = 0;
4549 i.tm.opcode_modifier.broadcast = 0;
4550 i.tm.opcode_modifier.disp8memshift = 0;
4551 i.memshift = 0;
4552 if (j < i.operands)
4553 i.types[j].bitfield.disp8
4554 = fits_in_disp8 (i.op[j].disps->X_add_number);
4555 }
4556 }
4557
4558 /* Return non-zero for load instruction. */
4559
4560 static int
4561 load_insn_p (void)
4562 {
4563 unsigned int dest;
4564 int any_vex_p = is_any_vex_encoding (&i.tm);
4565 unsigned int base_opcode = i.tm.base_opcode | 1;
4566
4567 if (!any_vex_p)
4568 {
4569 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4570 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
4571 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4572 return 0;
4573
4574 /* pop. */
4575 if (i.tm.mnem_off == MN_pop)
4576 return 1;
4577 }
4578
4579 if (i.tm.opcode_space == SPACE_BASE)
4580 {
4581 /* popf, popa. */
4582 if (i.tm.base_opcode == 0x9d
4583 || i.tm.base_opcode == 0x61)
4584 return 1;
4585
4586 /* movs, cmps, lods, scas. */
4587 if ((i.tm.base_opcode | 0xb) == 0xaf)
4588 return 1;
4589
4590 /* outs, xlatb. */
4591 if (base_opcode == 0x6f
4592 || i.tm.base_opcode == 0xd7)
4593 return 1;
4594	 /* NB: AMD-specific insns with implicit memory operands are
4595	    intentionally not covered.  */
4596 }
4597
4598 /* No memory operand. */
4599 if (!i.mem_operands)
4600 return 0;
4601
4602 if (any_vex_p)
4603 {
4604 if (i.tm.mnem_off == MN_vldmxcsr)
4605 return 1;
4606 }
4607 else if (i.tm.opcode_space == SPACE_BASE)
4608 {
4609 /* test, not, neg, mul, imul, div, idiv. */
4610 if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
4611 return 1;
4612
4613 /* inc, dec. */
4614 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4615 return 1;
4616
4617 /* add, or, adc, sbb, and, sub, xor, cmp. */
4618 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4619 return 1;
4620
4621 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4622 if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
4623 && i.tm.extension_opcode != 6)
4624 return 1;
4625
4626 /* Check for x87 instructions. */
4627 if ((base_opcode | 6) == 0xdf)
4628 {
4629 /* Skip fst, fstp, fstenv, fstcw. */
4630 if (i.tm.base_opcode == 0xd9
4631 && (i.tm.extension_opcode == 2
4632 || i.tm.extension_opcode == 3
4633 || i.tm.extension_opcode == 6
4634 || i.tm.extension_opcode == 7))
4635 return 0;
4636
4637 /* Skip fisttp, fist, fistp, fstp. */
4638 if (i.tm.base_opcode == 0xdb
4639 && (i.tm.extension_opcode == 1
4640 || i.tm.extension_opcode == 2
4641 || i.tm.extension_opcode == 3
4642 || i.tm.extension_opcode == 7))
4643 return 0;
4644
4645 /* Skip fisttp, fst, fstp, fsave, fstsw. */
4646 if (i.tm.base_opcode == 0xdd
4647 && (i.tm.extension_opcode == 1
4648 || i.tm.extension_opcode == 2
4649 || i.tm.extension_opcode == 3
4650 || i.tm.extension_opcode == 6
4651 || i.tm.extension_opcode == 7))
4652 return 0;
4653
4654 /* Skip fisttp, fist, fistp, fbstp, fistp. */
4655 if (i.tm.base_opcode == 0xdf
4656 && (i.tm.extension_opcode == 1
4657 || i.tm.extension_opcode == 2
4658 || i.tm.extension_opcode == 3
4659 || i.tm.extension_opcode == 6
4660 || i.tm.extension_opcode == 7))
4661 return 0;
4662
4663 return 1;
4664 }
4665 }
4666 else if (i.tm.opcode_space == SPACE_0F)
4667 {
4668 /* bt, bts, btr, btc. */
4669 if (i.tm.base_opcode == 0xba
4670 && (i.tm.extension_opcode | 3) == 7)
4671 return 1;
4672
4673 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
4674 if (i.tm.base_opcode == 0xc7
4675 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4676 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4677 || i.tm.extension_opcode == 6))
4678 return 1;
4679
4680 /* fxrstor, ldmxcsr, xrstor. */
4681 if (i.tm.base_opcode == 0xae
4682 && (i.tm.extension_opcode == 1
4683 || i.tm.extension_opcode == 2
4684 || i.tm.extension_opcode == 5))
4685 return 1;
4686
4687 /* lgdt, lidt, lmsw. */
4688 if (i.tm.base_opcode == 0x01
4689 && (i.tm.extension_opcode == 2
4690 || i.tm.extension_opcode == 3
4691 || i.tm.extension_opcode == 6))
4692 return 1;
4693 }
4694
4695 dest = i.operands - 1;
4696
4697 /* Check fake imm8 operand and 3 source operands. */
4698 if ((i.tm.opcode_modifier.immext
4699 || i.reg_operands + i.mem_operands == 4)
4700 && i.types[dest].bitfield.imm8)
4701 dest--;
4702
4703 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
4704 if (i.tm.opcode_space == SPACE_BASE
4705 && ((base_opcode | 0x38) == 0x39
4706 || (base_opcode | 2) == 0x87))
4707 return 1;
4708
4709 if (i.tm.mnem_off == MN_xadd)
4710 return 1;
4711
4712 /* Check for load instruction. */
4713 return (i.types[dest].bitfield.class != ClassNone
4714 || i.types[dest].bitfield.instance == Accum);
4715 }
4716
4717 /* Output lfence (0x0f 0xae 0xe8) after instruction.  */
4718
4719 static void
4720 insert_lfence_after (void)
4721 {
4722 if (lfence_after_load && load_insn_p ())
4723 {
4724 /* There are also two REP string instructions that require
4725 special treatment. Specifically, the compare string (CMPS)
4726 and scan string (SCAS) instructions set EFLAGS in a manner
4727 that depends on the data being compared/scanned. When used
4728 with a REP prefix, the number of iterations may therefore
4729 vary depending on this data. If the data is a program secret
4730 chosen by the adversary using an LVI method,
4731 then this data-dependent behavior may leak some aspect
4732 of the secret. */
4733 if (((i.tm.base_opcode | 0x9) == 0xaf)
4734 && i.prefix[REP_PREFIX])
4735 {
4736 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4737 insn_name (&i.tm));
4738 }
4739 char *p = frag_more (3);
4740 *p++ = 0xf;
4741 *p++ = 0xae;
4742 *p = 0xe8;
4743 }
4744 }
4745
4746 /* Output lfence (0x0f 0xae 0xe8) before instruction.  */
4747
4748 static void
4749 insert_lfence_before (void)
4750 {
4751 char *p;
4752
4753 if (i.tm.opcode_space != SPACE_BASE)
4754 return;
4755
4756 if (i.tm.base_opcode == 0xff
4757 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4758 {
4759 /* Insert lfence before indirect branch if needed. */
4760
4761 if (lfence_before_indirect_branch == lfence_branch_none)
4762 return;
4763
4764 if (i.operands != 1)
4765 abort ();
4766
4767 if (i.reg_operands == 1)
4768 {
4769 /* Indirect branch via register. Don't insert lfence with
4770 -mlfence-after-load=yes. */
4771 if (lfence_after_load
4772 || lfence_before_indirect_branch == lfence_branch_memory)
4773 return;
4774 }
4775 else if (i.mem_operands == 1
4776 && lfence_before_indirect_branch != lfence_branch_register)
4777 {
4778 as_warn (_("indirect `%s` with memory operand should be avoided"),
4779 insn_name (&i.tm));
4780 return;
4781 }
4782 else
4783 return;
4784
4785 if (last_insn.kind != last_insn_other
4786 && last_insn.seg == now_seg)
4787 {
4788 as_warn_where (last_insn.file, last_insn.line,
4789 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4790 last_insn.name, insn_name (&i.tm));
4791 return;
4792 }
4793
4794 p = frag_more (3);
4795 *p++ = 0xf;
4796 *p++ = 0xae;
4797 *p = 0xe8;
4798 return;
4799 }
4800
4801 /* Output or/not/shl and lfence before near ret. */
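  /* That is, emit "not (%rsp); not (%rsp)", "or $0, (%rsp)" or
     "shl $0, (%rsp)" followed by lfence; each variant leaves the return
     address on the stack unchanged.  */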
4802 if (lfence_before_ret != lfence_before_ret_none
4803 && (i.tm.base_opcode | 1) == 0xc3)
4804 {
4805 if (last_insn.kind != last_insn_other
4806 && last_insn.seg == now_seg)
4807 {
4808 as_warn_where (last_insn.file, last_insn.line,
4809 _("`%s` skips -mlfence-before-ret on `%s`"),
4810 last_insn.name, insn_name (&i.tm));
4811 return;
4812 }
4813
4814      /* Near ret ignores the operand size override under CPU64.  */
4815 char prefix = flag_code == CODE_64BIT
4816 ? 0x48
4817 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4818
4819 if (lfence_before_ret == lfence_before_ret_not)
4820 {
4821 /* not: 0xf71424, may add prefix
4822 for operand size override or 64-bit code. */
4823 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4824 if (prefix)
4825 *p++ = prefix;
4826 *p++ = 0xf7;
4827 *p++ = 0x14;
4828 *p++ = 0x24;
4829 if (prefix)
4830 *p++ = prefix;
4831 *p++ = 0xf7;
4832 *p++ = 0x14;
4833 *p++ = 0x24;
4834 }
4835 else
4836 {
4837 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4838 if (prefix)
4839 *p++ = prefix;
4840 if (lfence_before_ret == lfence_before_ret_or)
4841 {
4842 /* or: 0x830c2400, may add prefix
4843 for operand size override or 64-bit code. */
4844 *p++ = 0x83;
4845 *p++ = 0x0c;
4846 }
4847 else
4848 {
4849 /* shl: 0xc1242400, may add prefix
4850 for operand size override or 64-bit code. */
4851 *p++ = 0xc1;
4852 *p++ = 0x24;
4853 }
4854
4855 *p++ = 0x24;
4856 *p++ = 0x0;
4857 }
4858
4859 *p++ = 0xf;
4860 *p++ = 0xae;
4861 *p = 0xe8;
4862 }
4863 }
4864
4865 /* Shared helper for md_assemble() and s_insn(). */
4866 static void init_globals (void)
4867 {
4868 unsigned int j;
4869
4870 memset (&i, '\0', sizeof (i));
4871 i.rounding.type = rc_none;
4872 for (j = 0; j < MAX_OPERANDS; j++)
4873 i.reloc[j] = NO_RELOC;
4874 memset (disp_expressions, '\0', sizeof (disp_expressions));
4875 memset (im_expressions, '\0', sizeof (im_expressions));
4876 save_stack_p = save_stack;
4877 }
4878
4879 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
4880    parsing pass.  Instead of introducing a rarely used new insn attribute,
4881    this relies on a pattern common to the affected templates.  It is deemed
4882 acceptable that this will lead to unnecessary pass 2 preparations in a
4883 limited set of cases. */
4884 static INLINE bool may_need_pass2 (const insn_template *t)
4885 {
4886 return t->opcode_modifier.sse2avx
4887 /* Note that all SSE2AVX templates have at least one operand. */
4888 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
4889 : (t->opcode_space == SPACE_0F
4890	 && (t->base_opcode | 1) == 0xbf)	/* movsx */
4891 || (t->opcode_space == SPACE_BASE
4892	 && t->base_opcode == 0x63);		/* movsxd */
4893 }
4894
4895 /* This is the guts of the machine-dependent assembler. LINE points to a
4896 machine dependent instruction. This function is supposed to emit
4897 the frags/bytes it assembles to. */
4898
4899 void
4900 md_assemble (char *line)
4901 {
4902 unsigned int j;
4903 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
4904 const char *end, *pass1_mnem = NULL;
4905 enum i386_error pass1_err = 0;
4906 const insn_template *t;
4907
4908 /* Initialize globals. */
4909 current_templates = NULL;
4910 retry:
4911 init_globals ();
4912
4913 /* First parse an instruction mnemonic & call i386_operand for the operands.
4914 We assume that the scrubber has arranged it so that line[0] is the valid
4915 start of a (possibly prefixed) mnemonic. */
4916
4917 end = parse_insn (line, mnemonic, false);
4918 if (end == NULL)
4919 {
4920 if (pass1_mnem != NULL)
4921 goto match_error;
4922 if (i.error != no_error)
4923 {
4924 gas_assert (current_templates != NULL);
4925 if (may_need_pass2 (current_templates->start) && !i.suffix)
4926 goto no_match;
4927 /* No point in trying a 2nd pass - it'll only find the same suffix
4928 again. */
4929 mnem_suffix = i.suffix;
4930 goto match_error;
4931 }
4932 return;
4933 }
4934 t = current_templates->start;
4935 if (may_need_pass2 (t))
4936 {
4937 /* Make a copy of the full line in case we need to retry. */
4938 copy = xstrdup (line);
4939 }
4940 line += end - line;
4941 mnem_suffix = i.suffix;
4942
4943 line = parse_operands (line, mnemonic);
4944 this_operand = -1;
4945 if (line == NULL)
4946 {
4947 free (copy);
4948 return;
4949 }
4950
4951 /* Now we've parsed the mnemonic into a set of templates, and have the
4952 operands at hand. */
4953
4954 /* All Intel opcodes have reversed operands except for "bound", "enter",
4955 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4956 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
4957 intersegment "jmp" and "call" instructions with 2 immediate operands so
4958 that the immediate segment precedes the offset consistently in Intel and
4959 AT&T modes. */
4960 if (intel_syntax
4961 && i.operands > 1
4962 && (t->mnem_off != MN_bound)
4963 && !startswith (mnemonic, "invlpg")
4964 && !startswith (mnemonic, "monitor")
4965 && !startswith (mnemonic, "mwait")
4966 && (t->mnem_off != MN_pvalidate)
4967 && !startswith (mnemonic, "rmp")
4968 && (t->mnem_off != MN_tpause)
4969 && (t->mnem_off != MN_umwait)
4970 && !(i.operands == 2
4971 && operand_type_check (i.types[0], imm)
4972 && operand_type_check (i.types[1], imm)))
4973 swap_operands ();
4974
4975   /* The order of the immediates should be reversed for the two-immediate
4976      extrq and insertq instructions.  */
4977 if (i.imm_operands == 2
4978 && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
4979 swap_2_operands (0, 1);
4980
4981 if (i.imm_operands)
4982 optimize_imm ();
4983
4984 if (i.disp_operands && !optimize_disp (t))
4985 return;
4986
4987 /* Next, we find a template that matches the given insn,
4988 making sure the overlap of the given operands types is consistent
4989 with the template operand types. */
4990
4991 if (!(t = match_template (mnem_suffix)))
4992 {
4993 const char *err_msg;
4994
4995 if (copy && !mnem_suffix)
4996 {
4997 line = copy;
4998 copy = NULL;
4999 no_match:
5000 pass1_err = i.error;
5001 pass1_mnem = insn_name (current_templates->start);
5002 goto retry;
5003 }
5004
5005 /* If a non-/only-64bit template (group) was found in pass 1, and if
5006 _some_ template (group) was found in pass 2, squash pass 1's
5007 error. */
5008 if (pass1_err == unsupported_64bit)
5009 pass1_mnem = NULL;
5010
5011 match_error:
5012 free (copy);
5013
5014 switch (pass1_mnem ? pass1_err : i.error)
5015 {
5016 default:
5017 abort ();
5018 case operand_size_mismatch:
5019 err_msg = _("operand size mismatch");
5020 break;
5021 case operand_type_mismatch:
5022 err_msg = _("operand type mismatch");
5023 break;
5024 case register_type_mismatch:
5025 err_msg = _("register type mismatch");
5026 break;
5027 case number_of_operands_mismatch:
5028 err_msg = _("number of operands mismatch");
5029 break;
5030 case invalid_instruction_suffix:
5031 err_msg = _("invalid instruction suffix");
5032 break;
5033 case bad_imm4:
5034 err_msg = _("constant doesn't fit in 4 bits");
5035 break;
5036 case unsupported_with_intel_mnemonic:
5037 err_msg = _("unsupported with Intel mnemonic");
5038 break;
5039 case unsupported_syntax:
5040 err_msg = _("unsupported syntax");
5041 break;
5042 case unsupported:
5043 as_bad (_("unsupported instruction `%s'"),
5044 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5045 return;
5046 case unsupported_on_arch:
5047 as_bad (_("`%s' is not supported on `%s%s'"),
5048 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5049 cpu_arch_name ? cpu_arch_name : default_arch,
5050 cpu_sub_arch_name ? cpu_sub_arch_name : "");
5051 return;
5052 case unsupported_64bit:
5053 if (ISLOWER (mnem_suffix))
5054 {
5055 if (flag_code == CODE_64BIT)
5056 as_bad (_("`%s%c' is not supported in 64-bit mode"),
5057 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5058 mnem_suffix);
5059 else
5060 as_bad (_("`%s%c' is only supported in 64-bit mode"),
5061 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5062 mnem_suffix);
5063 }
5064 else
5065 {
5066 if (flag_code == CODE_64BIT)
5067 as_bad (_("`%s' is not supported in 64-bit mode"),
5068 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5069 else
5070 as_bad (_("`%s' is only supported in 64-bit mode"),
5071 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5072 }
5073 return;
5074 case invalid_sib_address:
5075 err_msg = _("invalid SIB address");
5076 break;
5077 case invalid_vsib_address:
5078 err_msg = _("invalid VSIB address");
5079 break;
5080 case invalid_vector_register_set:
5081 err_msg = _("mask, index, and destination registers must be distinct");
5082 break;
5083 case invalid_tmm_register_set:
5084 err_msg = _("all tmm registers must be distinct");
5085 break;
5086 case invalid_dest_and_src_register_set:
5087 err_msg = _("destination and source registers must be distinct");
5088 break;
5089 case unsupported_vector_index_register:
5090 err_msg = _("unsupported vector index register");
5091 break;
5092 case unsupported_broadcast:
5093 err_msg = _("unsupported broadcast");
5094 break;
5095 case broadcast_needed:
5096 err_msg = _("broadcast is needed for operand of such type");
5097 break;
5098 case unsupported_masking:
5099 err_msg = _("unsupported masking");
5100 break;
5101 case mask_not_on_destination:
5102 err_msg = _("mask not on destination operand");
5103 break;
5104 case no_default_mask:
5105 err_msg = _("default mask isn't allowed");
5106 break;
5107 case unsupported_rc_sae:
5108 err_msg = _("unsupported static rounding/sae");
5109 break;
5110 case invalid_register_operand:
5111 err_msg = _("invalid register operand");
5112 break;
5113 }
5114 as_bad (_("%s for `%s'"), err_msg,
5115 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5116 return;
5117 }
5118
5119 free (copy);
5120
5121 if (sse_check != check_none
5122 /* The opcode space check isn't strictly needed; it's there only to
5123 bypass the logic below when easily possible. */
5124 && t->opcode_space >= SPACE_0F
5125 && t->opcode_space <= SPACE_0F3A
5126 && !i.tm.cpu_flags.bitfield.cpusse4a
5127 && !is_any_vex_encoding (t))
5128 {
5129 bool simd = false;
5130
5131 for (j = 0; j < t->operands; ++j)
5132 {
5133 if (t->operand_types[j].bitfield.class == RegMMX)
5134 break;
5135 if (t->operand_types[j].bitfield.class == RegSIMD)
5136 simd = true;
5137 }
5138
5139 if (j >= t->operands && simd)
5140 (sse_check == check_warning
5141 ? as_warn
5142 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
5143 }
5144
5145 if (i.tm.opcode_modifier.fwait)
5146 if (!add_prefix (FWAIT_OPCODE))
5147 return;
5148
5149 /* Check if REP prefix is OK. */
5150 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5151 {
5152 as_bad (_("invalid instruction `%s' after `%s'"),
5153 insn_name (&i.tm), i.rep_prefix);
5154 return;
5155 }
5156
5157 /* Check for lock without a lockable instruction. Destination operand
5158 must be memory unless it is xchg (0x86). */
5159 if (i.prefix[LOCK_PREFIX])
5160 {
5161 if (i.tm.opcode_modifier.prefixok < PrefixLock
5162 || i.mem_operands == 0
5163 || (i.tm.base_opcode != 0x86
5164 && !(i.flags[i.operands - 1] & Operand_Mem)))
5165 {
5166 as_bad (_("expecting lockable instruction after `lock'"));
5167 return;
5168 }
5169
5170 /* Zap the redundant prefix from XCHG when optimizing. */
5171 if (i.tm.base_opcode == 0x86 && optimize && !i.no_optimize)
5172 i.prefix[LOCK_PREFIX] = 0;
5173 }
5174
5175 if (is_any_vex_encoding (&i.tm)
5176 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5177 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
5178 {
5179 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
5180 if (i.prefix[DATA_PREFIX])
5181 {
5182 as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
5183 return;
5184 }
5185
5186 /* Don't allow e.g. KMOV in TLS code sequences. */
5187 for (j = i.imm_operands; j < i.operands; ++j)
5188 switch (i.reloc[j])
5189 {
5190 case BFD_RELOC_386_TLS_GOTIE:
5191 case BFD_RELOC_386_TLS_LE_32:
5192 case BFD_RELOC_X86_64_GOTTPOFF:
5193 case BFD_RELOC_X86_64_TLSLD:
5194 as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
5195 return;
5196 default:
5197 break;
5198 }
5199 }
5200
5201 /* Check if HLE prefix is OK. */
5202 if (i.hle_prefix && !check_hle ())
5203 return;
5204
5205 /* Check BND prefix. */
5206 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5207 as_bad (_("expecting valid branch instruction after `bnd'"));
5208
5209 /* Check NOTRACK prefix. */
5210 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5211 as_bad (_("expecting indirect branch instruction after `notrack'"));
5212
5213 if (i.tm.cpu_flags.bitfield.cpumpx)
5214 {
5215 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5216 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5217 else if (flag_code != CODE_16BIT
5218 ? i.prefix[ADDR_PREFIX]
5219 : i.mem_operands && !i.prefix[ADDR_PREFIX])
5220 as_bad (_("16-bit address isn't allowed in MPX instructions"));
5221 }
5222
5223 /* Insert BND prefix. */
5224 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5225 {
5226 if (!i.prefix[BND_PREFIX])
5227 add_prefix (BND_PREFIX_OPCODE);
5228 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5229 {
5230 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5231 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5232 }
5233 }
5234
5235 /* Check string instruction segment overrides. */
5236 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5237 {
5238 gas_assert (i.mem_operands);
5239 if (!check_string ())
5240 return;
5241 i.disp_operands = 0;
5242 }
5243
5244 /* The memory operand of (%dx) should be only used with input/output
5245 instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee). */
5246 if (i.input_output_operand
5247 && ((i.tm.base_opcode | 0x82) != 0xee
5248 || i.tm.opcode_space != SPACE_BASE))
5249 {
5250 as_bad (_("input/output port address isn't allowed with `%s'"),
5251 insn_name (&i.tm));
5252 return;
5253 }
5254
5255 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5256 optimize_encoding ();
5257
5258 if (use_unaligned_vector_move)
5259 encode_with_unaligned_vector_move ();
5260
5261 if (!process_suffix ())
5262 return;
5263
5264 /* Check if IP-relative addressing requirements can be satisfied. */
5265 if (i.tm.cpu_flags.bitfield.cpuprefetchi
5266 && !(i.base_reg && i.base_reg->reg_num == RegIP))
5267 as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
5268
5269 /* Update operand types and check extended states. */
5270 for (j = 0; j < i.operands; j++)
5271 {
5272 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5273 switch (i.tm.operand_types[j].bitfield.class)
5274 {
5275 default:
5276 break;
5277 case RegMMX:
5278 i.xstate |= xstate_mmx;
5279 break;
5280 case RegMask:
5281 i.xstate |= xstate_mask;
5282 break;
5283 case RegSIMD:
5284 if (i.tm.operand_types[j].bitfield.tmmword)
5285 i.xstate |= xstate_tmm;
5286 else if (i.tm.operand_types[j].bitfield.zmmword)
5287 i.xstate |= xstate_zmm;
5288 else if (i.tm.operand_types[j].bitfield.ymmword)
5289 i.xstate |= xstate_ymm;
5290 else if (i.tm.operand_types[j].bitfield.xmmword)
5291 i.xstate |= xstate_xmm;
5292 break;
5293 }
5294 }
5295
5296 /* Make still unresolved immediate matches conform to size of immediate
5297 given in i.suffix. */
5298 if (!finalize_imm ())
5299 return;
5300
5301 if (i.types[0].bitfield.imm1)
5302 i.imm_operands = 0; /* kludge for shift insns. */
5303
5304 /* For insns with operands there are more diddles to do to the opcode. */
5305 if (i.operands)
5306 {
5307 if (!process_operands ())
5308 return;
5309 }
5310 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5311 {
5312 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
5313 as_warn (_("translating to `%sp'"), insn_name (&i.tm));
5314 }
5315
5316 if (is_any_vex_encoding (&i.tm))
5317 {
5318 if (!cpu_arch_flags.bitfield.cpui286)
5319 {
5320 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5321 insn_name (&i.tm));
5322 return;
5323 }
5324
5325 /* Check for explicit REX prefix. */
5326 if (i.prefix[REX_PREFIX] || i.rex_encoding)
5327 {
5328 as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
5329 return;
5330 }
5331
5332 if (i.tm.opcode_modifier.vex)
5333 build_vex_prefix (t);
5334 else
5335 build_evex_prefix ();
5336
5337 /* The individual REX.RXBW bits got consumed. */
5338 i.rex &= REX_OPCODE;
5339 }
5340
5341 /* Handle conversion of 'int $3' --> special int3 insn. */
5342 if (i.tm.mnem_off == MN_int
5343 && i.op[0].imms->X_add_number == 3)
5344 {
5345 i.tm.base_opcode = INT3_OPCODE;
5346 i.imm_operands = 0;
5347 }
5348
5349 if ((i.tm.opcode_modifier.jump == JUMP
5350 || i.tm.opcode_modifier.jump == JUMP_BYTE
5351 || i.tm.opcode_modifier.jump == JUMP_DWORD)
5352 && i.op[0].disps->X_op == O_constant)
5353 {
5354 /* Convert "jmp constant" (and "call constant") to a jump (call) to
5355 the absolute address given by the constant. Since ix86 jumps and
5356 calls are pc relative, we need to generate a reloc. */
5357 i.op[0].disps->X_add_symbol = &abs_symbol;
5358 i.op[0].disps->X_op = O_symbol;
5359 }
5360
5361 /* For 8 bit registers we need an empty rex prefix. Also if the
5362 instruction already has a prefix, we need to convert old
5363 registers to new ones. */
5364
5365 if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5366 && (i.op[0].regs->reg_flags & RegRex64) != 0)
5367 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5368 && (i.op[1].regs->reg_flags & RegRex64) != 0)
5369 || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5370 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5371 && i.rex != 0))
5372 {
5373 int x;
5374
5375 i.rex |= REX_OPCODE;
5376 for (x = 0; x < 2; x++)
5377 {
5378 /* Look for 8 bit operand that uses old registers. */
5379 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5380 && (i.op[x].regs->reg_flags & RegRex64) == 0)
5381 {
5382 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5383 /* In case it is "hi" register, give up. */
5384 if (i.op[x].regs->reg_num > 3)
5385 as_bad (_("can't encode register '%s%s' in an "
5386 "instruction requiring REX prefix."),
5387 register_prefix, i.op[x].regs->reg_name);
5388
5389 /* Otherwise it is equivalent to the extended register.
5390 Since the encoding doesn't change this is merely
5391 cosmetic cleanup for debug output. */
5392
5393 i.op[x].regs = i.op[x].regs + 8;
5394 }
5395 }
5396 }
5397
5398 if (i.rex == 0 && i.rex_encoding)
5399 {
5400 /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
5401 that uses legacy register. If it is "hi" register, don't add
5402 the REX_OPCODE byte. */
5403 int x;
5404 for (x = 0; x < 2; x++)
5405 if (i.types[x].bitfield.class == Reg
5406 && i.types[x].bitfield.byte
5407 && (i.op[x].regs->reg_flags & RegRex64) == 0
5408 && i.op[x].regs->reg_num > 3)
5409 {
5410 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5411 i.rex_encoding = false;
5412 break;
5413 }
5414
5415 if (i.rex_encoding)
5416 i.rex = REX_OPCODE;
5417 }
5418
5419 if (i.rex != 0)
5420 add_prefix (REX_OPCODE | i.rex);
5421
5422 insert_lfence_before ();
5423
5424 /* We are ready to output the insn. */
5425 output_insn ();
5426
5427 insert_lfence_after ();
5428
5429 last_insn.seg = now_seg;
5430
5431 if (i.tm.opcode_modifier.isprefix)
5432 {
5433 last_insn.kind = last_insn_prefix;
5434 last_insn.name = insn_name (&i.tm);
5435 last_insn.file = as_where (&last_insn.line);
5436 }
5437 else
5438 last_insn.kind = last_insn_other;
5439 }
5440
5441 /* The Q suffix is generally valid only in 64-bit mode, with very few
5442 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
5443 and fisttp only one of their two templates is matched below: That's
5444 sufficient since other relevant attributes are the same between both
5445 respective templates. */
5446 static INLINE bool q_suffix_allowed(const insn_template *t)
5447 {
5448 return flag_code == CODE_64BIT
5449 || (t->opcode_space == SPACE_BASE
5450 && t->base_opcode == 0xdf
5451 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
5452 || t->mnem_off == MN_cmpxchg8b;
5453 }
5454
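/* Parse the (possibly prefixed) mnemonic at LINE into MNEMONIC and look up
   current_templates.  With PREFIX_ONLY, stop once the prefixes have been
   consumed.  Return a pointer past the parsed input, or NULL after issuing
   a diagnostic.  */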
5455 static const char *
5456 parse_insn (const char *line, char *mnemonic, bool prefix_only)
5457 {
5458 const char *l = line, *token_start = l;
5459 char *mnem_p;
5460 bool pass1 = !current_templates;
5461 int supported;
5462 const insn_template *t;
5463 char *dot_p = NULL;
5464
5465 while (1)
5466 {
5467 mnem_p = mnemonic;
5468 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5469 {
5470 if (*mnem_p == '.')
5471 dot_p = mnem_p;
5472 mnem_p++;
5473 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5474 {
5475 as_bad (_("no such instruction: `%s'"), token_start);
5476 return NULL;
5477 }
5478 l++;
5479 }
5480 if (!is_space_char (*l)
5481 && *l != END_OF_INSN
5482 && (intel_syntax
5483 || (*l != PREFIX_SEPARATOR
5484 && *l != ',')))
5485 {
5486 if (prefix_only)
5487 break;
5488 as_bad (_("invalid character %s in mnemonic"),
5489 output_invalid (*l));
5490 return NULL;
5491 }
5492 if (token_start == l)
5493 {
5494 if (!intel_syntax && *l == PREFIX_SEPARATOR)
5495 as_bad (_("expecting prefix; got nothing"));
5496 else
5497 as_bad (_("expecting mnemonic; got nothing"));
5498 return NULL;
5499 }
5500
5501 /* Look up instruction (or prefix) via hash table. */
5502 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5503
5504 if (*l != END_OF_INSN
5505 && (!is_space_char (*l) || l[1] != END_OF_INSN)
5506 && current_templates
5507 && current_templates->start->opcode_modifier.isprefix)
5508 {
5509 if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5510 {
5511 as_bad ((flag_code != CODE_64BIT
5512 ? _("`%s' is only supported in 64-bit mode")
5513 : _("`%s' is not supported in 64-bit mode")),
5514 insn_name (current_templates->start));
5515 return NULL;
5516 }
5517 /* If we are in 16-bit mode, do not allow addr16 or data16.
5518 Similarly, in 32-bit mode, do not allow addr32 or data32. */
5519 if ((current_templates->start->opcode_modifier.size == SIZE16
5520 || current_templates->start->opcode_modifier.size == SIZE32)
5521 && flag_code != CODE_64BIT
5522 && ((current_templates->start->opcode_modifier.size == SIZE32)
5523 ^ (flag_code == CODE_16BIT)))
5524 {
5525 as_bad (_("redundant %s prefix"),
5526 insn_name (current_templates->start));
5527 return NULL;
5528 }
5529
5530 if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5531 {
5532 /* Handle pseudo prefixes. */
5533 switch (current_templates->start->extension_opcode)
5534 {
5535 case Prefix_Disp8:
5536 /* {disp8} */
5537 i.disp_encoding = disp_encoding_8bit;
5538 break;
5539 case Prefix_Disp16:
5540 /* {disp16} */
5541 i.disp_encoding = disp_encoding_16bit;
5542 break;
5543 case Prefix_Disp32:
5544 /* {disp32} */
5545 i.disp_encoding = disp_encoding_32bit;
5546 break;
5547 case Prefix_Load:
5548 /* {load} */
5549 i.dir_encoding = dir_encoding_load;
5550 break;
5551 case Prefix_Store:
5552 /* {store} */
5553 i.dir_encoding = dir_encoding_store;
5554 break;
5555 case Prefix_VEX:
5556 /* {vex} */
5557 i.vec_encoding = vex_encoding_vex;
5558 break;
5559 case Prefix_VEX3:
5560 /* {vex3} */
5561 i.vec_encoding = vex_encoding_vex3;
5562 break;
5563 case Prefix_EVEX:
5564 /* {evex} */
5565 i.vec_encoding = vex_encoding_evex;
5566 break;
5567 case Prefix_REX:
5568 /* {rex} */
5569 i.rex_encoding = true;
5570 break;
5571 case Prefix_NoOptimize:
5572 /* {nooptimize} */
5573 i.no_optimize = true;
5574 break;
5575 default:
5576 abort ();
5577 }
5578 }
5579 else
5580 {
5581 /* Add prefix, checking for repeated prefixes. */
5582 switch (add_prefix (current_templates->start->base_opcode))
5583 {
5584 case PREFIX_EXIST:
5585 return NULL;
5586 case PREFIX_DS:
5587 if (current_templates->start->cpu_flags.bitfield.cpuibt)
5588 i.notrack_prefix = insn_name (current_templates->start);
5589 break;
5590 case PREFIX_REP:
5591 if (current_templates->start->cpu_flags.bitfield.cpuhle)
5592 i.hle_prefix = insn_name (current_templates->start);
5593 else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5594 i.bnd_prefix = insn_name (current_templates->start);
5595 else
5596 i.rep_prefix = insn_name (current_templates->start);
5597 break;
5598 default:
5599 break;
5600 }
5601 }
5602 /* Skip past PREFIX_SEPARATOR and reset token_start. */
5603 token_start = ++l;
5604 }
5605 else
5606 break;
5607 }
5608
5609 if (prefix_only)
5610 return token_start;
5611
5612 if (!current_templates)
5613 {
5614 /* Deprecated functionality (new code should use pseudo-prefixes instead):
5615 Check if we should swap operand or force 32bit displacement in
5616 encoding. */
5617 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5618 i.dir_encoding = dir_encoding_swap;
5619 else if (mnem_p - 3 == dot_p
5620 && dot_p[1] == 'd'
5621 && dot_p[2] == '8')
5622 i.disp_encoding = disp_encoding_8bit;
5623 else if (mnem_p - 4 == dot_p
5624 && dot_p[1] == 'd'
5625 && dot_p[2] == '3'
5626 && dot_p[3] == '2')
5627 i.disp_encoding = disp_encoding_32bit;
5628 else
5629 goto check_suffix;
5630 mnem_p = dot_p;
5631 *dot_p = '\0';
5632 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5633 }
5634
5635 if (!current_templates || !pass1)
5636 {
5637 current_templates = NULL;
5638
5639 check_suffix:
5640 if (mnem_p > mnemonic)
5641 {
5642 /* See if we can get a match by trimming off a suffix. */
5643 switch (mnem_p[-1])
5644 {
5645 case WORD_MNEM_SUFFIX:
5646 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5647 i.suffix = SHORT_MNEM_SUFFIX;
5648 else
5649 /* Fall through. */
5650 case BYTE_MNEM_SUFFIX:
5651 case QWORD_MNEM_SUFFIX:
5652 i.suffix = mnem_p[-1];
5653 mnem_p[-1] = '\0';
5654 current_templates
5655 = (const templates *) str_hash_find (op_hash, mnemonic);
5656 break;
5657 case SHORT_MNEM_SUFFIX:
5658 case LONG_MNEM_SUFFIX:
5659 if (!intel_syntax)
5660 {
5661 i.suffix = mnem_p[-1];
5662 mnem_p[-1] = '\0';
5663 current_templates
5664 = (const templates *) str_hash_find (op_hash, mnemonic);
5665 }
5666 break;
5667
5668 /* Intel Syntax. */
5669 case 'd':
5670 if (intel_syntax)
5671 {
5672 if (intel_float_operand (mnemonic) == 1)
5673 i.suffix = SHORT_MNEM_SUFFIX;
5674 else
5675 i.suffix = LONG_MNEM_SUFFIX;
5676 mnem_p[-1] = '\0';
5677 current_templates
5678 = (const templates *) str_hash_find (op_hash, mnemonic);
5679 }
5680 /* For compatibility reasons accept MOVSD and CMPSD without
5681 operands even in AT&T mode. */
5682 else if (*l == END_OF_INSN
5683 || (is_space_char (*l) && l[1] == END_OF_INSN))
5684 {
5685 mnem_p[-1] = '\0';
5686 current_templates
5687 = (const templates *) str_hash_find (op_hash, mnemonic);
5688 if (current_templates != NULL
5689 /* MOVS or CMPS */
5690 && (current_templates->start->base_opcode | 2) == 0xa6
5691 && current_templates->start->opcode_space
5692 == SPACE_BASE
5693 && mnem_p[-2] == 's')
5694 {
5695 as_warn (_("found `%sd'; assuming `%sl' was meant"),
5696 mnemonic, mnemonic);
5697 i.suffix = LONG_MNEM_SUFFIX;
5698 }
5699 else
5700 {
5701 current_templates = NULL;
5702 mnem_p[-1] = 'd';
5703 }
5704 }
5705 break;
5706 }
5707 }
5708
5709 if (!current_templates)
5710 {
5711 if (pass1)
5712 as_bad (_("no such instruction: `%s'"), token_start);
5713 return NULL;
5714 }
5715 }
5716
5717 if (current_templates->start->opcode_modifier.jump == JUMP
5718 || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5719 {
5720 /* Check for a branch hint. We allow ",pt" and ",pn" for
5721 predict taken and predict not taken respectively.
5722 I'm not sure that branch hints actually do anything on loop
5723 and jcxz insns (JumpByte) for current Pentium4 chips. They
5724 may work in the future and it doesn't hurt to accept them
5725 now. */
5726 if (l[0] == ',' && l[1] == 'p')
5727 {
5728 if (l[2] == 't')
5729 {
5730 if (!add_prefix (DS_PREFIX_OPCODE))
5731 return NULL;
5732 l += 3;
5733 }
5734 else if (l[2] == 'n')
5735 {
5736 if (!add_prefix (CS_PREFIX_OPCODE))
5737 return NULL;
5738 l += 3;
5739 }
5740 }
5741 }
5742 /* Any other comma loses. */
5743 if (*l == ',')
5744 {
5745 as_bad (_("invalid character %s in mnemonic"),
5746 output_invalid (*l));
5747 return NULL;
5748 }
5749
5750 /* Check if instruction is supported on specified architecture. */
5751 supported = 0;
5752 for (t = current_templates->start; t < current_templates->end; ++t)
5753 {
5754 supported |= cpu_flags_match (t);
5755
5756 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
5757 supported &= ~CPU_FLAGS_64BIT_MATCH;
5758
5759 if (supported == CPU_FLAGS_PERFECT_MATCH)
5760 return l;
5761 }
5762
5763 if (pass1)
5764 {
5765 if (supported & CPU_FLAGS_64BIT_MATCH)
5766 i.error = unsupported_on_arch;
5767 else
5768 i.error = unsupported_64bit;
5769 }
5770
5771 return NULL;
5772 }
5773
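/* Parse the comma-separated operands following the mnemonic in L, recording
   them in the global `i' as we go.  Return the updated input pointer, or
   NULL after issuing a diagnostic.  */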
5774 static char *
5775 parse_operands (char *l, const char *mnemonic)
5776 {
5777 char *token_start;
5778
5779 /* 1 if operand is pending after ','. */
5780 unsigned int expecting_operand = 0;
5781
5782 while (*l != END_OF_INSN)
5783 {
5784 /* Non-zero if operand parens not balanced. */
5785 unsigned int paren_not_balanced = 0;
5786 /* True if inside double quotes. */
5787 bool in_quotes = false;
5788
5789 /* Skip optional white space before operand. */
5790 if (is_space_char (*l))
5791 ++l;
5792 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5793 {
5794 as_bad (_("invalid character %s before operand %d"),
5795 output_invalid (*l),
5796 i.operands + 1);
5797 return NULL;
5798 }
5799 token_start = l; /* After white space. */
5800 while (in_quotes || paren_not_balanced || *l != ',')
5801 {
5802 if (*l == END_OF_INSN)
5803 {
5804 if (in_quotes)
5805 {
5806 as_bad (_("unbalanced double quotes in operand %d."),
5807 i.operands + 1);
5808 return NULL;
5809 }
5810 if (paren_not_balanced)
5811 {
5812 know (!intel_syntax);
5813 as_bad (_("unbalanced parenthesis in operand %d."),
5814 i.operands + 1);
5815 return NULL;
5816 }
5817 else
5818 break; /* we are done */
5819 }
5820 else if (*l == '\\' && l[1] == '"')
5821 ++l;
5822 else if (*l == '"')
5823 in_quotes = !in_quotes;
5824 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5825 {
5826 as_bad (_("invalid character %s in operand %d"),
5827 output_invalid (*l),
5828 i.operands + 1);
5829 return NULL;
5830 }
5831 if (!intel_syntax && !in_quotes)
5832 {
5833 if (*l == '(')
5834 ++paren_not_balanced;
5835 if (*l == ')')
5836 --paren_not_balanced;
5837 }
5838 l++;
5839 }
5840 if (l != token_start)
5841 { /* Yes, we've read in another operand. */
5842 unsigned int operand_ok;
5843 this_operand = i.operands++;
5844 if (i.operands > MAX_OPERANDS)
5845 {
5846 as_bad (_("spurious operands; (%d operands/instruction max)"),
5847 MAX_OPERANDS);
5848 return NULL;
5849 }
5850 i.types[this_operand].bitfield.unspecified = 1;
5851 /* Now parse operand adding info to 'i' as we go along. */
5852 END_STRING_AND_SAVE (l);
5853
5854 if (i.mem_operands > 1)
5855 {
5856 as_bad (_("too many memory references for `%s'"),
5857 mnemonic);
5858 return 0;
5859 }
5860
5861 if (intel_syntax)
5862 operand_ok =
5863 i386_intel_operand (token_start,
5864 intel_float_operand (mnemonic));
5865 else
5866 operand_ok = i386_att_operand (token_start);
5867
5868 RESTORE_END_STRING (l);
5869 if (!operand_ok)
5870 return NULL;
5871 }
5872 else
5873 {
5874 if (expecting_operand)
5875 {
5876 expecting_operand_after_comma:
5877 as_bad (_("expecting operand after ','; got nothing"));
5878 return NULL;
5879 }
5880 if (*l == ',')
5881 {
5882 as_bad (_("expecting operand before ','; got nothing"));
5883 return NULL;
5884 }
5885 }
5886
5887 /* Now *l must be either ',' or END_OF_INSN. */
5888 if (*l == ',')
5889 {
5890 if (*++l == END_OF_INSN)
5891 {
5892 	      /* A trailing comma means a missing operand; complain.  */
5893 goto expecting_operand_after_comma;
5894 }
5895 expecting_operand = 1;
5896 }
5897 }
5898 return l;
5899 }
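/* A sketch of how the loop above splits operands: a comma only ends an
   operand at paren depth zero and outside double quotes, so AT&T input
   such as "movl $1, (%eax,%ebx,2)" yields exactly two operands, the
   commas inside the parentheses staying part of the memory operand.  */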
5900
5901 static void
5902 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5903 {
5904 union i386_op temp_op;
5905 i386_operand_type temp_type;
5906 unsigned int temp_flags;
5907 enum bfd_reloc_code_real temp_reloc;
5908
5909 temp_type = i.types[xchg2];
5910 i.types[xchg2] = i.types[xchg1];
5911 i.types[xchg1] = temp_type;
5912
5913 temp_flags = i.flags[xchg2];
5914 i.flags[xchg2] = i.flags[xchg1];
5915 i.flags[xchg1] = temp_flags;
5916
5917 temp_op = i.op[xchg2];
5918 i.op[xchg2] = i.op[xchg1];
5919 i.op[xchg1] = temp_op;
5920
5921 temp_reloc = i.reloc[xchg2];
5922 i.reloc[xchg2] = i.reloc[xchg1];
5923 i.reloc[xchg1] = temp_reloc;
5924
5925 if (i.mask.reg)
5926 {
5927 if (i.mask.operand == xchg1)
5928 i.mask.operand = xchg2;
5929 else if (i.mask.operand == xchg2)
5930 i.mask.operand = xchg1;
5931 }
5932 if (i.broadcast.type || i.broadcast.bytes)
5933 {
5934 if (i.broadcast.operand == xchg1)
5935 i.broadcast.operand = xchg2;
5936 else if (i.broadcast.operand == xchg2)
5937 i.broadcast.operand = xchg1;
5938 }
5939 }
5940
5941 static void
5942 swap_operands (void)
5943 {
5944 switch (i.operands)
5945 {
5946 case 5:
5947 case 4:
5948 swap_2_operands (1, i.operands - 2);
5949 /* Fall through. */
5950 case 3:
5951 case 2:
5952 swap_2_operands (0, i.operands - 1);
5953 break;
5954 default:
5955 abort ();
5956 }
5957
5958 if (i.mem_operands == 2)
5959 {
5960 const reg_entry *temp_seg;
5961 temp_seg = i.seg[0];
5962 i.seg[0] = i.seg[1];
5963 i.seg[1] = temp_seg;
5964 }
5965 }
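/* Net effect of swap_operands(): the operand list is reversed.  E.g.
   with four operands, (op0, op1, op2, op3) becomes (op3, op2, op1, op0):
   the middle pair is exchanged first, the outer pair on the fall
   through.  With three operands only the outer pair moves, which is
   still a full reversal since the middle operand mirrors onto itself.  */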
5966
5967 /* Try to ensure constant immediates are represented in the smallest
5968 opcode possible. */
5969 static void
5970 optimize_imm (void)
5971 {
5972 char guess_suffix = 0;
5973 int op;
5974
5975 if (i.suffix)
5976 guess_suffix = i.suffix;
5977 else if (i.reg_operands)
5978 {
5979 /* Figure out a suffix from the last register operand specified.
5980 We can't do this properly yet, i.e. excluding special register
5981 instances, but the following works for instructions with
5982 immediates. In any case, we can't set i.suffix yet. */
5983 for (op = i.operands; --op >= 0;)
5984 if (i.types[op].bitfield.class != Reg)
5985 continue;
5986 else if (i.types[op].bitfield.byte)
5987 {
5988 guess_suffix = BYTE_MNEM_SUFFIX;
5989 break;
5990 }
5991 else if (i.types[op].bitfield.word)
5992 {
5993 guess_suffix = WORD_MNEM_SUFFIX;
5994 break;
5995 }
5996 else if (i.types[op].bitfield.dword)
5997 {
5998 guess_suffix = LONG_MNEM_SUFFIX;
5999 break;
6000 }
6001 else if (i.types[op].bitfield.qword)
6002 {
6003 guess_suffix = QWORD_MNEM_SUFFIX;
6004 break;
6005 }
6006 }
6007 else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6008 guess_suffix = WORD_MNEM_SUFFIX;
6009
6010 for (op = i.operands; --op >= 0;)
6011 if (operand_type_check (i.types[op], imm))
6012 {
6013 switch (i.op[op].imms->X_op)
6014 {
6015 case O_constant:
6016 /* If a suffix is given, this operand may be shortened. */
6017 switch (guess_suffix)
6018 {
6019 case LONG_MNEM_SUFFIX:
6020 i.types[op].bitfield.imm32 = 1;
6021 i.types[op].bitfield.imm64 = 1;
6022 break;
6023 case WORD_MNEM_SUFFIX:
6024 i.types[op].bitfield.imm16 = 1;
6025 i.types[op].bitfield.imm32 = 1;
6026 i.types[op].bitfield.imm32s = 1;
6027 i.types[op].bitfield.imm64 = 1;
6028 break;
6029 case BYTE_MNEM_SUFFIX:
6030 i.types[op].bitfield.imm8 = 1;
6031 i.types[op].bitfield.imm8s = 1;
6032 i.types[op].bitfield.imm16 = 1;
6033 i.types[op].bitfield.imm32 = 1;
6034 i.types[op].bitfield.imm32s = 1;
6035 i.types[op].bitfield.imm64 = 1;
6036 break;
6037 }
6038
6039 /* If this operand is at most 16 bits, convert it
6040 to a signed 16 bit number before trying to see
6041 whether it will fit in an even smaller size.
6042 This allows a 16-bit operand such as $0xffe0 to
6043 be recognised as within Imm8S range. */
6044 if ((i.types[op].bitfield.imm16)
6045 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
6046 {
6047 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6048 ^ 0x8000) - 0x8000);
6049 }
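	      /* Worked example of the adjustment above: for $0xffe0,
		 0xffe0 ^ 0x8000 == 0x7fe0 and 0x7fe0 - 0x8000 == -0x20,
		 so the immediate is now -32 and also qualifies as
		 Imm8S.  */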
6050 #ifdef BFD64
6051 	  /* Likewise convert a 32-bit immediate to its signed form for 64-bit BFD.  */
6052 if ((i.types[op].bitfield.imm32)
6053 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
6054 {
6055 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6056 ^ ((offsetT) 1 << 31))
6057 - ((offsetT) 1 << 31));
6058 }
6059 #endif
6060 i.types[op]
6061 = operand_type_or (i.types[op],
6062 smallest_imm_type (i.op[op].imms->X_add_number));
6063
6064 	      /* We must avoid matching Imm32 templates when only a 64bit
6065 		 immediate is available.  */
6066 if (guess_suffix == QWORD_MNEM_SUFFIX)
6067 i.types[op].bitfield.imm32 = 0;
6068 break;
6069
6070 case O_absent:
6071 case O_register:
6072 abort ();
6073
6074 /* Symbols and expressions. */
6075 default:
6076 /* Convert symbolic operand to proper sizes for matching, but don't
6077 prevent matching a set of insns that only supports sizes other
6078 than those matching the insn suffix. */
6079 {
6080 i386_operand_type mask, allowed;
6081 const insn_template *t = current_templates->start;
6082
6083 operand_type_set (&mask, 0);
6084 switch (guess_suffix)
6085 {
6086 case QWORD_MNEM_SUFFIX:
6087 mask.bitfield.imm64 = 1;
6088 mask.bitfield.imm32s = 1;
6089 break;
6090 case LONG_MNEM_SUFFIX:
6091 mask.bitfield.imm32 = 1;
6092 break;
6093 case WORD_MNEM_SUFFIX:
6094 mask.bitfield.imm16 = 1;
6095 break;
6096 case BYTE_MNEM_SUFFIX:
6097 mask.bitfield.imm8 = 1;
6098 break;
6099 default:
6100 break;
6101 }
6102
6103 allowed = operand_type_and (t->operand_types[op], mask);
6104 while (++t < current_templates->end)
6105 {
6106 allowed = operand_type_or (allowed, t->operand_types[op]);
6107 allowed = operand_type_and (allowed, mask);
6108 }
6109
6110 if (!operand_type_all_zero (&allowed))
6111 i.types[op] = operand_type_and (i.types[op], mask);
6112 }
6113 break;
6114 }
6115 }
6116 }
6117
6118 /* Try to use the smallest displacement type too. */
6119 static bool
6120 optimize_disp (const insn_template *t)
6121 {
6122 unsigned int op;
6123
6124 if (!want_disp32 (t)
6125 && (!t->opcode_modifier.jump
6126 || i.jumpabsolute || i.types[0].bitfield.baseindex))
6127 {
6128 for (op = 0; op < i.operands; ++op)
6129 {
6130 const expressionS *exp = i.op[op].disps;
6131
6132 if (!operand_type_check (i.types[op], disp))
6133 continue;
6134
6135 if (exp->X_op != O_constant)
6136 continue;
6137
6138 	  /* Since the displacement is sign-extended to 64bit, don't allow
6139 	     disp32 if it is out of range.  */
6140 if (fits_in_signed_long (exp->X_add_number))
6141 continue;
6142
6143 i.types[op].bitfield.disp32 = 0;
6144 if (i.types[op].bitfield.baseindex)
6145 {
6146 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
6147 (uint64_t) exp->X_add_number);
6148 return false;
6149 }
6150 }
6151 }
6152
6153   /* Don't optimize the displacement for movabs, since it only takes a
6154      64bit displacement.  */
6155 if (i.disp_encoding > disp_encoding_8bit
6156 || (flag_code == CODE_64BIT && t->mnem_off == MN_movabs))
6157 return true;
6158
6159 for (op = i.operands; op-- > 0;)
6160 if (operand_type_check (i.types[op], disp))
6161 {
6162 if (i.op[op].disps->X_op == O_constant)
6163 {
6164 offsetT op_disp = i.op[op].disps->X_add_number;
6165
6166 if (!op_disp && i.types[op].bitfield.baseindex)
6167 {
6168 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6169 i.op[op].disps = NULL;
6170 i.disp_operands--;
6171 continue;
6172 }
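	      /* For instance, "movl %eax, 0(%ebx)" is from here on
		 encoded exactly like "movl %eax, (%ebx)", saving the
		 displacement byte(s); this only applies to a constant
		 zero used with a base/index register.  */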
6173
6174 if (i.types[op].bitfield.disp16
6175 && fits_in_unsigned_word (op_disp))
6176 {
6177 /* If this operand is at most 16 bits, convert
6178 to a signed 16 bit number and don't use 64bit
6179 displacement. */
6180 op_disp = ((op_disp ^ 0x8000) - 0x8000);
6181 i.types[op].bitfield.disp64 = 0;
6182 }
6183
6184 #ifdef BFD64
6185 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
6186 if ((flag_code != CODE_64BIT
6187 ? i.types[op].bitfield.disp32
6188 : want_disp32 (t)
6189 && (!t->opcode_modifier.jump
6190 || i.jumpabsolute || i.types[op].bitfield.baseindex))
6191 && fits_in_unsigned_long (op_disp))
6192 {
6193 /* If this operand is at most 32 bits, convert
6194 to a signed 32 bit number and don't use 64bit
6195 displacement. */
6196 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
6197 i.types[op].bitfield.disp64 = 0;
6198 i.types[op].bitfield.disp32 = 1;
6199 }
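	      /* Worked example: op_disp == 0xfffff000 gives
		 0xfffff000 ^ 0x80000000 == 0x7ffff000, and subtracting
		 0x80000000 yields -0x1000, i.e. the same encoding seen
		 as a sign-extended 32-bit displacement.  */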
6200
6201 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
6202 {
6203 i.types[op].bitfield.disp64 = 0;
6204 i.types[op].bitfield.disp32 = 1;
6205 }
6206 #endif
6207 if ((i.types[op].bitfield.disp32
6208 || i.types[op].bitfield.disp16)
6209 && fits_in_disp8 (op_disp))
6210 i.types[op].bitfield.disp8 = 1;
6211
6212 i.op[op].disps->X_add_number = op_disp;
6213 }
6214 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
6215 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
6216 {
6217 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
6218 i.op[op].disps, 0, i.reloc[op]);
6219 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6220 }
6221 else
6222 	/* We only support a 64bit displacement for constants.  */
6223 i.types[op].bitfield.disp64 = 0;
6224 }
6225
6226 return true;
6227 }
6228
6229 /* Return 1 if there is a match in broadcast bytes between operand
6230 GIVEN and instruction template T. */
6231
6232 static INLINE int
6233 match_broadcast_size (const insn_template *t, unsigned int given)
6234 {
6235 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
6236 && i.types[given].bitfield.byte)
6237 || (t->opcode_modifier.broadcast == WORD_BROADCAST
6238 && i.types[given].bitfield.word)
6239 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
6240 && i.types[given].bitfield.dword)
6241 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
6242 && i.types[given].bitfield.qword));
6243 }
6244
6245 /* Check if operands are valid for the instruction. */
6246
6247 static int
6248 check_VecOperands (const insn_template *t)
6249 {
6250 unsigned int op;
6251 i386_cpu_flags cpu;
6252
6253 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6254      any one operand implicitly require AVX512VL support if the actual
6255 operand size is YMMword or XMMword. Since this function runs after
6256 template matching, there's no need to check for YMMword/XMMword in
6257 the template. */
6258 cpu = cpu_flags_and (t->cpu_flags, avx512);
6259 if (!cpu_flags_all_zero (&cpu)
6260 && !t->cpu_flags.bitfield.cpuavx512vl
6261 && !cpu_arch_flags.bitfield.cpuavx512vl)
6262 {
6263 for (op = 0; op < t->operands; ++op)
6264 {
6265 if (t->operand_types[op].bitfield.zmmword
6266 && (i.types[op].bitfield.ymmword
6267 || i.types[op].bitfield.xmmword))
6268 {
6269 i.error = unsupported;
6270 return 1;
6271 }
6272 }
6273 }
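  /* E.g. an AVX512F insn written with %ymm or %xmm operands against such
     a combined template is rejected here unless AVX512VL is enabled,
     since only the VL extension defines the 128-/256-bit forms.  */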
6274
6275   /* Somewhat similarly, templates specifying both AVX and AVX2 require
6276      AVX2 support if the actual operand size is YMMword.  */
6277 if (t->cpu_flags.bitfield.cpuavx
6278 && t->cpu_flags.bitfield.cpuavx2
6279 && !cpu_arch_flags.bitfield.cpuavx2)
6280 {
6281 for (op = 0; op < t->operands; ++op)
6282 {
6283 if (t->operand_types[op].bitfield.xmmword
6284 && i.types[op].bitfield.ymmword)
6285 {
6286 i.error = unsupported;
6287 return 1;
6288 }
6289 }
6290 }
6291
6292   /* Without a VSIB byte, we can't have a vector register as the index.  */
6293 if (!t->opcode_modifier.sib
6294 && i.index_reg
6295 && (i.index_reg->reg_type.bitfield.xmmword
6296 || i.index_reg->reg_type.bitfield.ymmword
6297 || i.index_reg->reg_type.bitfield.zmmword))
6298 {
6299 i.error = unsupported_vector_index_register;
6300 return 1;
6301 }
6302
6303 /* Check if default mask is allowed. */
6304 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6305 && (!i.mask.reg || i.mask.reg->reg_num == 0))
6306 {
6307 i.error = no_default_mask;
6308 return 1;
6309 }
6310
6311   /* With a VSIB byte, we need a vector register as the index, and all
6312      vector registers must be distinct.  */
6313 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6314 {
6315 if (!i.index_reg
6316 || !((t->opcode_modifier.sib == VECSIB128
6317 && i.index_reg->reg_type.bitfield.xmmword)
6318 || (t->opcode_modifier.sib == VECSIB256
6319 && i.index_reg->reg_type.bitfield.ymmword)
6320 || (t->opcode_modifier.sib == VECSIB512
6321 && i.index_reg->reg_type.bitfield.zmmword)))
6322 {
6323 i.error = invalid_vsib_address;
6324 return 1;
6325 }
6326
6327 gas_assert (i.reg_operands == 2 || i.mask.reg);
6328 if (i.reg_operands == 2 && !i.mask.reg)
6329 {
6330 gas_assert (i.types[0].bitfield.class == RegSIMD);
6331 gas_assert (i.types[0].bitfield.xmmword
6332 || i.types[0].bitfield.ymmword);
6333 gas_assert (i.types[2].bitfield.class == RegSIMD);
6334 gas_assert (i.types[2].bitfield.xmmword
6335 || i.types[2].bitfield.ymmword);
6336 if (operand_check == check_none)
6337 return 0;
6338 if (register_number (i.op[0].regs)
6339 != register_number (i.index_reg)
6340 && register_number (i.op[2].regs)
6341 != register_number (i.index_reg)
6342 && register_number (i.op[0].regs)
6343 != register_number (i.op[2].regs))
6344 return 0;
6345 if (operand_check == check_error)
6346 {
6347 i.error = invalid_vector_register_set;
6348 return 1;
6349 }
6350 as_warn (_("mask, index, and destination registers should be distinct"));
6351 }
6352 else if (i.reg_operands == 1 && i.mask.reg)
6353 {
6354 if (i.types[1].bitfield.class == RegSIMD
6355 && (i.types[1].bitfield.xmmword
6356 || i.types[1].bitfield.ymmword
6357 || i.types[1].bitfield.zmmword)
6358 && (register_number (i.op[1].regs)
6359 == register_number (i.index_reg)))
6360 {
6361 if (operand_check == check_error)
6362 {
6363 i.error = invalid_vector_register_set;
6364 return 1;
6365 }
6366 if (operand_check != check_none)
6367 as_warn (_("index and destination registers should be distinct"));
6368 }
6369 }
6370 }
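  /* Illustrative example: the AVX2 gather
     "vpgatherdd %xmm2, (%eax,%xmm2,4), %xmm3" reuses %xmm2 as both mask
     and index, so depending on the operand-check setting it draws the
     distinctness warning or error above.  */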
6371
6372 /* For AMX instructions with 3 TMM register operands, all operands
6373 must be distinct. */
6374 if (i.reg_operands == 3
6375 && t->operand_types[0].bitfield.tmmword
6376 && (i.op[0].regs == i.op[1].regs
6377 || i.op[0].regs == i.op[2].regs
6378 || i.op[1].regs == i.op[2].regs))
6379 {
6380 i.error = invalid_tmm_register_set;
6381 return 1;
6382 }
6383
6384   /* Some special instructions require that the destination be distinct
6385      from all source registers.  */
6386 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6387 {
6388 unsigned int dest_reg = i.operands - 1;
6389
6390 know (i.operands >= 3);
6391
6392 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
6393 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6394 || (i.reg_operands > 2
6395 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6396 {
6397 i.error = invalid_dest_and_src_register_set;
6398 return 1;
6399 }
6400 }
6401
6402 /* Check if broadcast is supported by the instruction and is applied
6403 to the memory operand. */
6404 if (i.broadcast.type || i.broadcast.bytes)
6405 {
6406 i386_operand_type type, overlap;
6407
6408 /* Check if specified broadcast is supported in this instruction,
6409 and its broadcast bytes match the memory operand. */
6410 op = i.broadcast.operand;
6411 if (!t->opcode_modifier.broadcast
6412 || !(i.flags[op] & Operand_Mem)
6413 || (!i.types[op].bitfield.unspecified
6414 && !match_broadcast_size (t, op)))
6415 {
6416 bad_broadcast:
6417 i.error = unsupported_broadcast;
6418 return 1;
6419 }
6420
6421 operand_type_set (&type, 0);
6422 switch (get_broadcast_bytes (t, false))
6423 {
6424 case 2:
6425 type.bitfield.word = 1;
6426 break;
6427 case 4:
6428 type.bitfield.dword = 1;
6429 break;
6430 case 8:
6431 type.bitfield.qword = 1;
6432 break;
6433 case 16:
6434 type.bitfield.xmmword = 1;
6435 break;
6436 case 32:
6437 type.bitfield.ymmword = 1;
6438 break;
6439 case 64:
6440 type.bitfield.zmmword = 1;
6441 break;
6442 default:
6443 goto bad_broadcast;
6444 }
6445
6446 overlap = operand_type_and (type, t->operand_types[op]);
6447 if (t->operand_types[op].bitfield.class == RegSIMD
6448 && t->operand_types[op].bitfield.byte
6449 + t->operand_types[op].bitfield.word
6450 + t->operand_types[op].bitfield.dword
6451 + t->operand_types[op].bitfield.qword > 1)
6452 {
6453 overlap.bitfield.xmmword = 0;
6454 overlap.bitfield.ymmword = 0;
6455 overlap.bitfield.zmmword = 0;
6456 }
6457 if (operand_type_all_zero (&overlap))
6458 goto bad_broadcast;
6459
6460 if (t->opcode_modifier.checkoperandsize)
6461 {
6462 unsigned int j;
6463
6464 type.bitfield.baseindex = 1;
6465 for (j = 0; j < i.operands; ++j)
6466 {
6467 if (j != op
6468 && !operand_type_register_match(i.types[j],
6469 t->operand_types[j],
6470 type,
6471 t->operand_types[op]))
6472 goto bad_broadcast;
6473 }
6474 }
6475 }
6476   /* If broadcast is supported by this instruction, we need to check that
6477      an operand of one-element size isn't specified without broadcast.  */
6478 else if (t->opcode_modifier.broadcast && i.mem_operands)
6479 {
6480 /* Find memory operand. */
6481 for (op = 0; op < i.operands; op++)
6482 if (i.flags[op] & Operand_Mem)
6483 break;
6484 gas_assert (op < i.operands);
6485 /* Check size of the memory operand. */
6486 if (match_broadcast_size (t, op))
6487 {
6488 i.error = broadcast_needed;
6489 return 1;
6490 }
6491 }
6492 else
6493 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
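  /* Broadcast example: in "vaddps (%eax){1to16}, %zmm1, %zmm2" a 4-byte
     element is replicated 16 times to fill the ZMMword operand.
     Conversely, a one-element-sized memory operand (e.g. Intel syntax
     "dword ptr") without a {1to<n>} specifier runs into the
     broadcast_needed error above.  */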
6494
6495 /* Check if requested masking is supported. */
6496 if (i.mask.reg)
6497 {
6498 switch (t->opcode_modifier.masking)
6499 {
6500 case BOTH_MASKING:
6501 break;
6502 case MERGING_MASKING:
6503 if (i.mask.zeroing)
6504 {
6505 case 0:
6506 i.error = unsupported_masking;
6507 return 1;
6508 }
6509 break;
6510 case DYNAMIC_MASKING:
6511 /* Memory destinations allow only merging masking. */
6512 if (i.mask.zeroing && i.mem_operands)
6513 {
6514 /* Find memory operand. */
6515 for (op = 0; op < i.operands; op++)
6516 if (i.flags[op] & Operand_Mem)
6517 break;
6518 gas_assert (op < i.operands);
6519 if (op == i.operands - 1)
6520 {
6521 i.error = unsupported_masking;
6522 return 1;
6523 }
6524 }
6525 break;
6526 default:
6527 abort ();
6528 }
6529 }
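  /* E.g. merging masking "vmovaps %zmm0, (%eax){%k1}" is fine for a
     memory destination, while the zeroing form
     "vmovaps %zmm0, (%eax){%k1}{z}" is rejected above, as memory
     destinations permit merging only.  */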
6530
6531 /* Check if masking is applied to dest operand. */
6532 if (i.mask.reg && (i.mask.operand != i.operands - 1))
6533 {
6534 i.error = mask_not_on_destination;
6535 return 1;
6536 }
6537
6538 /* Check RC/SAE. */
6539 if (i.rounding.type != rc_none)
6540 {
6541 if (!t->opcode_modifier.sae
6542 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6543 || i.mem_operands)
6544 {
6545 i.error = unsupported_rc_sae;
6546 return 1;
6547 }
6548
6549 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6550 operand. */
6551 if (t->opcode_modifier.evex != EVEXLIG)
6552 {
6553 for (op = 0; op < t->operands; ++op)
6554 if (i.types[op].bitfield.zmmword)
6555 break;
6556 if (op >= t->operands)
6557 {
6558 i.error = operand_size_mismatch;
6559 return 1;
6560 }
6561 }
6562 }
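  /* For instance, "vaddps {rz-sae}, %zmm1, %zmm2, %zmm3" passes: the
     operands are registers and include a ZMM one.  Attaching a rounding
     control to a form with a memory operand, or using only XMM/YMM
     operands on a non-EVEX.LIG template, fails the checks above.  */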
6563
6564   /* Check the special Imm4 cases; the immediate must be the first operand.  */
6565 if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6566 {
6567 if (i.op[0].imms->X_op != O_constant
6568 || !fits_in_imm4 (i.op[0].imms->X_add_number))
6569 {
6570 i.error = bad_imm4;
6571 return 1;
6572 }
6573
6574 /* Turn off Imm<N> so that update_imm won't complain. */
6575 operand_type_set (&i.types[0], 0);
6576 }
6577
6578 /* Check vector Disp8 operand. */
6579 if (t->opcode_modifier.disp8memshift
6580 && i.disp_encoding <= disp_encoding_8bit)
6581 {
6582 if (i.broadcast.type || i.broadcast.bytes)
6583 i.memshift = t->opcode_modifier.broadcast - 1;
6584 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6585 i.memshift = t->opcode_modifier.disp8memshift;
6586 else
6587 {
6588 const i386_operand_type *type = NULL, *fallback = NULL;
6589
6590 i.memshift = 0;
6591 for (op = 0; op < i.operands; op++)
6592 if (i.flags[op] & Operand_Mem)
6593 {
6594 if (t->opcode_modifier.evex == EVEXLIG)
6595 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6596 else if (t->operand_types[op].bitfield.xmmword
6597 + t->operand_types[op].bitfield.ymmword
6598 + t->operand_types[op].bitfield.zmmword <= 1)
6599 type = &t->operand_types[op];
6600 else if (!i.types[op].bitfield.unspecified)
6601 type = &i.types[op];
6602 else /* Ambiguities get resolved elsewhere. */
6603 fallback = &t->operand_types[op];
6604 }
6605 else if (i.types[op].bitfield.class == RegSIMD
6606 && t->opcode_modifier.evex != EVEXLIG)
6607 {
6608 if (i.types[op].bitfield.zmmword)
6609 i.memshift = 6;
6610 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6611 i.memshift = 5;
6612 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6613 i.memshift = 4;
6614 }
6615
6616 if (!type && !i.memshift)
6617 type = fallback;
6618 if (type)
6619 {
6620 if (type->bitfield.zmmword)
6621 i.memshift = 6;
6622 else if (type->bitfield.ymmword)
6623 i.memshift = 5;
6624 else if (type->bitfield.xmmword)
6625 i.memshift = 4;
6626 }
6627
6628 /* For the check in fits_in_disp8(). */
6629 if (i.memshift == 0)
6630 i.memshift = -1;
6631 }
6632
6633 for (op = 0; op < i.operands; op++)
6634 if (operand_type_check (i.types[op], disp)
6635 && i.op[op].disps->X_op == O_constant)
6636 {
6637 if (fits_in_disp8 (i.op[op].disps->X_add_number))
6638 {
6639 i.types[op].bitfield.disp8 = 1;
6640 return 0;
6641 }
6642 i.types[op].bitfield.disp8 = 0;
6643 }
6644 }
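      /* Compressed disp8 example: with i.memshift == 6 (ZMMword
	 accesses) a displacement of 0x100 still fits, as 0x100 >> 6 == 4
	 is a signed byte, whereas 0x44 is not a multiple of 64 and so
	 cannot use the disp8 form.  */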
6645
6646 i.memshift = 0;
6647
6648 return 0;
6649 }
6650
6651 /* Check if encoding requirements are met by the instruction. */
6652
6653 static int
6654 VEX_check_encoding (const insn_template *t)
6655 {
6656 if (i.vec_encoding == vex_encoding_error)
6657 {
6658 i.error = unsupported;
6659 return 1;
6660 }
6661
6662 if (i.vec_encoding == vex_encoding_evex)
6663 {
6664 /* This instruction must be encoded with EVEX prefix. */
6665 if (!is_evex_encoding (t))
6666 {
6667 i.error = unsupported;
6668 return 1;
6669 }
6670 return 0;
6671 }
6672
6673 if (!t->opcode_modifier.vex)
6674 {
6675 /* This instruction template doesn't have VEX prefix. */
6676 if (i.vec_encoding != vex_encoding_default)
6677 {
6678 i.error = unsupported;
6679 return 1;
6680 }
6681 return 0;
6682 }
6683
6684 return 0;
6685 }
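/* E.g. "{evex} vpaddd %xmm1, %xmm2, %xmm3" sets vex_encoding_evex and
   therefore skips VEX-only templates above, while a "{vex}" pseudo
   prefix applied to an insn that has no VEX template is rejected as
   unsupported.  */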
6686
6687 /* Helper function for the progress() macro in match_template(). */
6688 static INLINE enum i386_error progress (enum i386_error new,
6689 enum i386_error last,
6690 unsigned int line, unsigned int *line_p)
6691 {
6692 if (line <= *line_p)
6693 return last;
6694 *line_p = line;
6695 return new;
6696 }
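/* Example of this bookkeeping: if template A fails the suffix check and
   template B later fails operand matching (a later source line), B's
   error is retained, so the diagnostic reflects the template that got
   furthest through the checks.  */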
6697
6698 static const insn_template *
6699 match_template (char mnem_suffix)
6700 {
6701 /* Points to template once we've found it. */
6702 const insn_template *t;
6703 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6704 i386_operand_type overlap4;
6705 unsigned int found_reverse_match;
6706 i386_operand_type operand_types [MAX_OPERANDS];
6707 int addr_prefix_disp;
6708 unsigned int j, size_match, check_register, errline = __LINE__;
6709 enum i386_error specific_error = number_of_operands_mismatch;
6710 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6711
6712 #if MAX_OPERANDS != 5
6713 # error "MAX_OPERANDS must be 5."
6714 #endif
6715
6716 found_reverse_match = 0;
6717 addr_prefix_disp = -1;
6718
6719 for (t = current_templates->start; t < current_templates->end; t++)
6720 {
6721 addr_prefix_disp = -1;
6722 found_reverse_match = 0;
6723
6724 /* Must have right number of operands. */
6725 if (i.operands != t->operands)
6726 continue;
6727
6728 /* Check processor support. */
6729 specific_error = progress (unsupported);
6730 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6731 continue;
6732
6733 /* Check AT&T mnemonic. */
6734 specific_error = progress (unsupported_with_intel_mnemonic);
6735 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6736 continue;
6737
6738 /* Check AT&T/Intel syntax. */
6739 specific_error = progress (unsupported_syntax);
6740 if ((intel_syntax && t->opcode_modifier.attsyntax)
6741 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6742 continue;
6743
6744 /* Check Intel64/AMD64 ISA. */
6745 switch (isa64)
6746 {
6747 default:
6748 /* Default: Don't accept Intel64. */
6749 if (t->opcode_modifier.isa64 == INTEL64)
6750 continue;
6751 break;
6752 case amd64:
6753 	  /* -mamd64: Don't accept Intel64 or Intel64-only insns.  */
6754 if (t->opcode_modifier.isa64 >= INTEL64)
6755 continue;
6756 break;
6757 case intel64:
6758 /* -mintel64: Don't accept AMD64. */
6759 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6760 continue;
6761 break;
6762 }
6763
6764 /* Check the suffix. */
6765 specific_error = progress (invalid_instruction_suffix);
6766 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
6767 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
6768 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
6769 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
6770 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
6771 continue;
6772
6773 specific_error = progress (operand_size_mismatch);
6774 size_match = operand_size_match (t);
6775 if (!size_match)
6776 continue;
6777
6778 /* This is intentionally not
6779
6780 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6781
6782 as the case of a missing * on the operand is accepted (perhaps with
6783 a warning, issued further down). */
6784 specific_error = progress (operand_type_mismatch);
6785 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6786 continue;
6787
6788 /* In Intel syntax, normally we can check for memory operand size when
6789 there is no mnemonic suffix. But jmp and call have 2 different
6790 encodings with Dword memory operand size. Skip the "near" one
6791 (permitting a register operand) when "far" was requested. */
6792 if (i.far_branch
6793 && t->opcode_modifier.jump == JUMP_ABSOLUTE
6794 && t->operand_types[0].bitfield.class == Reg)
6795 continue;
6796
6797 for (j = 0; j < MAX_OPERANDS; j++)
6798 operand_types[j] = t->operand_types[j];
6799
6800 /* In general, don't allow 32-bit operands on pre-386. */
6801 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
6802 : operand_size_mismatch);
6803 j = i.imm_operands + (t->operands > i.imm_operands + 1);
6804 if (i.suffix == LONG_MNEM_SUFFIX
6805 && !cpu_arch_flags.bitfield.cpui386
6806 && (intel_syntax
6807 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6808 && !intel_float_operand (insn_name (t)))
6809 : intel_float_operand (insn_name (t)) != 2)
6810 && (t->operands == i.imm_operands
6811 || (operand_types[i.imm_operands].bitfield.class != RegMMX
6812 && operand_types[i.imm_operands].bitfield.class != RegSIMD
6813 && operand_types[i.imm_operands].bitfield.class != RegMask)
6814 || (operand_types[j].bitfield.class != RegMMX
6815 && operand_types[j].bitfield.class != RegSIMD
6816 && operand_types[j].bitfield.class != RegMask))
6817 && !t->opcode_modifier.sib)
6818 continue;
6819
6820 /* Do not verify operands when there are none. */
6821 if (!t->operands)
6822 {
6823 if (VEX_check_encoding (t))
6824 {
6825 specific_error = progress (i.error);
6826 continue;
6827 }
6828
6829 /* We've found a match; break out of loop. */
6830 break;
6831 }
6832
6833 if (!t->opcode_modifier.jump
6834 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6835 {
6836 /* There should be only one Disp operand. */
6837 for (j = 0; j < MAX_OPERANDS; j++)
6838 if (operand_type_check (operand_types[j], disp))
6839 break;
6840 if (j < MAX_OPERANDS)
6841 {
6842 bool override = (i.prefix[ADDR_PREFIX] != 0);
6843
6844 addr_prefix_disp = j;
6845
6846 	      /* An address size prefix turns a Disp64 operand into Disp32, and
6847 		 a Disp32/Disp16 one into Disp16/Disp32 respectively.  */
6848 switch (flag_code)
6849 {
6850 case CODE_16BIT:
6851 override = !override;
6852 /* Fall through. */
6853 case CODE_32BIT:
6854 if (operand_types[j].bitfield.disp32
6855 && operand_types[j].bitfield.disp16)
6856 {
6857 operand_types[j].bitfield.disp16 = override;
6858 operand_types[j].bitfield.disp32 = !override;
6859 }
6860 gas_assert (!operand_types[j].bitfield.disp64);
6861 break;
6862
6863 case CODE_64BIT:
6864 if (operand_types[j].bitfield.disp64)
6865 {
6866 gas_assert (!operand_types[j].bitfield.disp32);
6867 operand_types[j].bitfield.disp32 = override;
6868 operand_types[j].bitfield.disp64 = !override;
6869 }
6870 operand_types[j].bitfield.disp16 = 0;
6871 break;
6872 }
6873 }
6874 }
6875
6876 /* We check register size if needed. */
6877 if (t->opcode_modifier.checkoperandsize)
6878 {
6879 check_register = (1 << t->operands) - 1;
6880 if (i.broadcast.type || i.broadcast.bytes)
6881 check_register &= ~(1 << i.broadcast.operand);
6882 }
6883 else
6884 check_register = 0;
6885
6886 overlap0 = operand_type_and (i.types[0], operand_types[0]);
6887 switch (t->operands)
6888 {
6889 case 1:
6890 if (!operand_type_match (overlap0, i.types[0]))
6891 continue;
6892
6893 /* Allow the ModR/M encoding to be requested by using the {load} or
6894 {store} pseudo prefix on an applicable insn. */
6895 if (!t->opcode_modifier.modrm
6896 && i.reg_operands == 1
6897 && ((i.dir_encoding == dir_encoding_load
6898 && t->mnem_off != MN_pop)
6899 || (i.dir_encoding == dir_encoding_store
6900 && t->mnem_off != MN_push))
6901 /* Avoid BSWAP. */
6902 && t->mnem_off != MN_bswap)
6903 continue;
6904 break;
6905
6906 case 2:
6907 /* xchg %eax, %eax is a special case. It is an alias for nop
6908 only in 32bit mode and we can use opcode 0x90. In 64bit
6909 mode, we can't use 0x90 for xchg %eax, %eax since it should
6910 zero-extend %eax to %rax. */
6911 if (t->base_opcode == 0x90
6912 && t->opcode_space == SPACE_BASE)
6913 {
6914 if (flag_code == CODE_64BIT
6915 && i.types[0].bitfield.instance == Accum
6916 && i.types[0].bitfield.dword
6917 && i.types[1].bitfield.instance == Accum)
6918 continue;
6919
6920 /* Allow the ModR/M encoding to be requested by using the
6921 {load} or {store} pseudo prefix. */
6922 if (i.dir_encoding == dir_encoding_load
6923 || i.dir_encoding == dir_encoding_store)
6924 continue;
6925 }
6926
6927 if (t->base_opcode == MOV_AX_DISP32
6928 && t->opcode_space == SPACE_BASE
6929 && t->mnem_off != MN_movabs)
6930 {
6931 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
6932 if (i.reloc[0] == BFD_RELOC_386_GOT32)
6933 continue;
6934
6935 /* xrelease mov %eax, <disp> is another special case. It must not
6936 match the accumulator-only encoding of mov. */
6937 if (i.hle_prefix)
6938 continue;
6939
6940 /* Allow the ModR/M encoding to be requested by using a suitable
6941 {load} or {store} pseudo prefix. */
6942 if (i.dir_encoding == (i.types[0].bitfield.instance == Accum
6943 ? dir_encoding_store
6944 : dir_encoding_load)
6945 && !i.types[0].bitfield.disp64
6946 && !i.types[1].bitfield.disp64)
6947 continue;
6948 }
6949
6950 /* Allow the ModR/M encoding to be requested by using the {load} or
6951 {store} pseudo prefix on an applicable insn. */
6952 if (!t->opcode_modifier.modrm
6953 && i.reg_operands == 1
6954 && i.imm_operands == 1
6955 && (i.dir_encoding == dir_encoding_load
6956 || i.dir_encoding == dir_encoding_store)
6957 && t->opcode_space == SPACE_BASE)
6958 {
6959 if (t->base_opcode == 0xb0 /* mov $imm, %reg */
6960 && i.dir_encoding == dir_encoding_store)
6961 continue;
6962
6963 if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
6964 && (t->base_opcode != 0x3c /* cmp $imm, %acc */
6965 || i.dir_encoding == dir_encoding_load))
6966 continue;
6967
6968 if (t->base_opcode == 0xa8 /* test $imm, %acc */
6969 && i.dir_encoding == dir_encoding_load)
6970 continue;
6971 }
6972 /* Fall through. */
6973
6974 case 3:
6975 if (!(size_match & MATCH_STRAIGHT))
6976 goto check_reverse;
6977 /* Reverse direction of operands if swapping is possible in the first
6978 place (operands need to be symmetric) and
6979 - the load form is requested, and the template is a store form,
6980 - the store form is requested, and the template is a load form,
6981 - the non-default (swapped) form is requested. */
6982 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6983 if (t->opcode_modifier.d && i.reg_operands == i.operands
6984 && !operand_type_all_zero (&overlap1))
6985 switch (i.dir_encoding)
6986 {
6987 case dir_encoding_load:
6988 if (operand_type_check (operand_types[i.operands - 1], anymem)
6989 || t->opcode_modifier.regmem)
6990 goto check_reverse;
6991 break;
6992
6993 case dir_encoding_store:
6994 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6995 && !t->opcode_modifier.regmem)
6996 goto check_reverse;
6997 break;
6998
6999 case dir_encoding_swap:
7000 goto check_reverse;
7001
7002 case dir_encoding_default:
7003 break;
7004 }
7005 /* If we want store form, we skip the current load. */
7006 if ((i.dir_encoding == dir_encoding_store
7007 || i.dir_encoding == dir_encoding_swap)
7008 && i.mem_operands == 0
7009 && t->opcode_modifier.load)
7010 continue;
7011 /* Fall through. */
7012 case 4:
7013 case 5:
7014 overlap1 = operand_type_and (i.types[1], operand_types[1]);
7015 if (!operand_type_match (overlap0, i.types[0])
7016 || !operand_type_match (overlap1, i.types[1])
7017 || ((check_register & 3) == 3
7018 && !operand_type_register_match (i.types[0],
7019 operand_types[0],
7020 i.types[1],
7021 operand_types[1])))
7022 {
7023 specific_error = progress (i.error);
7024
7025 /* Check if other direction is valid ... */
7026 if (!t->opcode_modifier.d)
7027 continue;
7028
7029 check_reverse:
7030 if (!(size_match & MATCH_REVERSE))
7031 continue;
7032 /* Try reversing direction of operands. */
7033 j = t->cpu_flags.bitfield.cpufma4
7034 || t->cpu_flags.bitfield.cpuxop ? 1 : i.operands - 1;
7035 overlap0 = operand_type_and (i.types[0], operand_types[j]);
7036 overlap1 = operand_type_and (i.types[j], operand_types[0]);
7037 overlap2 = operand_type_and (i.types[1], operand_types[1]);
7038 gas_assert (t->operands != 3 || !check_register);
7039 if (!operand_type_match (overlap0, i.types[0])
7040 || !operand_type_match (overlap1, i.types[j])
7041 || (t->operands == 3
7042 && !operand_type_match (overlap2, i.types[1]))
7043 || (check_register
7044 && !operand_type_register_match (i.types[0],
7045 operand_types[j],
7046 i.types[j],
7047 operand_types[0])))
7048 {
7049 /* Does not match either direction. */
7050 specific_error = progress (i.error);
7051 continue;
7052 }
7053 /* found_reverse_match holds which variant of D
7054 we've found. */
7055 if (!t->opcode_modifier.d)
7056 found_reverse_match = 0;
7057 else if (operand_types[0].bitfield.tbyte)
7058 {
7059 if (t->opcode_modifier.operandconstraint != UGH)
7060 found_reverse_match = Opcode_FloatD;
7061 else
7062 found_reverse_match = ~0;
7063 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
7064 if ((t->extension_opcode & 4)
7065 && (intel_syntax || intel_mnemonic))
7066 found_reverse_match |= Opcode_FloatR;
7067 }
7068 else if (t->cpu_flags.bitfield.cpufma4
7069 || t->cpu_flags.bitfield.cpuxop)
7070 {
7071 found_reverse_match = Opcode_VexW;
7072 goto check_operands_345;
7073 }
7074 else if (t->opcode_space != SPACE_BASE
7075 && (t->opcode_space != SPACE_0F
7076 /* MOV to/from CR/DR/TR, as an exception, follow
7077 the base opcode space encoding model. */
7078 || (t->base_opcode | 7) != 0x27))
7079 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
7080 ? Opcode_ExtD : Opcode_SIMD_IntD;
7081 else if (!t->opcode_modifier.commutative)
7082 found_reverse_match = Opcode_D;
7083 else
7084 found_reverse_match = ~0;
7085 }
7086 else
7087 {
7088 /* Found a forward 2 operand match here. */
7089 check_operands_345:
7090 switch (t->operands)
7091 {
7092 case 5:
7093 overlap4 = operand_type_and (i.types[4], operand_types[4]);
7094 if (!operand_type_match (overlap4, i.types[4])
7095 || !operand_type_register_match (i.types[3],
7096 operand_types[3],
7097 i.types[4],
7098 operand_types[4]))
7099 {
7100 specific_error = progress (i.error);
7101 continue;
7102 }
7103 /* Fall through. */
7104 case 4:
7105 overlap3 = operand_type_and (i.types[3], operand_types[3]);
7106 if (!operand_type_match (overlap3, i.types[3])
7107 || ((check_register & 0xa) == 0xa
7108 && !operand_type_register_match (i.types[1],
7109 operand_types[1],
7110 i.types[3],
7111 operand_types[3]))
7112 || ((check_register & 0xc) == 0xc
7113 && !operand_type_register_match (i.types[2],
7114 operand_types[2],
7115 i.types[3],
7116 operand_types[3])))
7117 {
7118 specific_error = progress (i.error);
7119 continue;
7120 }
7121 /* Fall through. */
7122 case 3:
7123 overlap2 = operand_type_and (i.types[2], operand_types[2]);
7124 if (!operand_type_match (overlap2, i.types[2])
7125 || ((check_register & 5) == 5
7126 && !operand_type_register_match (i.types[0],
7127 operand_types[0],
7128 i.types[2],
7129 operand_types[2]))
7130 || ((check_register & 6) == 6
7131 && !operand_type_register_match (i.types[1],
7132 operand_types[1],
7133 i.types[2],
7134 operand_types[2])))
7135 {
7136 specific_error = progress (i.error);
7137 continue;
7138 }
7139 break;
7140 }
7141 }
7142 /* Found either forward/reverse 2, 3 or 4 operand match here:
7143 slip through to break. */
7144 }
7145
7146 /* Check if VEX/EVEX encoding requirements can be satisfied. */
7147 if (VEX_check_encoding (t))
7148 {
7149 specific_error = progress (i.error);
7150 continue;
7151 }
7152
7153 /* Check if vector operands are valid. */
7154 if (check_VecOperands (t))
7155 {
7156 specific_error = progress (i.error);
7157 continue;
7158 }
7159
7160 /* We've found a match; break out of loop. */
7161 break;
7162 }
7163
7164 #undef progress
7165
7166 if (t == current_templates->end)
7167 {
7168 /* We found no match. */
7169 i.error = specific_error;
7170 return NULL;
7171 }
7172
7173 if (!quiet_warnings)
7174 {
7175 if (!intel_syntax
7176 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
7177 as_warn (_("indirect %s without `*'"), insn_name (t));
7178
7179 if (t->opcode_modifier.isprefix
7180 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
7181 {
7182 /* Warn them that a data or address size prefix doesn't
7183 affect assembly of the next line of code. */
7184 as_warn (_("stand-alone `%s' prefix"), insn_name (t));
7185 }
7186 }
7187
7188 /* Copy the template we found. */
7189 install_template (t);
7190
7191 if (addr_prefix_disp != -1)
7192 i.tm.operand_types[addr_prefix_disp]
7193 = operand_types[addr_prefix_disp];
7194
7195 switch (found_reverse_match)
7196 {
7197 case 0:
7198 break;
7199
7200 case Opcode_FloatR:
7201 case Opcode_FloatR | Opcode_FloatD:
7202 i.tm.extension_opcode ^= Opcode_FloatR >> 3;
7203 found_reverse_match &= Opcode_FloatD;
7204
7205 /* Fall through. */
7206 default:
7207 /* If we found a reverse match we must alter the opcode direction
7208 bit and clear/flip the regmem modifier one. found_reverse_match
7209 holds bits to change (different for int & float insns). */
7210
7211 i.tm.base_opcode ^= found_reverse_match;
7212
7213 /* Certain SIMD insns have their load forms specified in the opcode
7214 table, and hence we need to _set_ RegMem instead of clearing it.
7215 We need to avoid setting the bit though on insns like KMOVW. */
7216 i.tm.opcode_modifier.regmem
7217 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7218 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7219 && !i.tm.opcode_modifier.regmem;
7220
7221 /* Fall through. */
7222 case ~0:
7223 i.tm.operand_types[0] = operand_types[i.operands - 1];
7224 i.tm.operand_types[i.operands - 1] = operand_types[0];
7225 break;
7226
7227 case Opcode_VexW:
7228 /* Only the first two register operands need reversing, alongside
7229 flipping VEX.W. */
7230 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7231
7232 j = i.tm.operand_types[0].bitfield.imm8;
7233 i.tm.operand_types[j] = operand_types[j + 1];
7234 i.tm.operand_types[j + 1] = operand_types[j];
7235 break;
7236 }
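/* Illustrative examples of the adjustments above: plain GPR moves flip
   the D bit (0x88 vs 0x8a for the byte forms), "movaps" flips bit 0
   (0f 28 load vs 0f 29 store) via Opcode_ExtD, and "movd" flips bit 4
   (0f 6e vs 0f 7e) via Opcode_SIMD_IntD.  */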
7237
7238 return t;
7239 }
7240
7241 static int
7242 check_string (void)
7243 {
7244 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7245 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7246
7247 if (i.seg[op] != NULL && i.seg[op] != reg_es)
7248 {
7249 as_bad (_("`%s' operand %u must use `%ses' segment"),
7250 insn_name (&i.tm),
7251 intel_syntax ? i.tm.operands - es_op : es_op + 1,
7252 register_prefix);
7253 return 0;
7254 }
7255
7256 /* There's only ever one segment override allowed per instruction.
7257 This instruction possibly has a legal segment override on the
7258 second operand, so copy the segment to where non-string
7259 instructions store it, allowing common code. */
7260 i.seg[op] = i.seg[1];
7261
7262 return 1;
7263 }
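/* For instance, "stos" always stores through %es:(%edi); spelling an
   explicit "%fs:" override on that operand triggers the error above,
   as string insns hard-wire the ES segment there.  */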
7264
7265 static int
7266 process_suffix (void)
7267 {
7268 bool is_movx = false;
7269
7270 /* If matched instruction specifies an explicit instruction mnemonic
7271 suffix, use it. */
7272 if (i.tm.opcode_modifier.size == SIZE16)
7273 i.suffix = WORD_MNEM_SUFFIX;
7274 else if (i.tm.opcode_modifier.size == SIZE32)
7275 i.suffix = LONG_MNEM_SUFFIX;
7276 else if (i.tm.opcode_modifier.size == SIZE64)
7277 i.suffix = QWORD_MNEM_SUFFIX;
7278 else if (i.reg_operands
7279 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7280 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7281 {
7282 unsigned int numop = i.operands;
7283
7284 /* MOVSX/MOVZX */
7285 is_movx = (i.tm.opcode_space == SPACE_0F
7286 && (i.tm.base_opcode | 8) == 0xbe)
7287 || (i.tm.opcode_space == SPACE_BASE
7288 && i.tm.base_opcode == 0x63
7289 && i.tm.cpu_flags.bitfield.cpu64);
7290
7291 /* movsx/movzx want only their source operand considered here, for the
7292 ambiguity checking below. The suffix will be replaced afterwards
7293 to represent the destination (register). */
7294 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7295 --i.operands;
7296
7297 /* crc32 needs REX.W set regardless of suffix / source operand size. */
7298 if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
7299 i.rex |= REX_W;
7300
7301 /* If there's no instruction mnemonic suffix we try to invent one
7302 based on GPR operands. */
7303 if (!i.suffix)
7304 {
7305 	  /* We take i.suffix from the last register operand specified.
7306 	     The destination register type is more significant than the
7307 	     source register type.  crc32 in SSE4.2 prefers the source
7308 	     register type. */
7309 unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
7310
7311 while (op--)
7312 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7313 || i.tm.operand_types[op].bitfield.instance == Accum)
7314 {
7315 if (i.types[op].bitfield.class != Reg)
7316 continue;
7317 if (i.types[op].bitfield.byte)
7318 i.suffix = BYTE_MNEM_SUFFIX;
7319 else if (i.types[op].bitfield.word)
7320 i.suffix = WORD_MNEM_SUFFIX;
7321 else if (i.types[op].bitfield.dword)
7322 i.suffix = LONG_MNEM_SUFFIX;
7323 else if (i.types[op].bitfield.qword)
7324 i.suffix = QWORD_MNEM_SUFFIX;
7325 else
7326 continue;
7327 break;
7328 }
7329
7330 /* As an exception, movsx/movzx silently default to a byte source
7331 in AT&T mode. */
7332 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7333 i.suffix = BYTE_MNEM_SUFFIX;
7334 }
7335 else if (i.suffix == BYTE_MNEM_SUFFIX)
7336 {
7337 if (!check_byte_reg ())
7338 return 0;
7339 }
7340 else if (i.suffix == LONG_MNEM_SUFFIX)
7341 {
7342 if (!check_long_reg ())
7343 return 0;
7344 }
7345 else if (i.suffix == QWORD_MNEM_SUFFIX)
7346 {
7347 if (!check_qword_reg ())
7348 return 0;
7349 }
7350 else if (i.suffix == WORD_MNEM_SUFFIX)
7351 {
7352 if (!check_word_reg ())
7353 return 0;
7354 }
7355 else if (intel_syntax
7356 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7357 /* Do nothing if the instruction is going to ignore the prefix. */
7358 ;
7359 else
7360 abort ();
7361
7362 /* Undo the movsx/movzx change done above. */
7363 i.operands = numop;
7364 }
7365 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7366 && !i.suffix)
7367 {
7368 i.suffix = stackop_size;
7369 if (stackop_size == LONG_MNEM_SUFFIX)
7370 {
7371 /* stackop_size is set to LONG_MNEM_SUFFIX for the
7372 .code16gcc directive to support 16-bit mode with
7373 32-bit address. For IRET without a suffix, generate
7374 16-bit IRET (opcode 0xcf) to return from an interrupt
7375 handler. */
7376 if (i.tm.base_opcode == 0xcf)
7377 {
7378 i.suffix = WORD_MNEM_SUFFIX;
7379 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7380 }
7381 /* Warn about changed behavior for segment register push/pop. */
7382 else if ((i.tm.base_opcode | 1) == 0x07)
7383 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7384 insn_name (&i.tm));
7385 }
7386 }
7387 else if (!i.suffix
7388 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7389 || i.tm.opcode_modifier.jump == JUMP_BYTE
7390 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7391 || (i.tm.opcode_space == SPACE_0F
7392 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7393 && i.tm.extension_opcode <= 3)))
7394 {
7395 switch (flag_code)
7396 {
7397 case CODE_64BIT:
7398 if (!i.tm.opcode_modifier.no_qsuf)
7399 {
7400 if (i.tm.opcode_modifier.jump == JUMP_BYTE
7401 || i.tm.opcode_modifier.no_lsuf)
7402 i.suffix = QWORD_MNEM_SUFFIX;
7403 break;
7404 }
7405 /* Fall through. */
7406 case CODE_32BIT:
7407 if (!i.tm.opcode_modifier.no_lsuf)
7408 i.suffix = LONG_MNEM_SUFFIX;
7409 break;
7410 case CODE_16BIT:
7411 if (!i.tm.opcode_modifier.no_wsuf)
7412 i.suffix = WORD_MNEM_SUFFIX;
7413 break;
7414 }
7415 }
7416
7417 if (!i.suffix
7418 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7419 /* Also cover lret/retf/iret in 64-bit mode. */
7420 || (flag_code == CODE_64BIT
7421 && !i.tm.opcode_modifier.no_lsuf
7422 && !i.tm.opcode_modifier.no_qsuf))
7423 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7424 /* Explicit sizing prefixes are assumed to disambiguate insns. */
7425 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7426 /* Accept FLDENV et al without suffix. */
7427 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7428 {
7429 unsigned int suffixes, evex = 0;
7430
7431 suffixes = !i.tm.opcode_modifier.no_bsuf;
7432 if (!i.tm.opcode_modifier.no_wsuf)
7433 suffixes |= 1 << 1;
7434 if (!i.tm.opcode_modifier.no_lsuf)
7435 suffixes |= 1 << 2;
7436 if (!i.tm.opcode_modifier.no_ssuf)
7437 suffixes |= 1 << 4;
7438 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7439 suffixes |= 1 << 5;
7440
7441 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
7442 also suitable for AT&T syntax mode, it was requested that this be
7443 restricted to just Intel syntax. */
7444 if (intel_syntax && is_any_vex_encoding (&i.tm)
7445 && !i.broadcast.type && !i.broadcast.bytes)
7446 {
7447 unsigned int op;
7448
7449 for (op = 0; op < i.tm.operands; ++op)
7450 {
7451 if (is_evex_encoding (&i.tm)
7452 && !cpu_arch_flags.bitfield.cpuavx512vl)
7453 {
7454 if (i.tm.operand_types[op].bitfield.ymmword)
7455 i.tm.operand_types[op].bitfield.xmmword = 0;
7456 if (i.tm.operand_types[op].bitfield.zmmword)
7457 i.tm.operand_types[op].bitfield.ymmword = 0;
7458 if (!i.tm.opcode_modifier.evex
7459 || i.tm.opcode_modifier.evex == EVEXDYN)
7460 i.tm.opcode_modifier.evex = EVEX512;
7461 }
7462
7463 if (i.tm.operand_types[op].bitfield.xmmword
7464 + i.tm.operand_types[op].bitfield.ymmword
7465 + i.tm.operand_types[op].bitfield.zmmword < 2)
7466 continue;
7467
7468 /* Any properly sized operand disambiguates the insn. */
7469 if (i.types[op].bitfield.xmmword
7470 || i.types[op].bitfield.ymmword
7471 || i.types[op].bitfield.zmmword)
7472 {
7473 suffixes &= ~(7 << 6);
7474 evex = 0;
7475 break;
7476 }
7477
7478 if ((i.flags[op] & Operand_Mem)
7479 && i.tm.operand_types[op].bitfield.unspecified)
7480 {
7481 if (i.tm.operand_types[op].bitfield.xmmword)
7482 suffixes |= 1 << 6;
7483 if (i.tm.operand_types[op].bitfield.ymmword)
7484 suffixes |= 1 << 7;
7485 if (i.tm.operand_types[op].bitfield.zmmword)
7486 suffixes |= 1 << 8;
7487 if (is_evex_encoding (&i.tm))
7488 evex = EVEX512;
7489 }
7490 }
7491 }
7492
7493 /* Are multiple suffixes / operand sizes allowed? */
7494 if (suffixes & (suffixes - 1))
7495 {
7496 if (intel_syntax
7497 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7498 || operand_check == check_error))
7499 {
7500 as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
7501 return 0;
7502 }
7503 if (operand_check == check_error)
7504 {
7505 as_bad (_("no instruction mnemonic suffix given and "
7506 "no register operands; can't size `%s'"), insn_name (&i.tm));
7507 return 0;
7508 }
7509 if (operand_check == check_warning)
7510 as_warn (_("%s; using default for `%s'"),
7511 intel_syntax
7512 ? _("ambiguous operand size")
7513 : _("no instruction mnemonic suffix given and "
7514 "no register operands"),
7515 insn_name (&i.tm));
7516
7517 if (i.tm.opcode_modifier.floatmf)
7518 i.suffix = SHORT_MNEM_SUFFIX;
7519 else if (is_movx)
7520 /* handled below */;
7521 else if (evex)
7522 i.tm.opcode_modifier.evex = evex;
7523 else if (flag_code == CODE_16BIT)
7524 i.suffix = WORD_MNEM_SUFFIX;
7525 else if (!i.tm.opcode_modifier.no_lsuf)
7526 i.suffix = LONG_MNEM_SUFFIX;
7527 else
7528 i.suffix = QWORD_MNEM_SUFFIX;
7529 }
7530 }
7531
7532 if (is_movx)
7533 {
7534 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7535 In AT&T syntax, if there is no suffix (warned about above), the default
7536 will be byte extension. */
7537 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7538 i.tm.base_opcode |= 1;
7539
7540 /* For further processing, the suffix should represent the destination
7541 (register). This is already the case when one was used with
7542 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7543 no suffix to begin with. */
7544 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7545 {
7546 if (i.types[1].bitfield.word)
7547 i.suffix = WORD_MNEM_SUFFIX;
7548 else if (i.types[1].bitfield.qword)
7549 i.suffix = QWORD_MNEM_SUFFIX;
7550 else
7551 i.suffix = LONG_MNEM_SUFFIX;
7552
7553 i.tm.opcode_modifier.w = 0;
7554 }
7555 }
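  /* E.g. for a bare AT&T "movzx %al, %edx" the byte-source default was
     applied earlier, and the suffix has now been rewritten to 'l' for
     the 32-bit destination, which is what the operand size handling
     below cares about.  */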
7556
7557 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7558 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7559 != (i.tm.operand_types[1].bitfield.class == Reg);
7560
7561 /* Change the opcode based on the operand size given by i.suffix. */
7562 switch (i.suffix)
7563 {
7564 /* Size floating point instruction. */
7565 case LONG_MNEM_SUFFIX:
7566 if (i.tm.opcode_modifier.floatmf)
7567 {
7568 i.tm.base_opcode ^= 4;
7569 break;
7570 }
7571 /* fall through */
7572 case WORD_MNEM_SUFFIX:
7573 case QWORD_MNEM_SUFFIX:
7574 /* It's not a byte, select word/dword operation. */
7575 if (i.tm.opcode_modifier.w)
7576 {
7577 if (i.short_form)
7578 i.tm.base_opcode |= 8;
7579 else
7580 i.tm.base_opcode |= 1;
7581 }
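      /* E.g. the ModR/M form of "mov" uses 0x88 for byte and 0x89
	 (|= 1) for larger operands, while the short register form
	 "mov $imm, %reg" goes from 0xb0 to 0xb8 (|= 8); i.short_form
	 above selected which bit applies.  */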
7582 /* fall through */
7583 case SHORT_MNEM_SUFFIX:
7584 /* Now select between word & dword operations via the operand
7585 size prefix, except for instructions that will ignore this
7586 prefix anyway. */
7587 if (i.suffix != QWORD_MNEM_SUFFIX
7588 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7589 && !i.tm.opcode_modifier.floatmf
7590 && !is_any_vex_encoding (&i.tm)
7591 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7592 || (flag_code == CODE_64BIT
7593 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7594 {
7595 unsigned int prefix = DATA_PREFIX_OPCODE;
7596
7597 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7598 prefix = ADDR_PREFIX_OPCODE;
7599
7600 if (!add_prefix (prefix))
7601 return 0;
7602 }
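	  /* E.g. "addw %ax, %bx" in 32-bit mode gains the 0x66 operand
	     size prefix here, while "jcxz" in 32-bit code gets the 0x67
	     address size prefix instead, since jcxz/loop size their
	     count register via the address size.  */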
7603
7604 	  /* Set REX.W (64-bit operand size) for an operand.  */
7605 if (i.suffix == QWORD_MNEM_SUFFIX
7606 && flag_code == CODE_64BIT
7607 && !i.tm.opcode_modifier.norex64
7608 && !i.tm.opcode_modifier.vexw
7609 	     /* Special case for xchg %rax,%rax.  It is a NOP and doesn't
7610 need rex64. */
7611 && ! (i.operands == 2
7612 && i.tm.base_opcode == 0x90
7613 && i.tm.opcode_space == SPACE_BASE
7614 && i.types[0].bitfield.instance == Accum
7615 && i.types[0].bitfield.qword
7616 && i.types[1].bitfield.instance == Accum))
7617 i.rex |= REX_W;
7618
7619 break;
7620
7621 case 0:
7622 /* Select word/dword/qword operation with explicit data sizing prefix
7623 when there are no suitable register operands. */
7624 if (i.tm.opcode_modifier.w
7625 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7626 && (!i.reg_operands
7627 || (i.reg_operands == 1
7628 /* ShiftCount */
7629 && (i.tm.operand_types[0].bitfield.instance == RegC
7630 /* InOutPortReg */
7631 || i.tm.operand_types[0].bitfield.instance == RegD
7632 || i.tm.operand_types[1].bitfield.instance == RegD
7633 || i.tm.mnem_off == MN_crc32))))
7634 i.tm.base_opcode |= 1;
7635 break;
7636 }
7637
7638 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7639 {
7640 gas_assert (!i.suffix);
7641 gas_assert (i.reg_operands);
7642
7643 if (i.tm.operand_types[0].bitfield.instance == Accum
7644 || i.operands == 1)
7645 {
7646 /* The address size override prefix changes the size of the
7647 first operand. */
7648 if (flag_code == CODE_64BIT
7649 && i.op[0].regs->reg_type.bitfield.word)
7650 {
7651 as_bad (_("16-bit addressing unavailable for `%s'"),
7652 insn_name (&i.tm));
7653 return 0;
7654 }
7655
7656 if ((flag_code == CODE_32BIT
7657 ? i.op[0].regs->reg_type.bitfield.word
7658 : i.op[0].regs->reg_type.bitfield.dword)
7659 && !add_prefix (ADDR_PREFIX_OPCODE))
7660 return 0;
7661 }
7662 else
7663 {
7664 	  /* Check for invalid register operands when the address size
7665 	     override prefix changes the size of register operands.  */
7666 unsigned int op;
7667 enum { need_word, need_dword, need_qword } need;
7668
7669 	  /* Check the register operand for the address size prefix when the
7670 	     memory operand has no real registers, e.g. a bare symbol, DISP,
7671 	     or the bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
7672 if (i.mem_operands == 1
7673 && i.reg_operands == 1
7674 && i.operands == 2
7675 && i.types[1].bitfield.class == Reg
7676 && (flag_code == CODE_32BIT
7677 ? i.op[1].regs->reg_type.bitfield.word
7678 : i.op[1].regs->reg_type.bitfield.dword)
7679 && ((i.base_reg == NULL && i.index_reg == NULL)
7680 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7681 || (x86_elf_abi == X86_64_X32_ABI
7682 && i.base_reg
7683 && i.base_reg->reg_num == RegIP
7684 && i.base_reg->reg_type.bitfield.qword))
7685 #else
7686 || 0)
7687 #endif
7688 && !add_prefix (ADDR_PREFIX_OPCODE))
7689 return 0;
7690
7691 if (flag_code == CODE_32BIT)
7692 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7693 else if (i.prefix[ADDR_PREFIX])
7694 need = need_dword;
7695 else
7696 need = flag_code == CODE_64BIT ? need_qword : need_word;
7697
7698 for (op = 0; op < i.operands; op++)
7699 {
7700 if (i.types[op].bitfield.class != Reg)
7701 continue;
7702
7703 switch (need)
7704 {
7705 case need_word:
7706 if (i.op[op].regs->reg_type.bitfield.word)
7707 continue;
7708 break;
7709 case need_dword:
7710 if (i.op[op].regs->reg_type.bitfield.dword)
7711 continue;
7712 break;
7713 case need_qword:
7714 if (i.op[op].regs->reg_type.bitfield.qword)
7715 continue;
7716 break;
7717 }
7718
7719 as_bad (_("invalid register operand size for `%s'"),
7720 insn_name (&i.tm));
7721 return 0;
7722 }
7723 }
7724 }
7725
7726 return 1;
7727 }
7728
7729 static int
7730 check_byte_reg (void)
7731 {
7732 int op;
7733
7734 for (op = i.operands; --op >= 0;)
7735 {
7736 /* Skip non-register operands. */
7737 if (i.types[op].bitfield.class != Reg)
7738 continue;
7739
7740       /* If this is an eight bit register, it's OK.  Wider general
7741 	 registers are rejected below, aside from the special cases
7742 	 that follow.  */
7743 if (i.types[op].bitfield.byte)
7744 continue;
7745
7746 /* I/O port address operands are OK too. */
7747 if (i.tm.operand_types[op].bitfield.instance == RegD
7748 && i.tm.operand_types[op].bitfield.word)
7749 continue;
7750
7751 /* crc32 only wants its source operand checked here. */
7752 if (i.tm.mnem_off == MN_crc32 && op != 0)
7753 continue;
7754
7755 /* Any other register is bad. */
7756 as_bad (_("`%s%s' not allowed with `%s%c'"),
7757 register_prefix, i.op[op].regs->reg_name,
7758 insn_name (&i.tm), i.suffix);
7759 return 0;
7760 }
7761 return 1;
7762 }
7763
7764 static int
7765 check_long_reg (void)
7766 {
7767 int op;
7768
7769 for (op = i.operands; --op >= 0;)
7770 /* Skip non-register operands. */
7771 if (i.types[op].bitfield.class != Reg)
7772 continue;
7773 /* Reject eight bit registers, except where the template requires
7774 them. (e.g. movzb) */
7775 else if (i.types[op].bitfield.byte
7776 && (i.tm.operand_types[op].bitfield.class == Reg
7777 || i.tm.operand_types[op].bitfield.instance == Accum)
7778 && (i.tm.operand_types[op].bitfield.word
7779 || i.tm.operand_types[op].bitfield.dword))
7780 {
7781 as_bad (_("`%s%s' not allowed with `%s%c'"),
7782 register_prefix,
7783 i.op[op].regs->reg_name,
7784 insn_name (&i.tm),
7785 i.suffix);
7786 return 0;
7787 }
7788 /* Error if the e prefix on a general reg is missing. */
7789 else if (i.types[op].bitfield.word
7790 && (i.tm.operand_types[op].bitfield.class == Reg
7791 || i.tm.operand_types[op].bitfield.instance == Accum)
7792 && i.tm.operand_types[op].bitfield.dword)
7793 {
7794 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7795 register_prefix, i.op[op].regs->reg_name,
7796 i.suffix);
7797 return 0;
7798 }
7799 /* Error if the r prefix on a general reg is present. */
7800 else if (i.types[op].bitfield.qword
7801 && (i.tm.operand_types[op].bitfield.class == Reg
7802 || i.tm.operand_types[op].bitfield.instance == Accum)
7803 && i.tm.operand_types[op].bitfield.dword)
7804 {
7805 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7806 register_prefix, i.op[op].regs->reg_name, i.suffix);
7807 return 0;
7808 }
7809 return 1;
7810 }
7811
7812 static int
7813 check_qword_reg (void)
7814 {
7815 int op;
7816
7817 for (op = i.operands; --op >= 0; )
7818 /* Skip non-register operands. */
7819 if (i.types[op].bitfield.class != Reg)
7820 continue;
7821 /* Reject eight bit registers, except where the template requires
7822 them. (e.g. movzb) */
7823 else if (i.types[op].bitfield.byte
7824 && (i.tm.operand_types[op].bitfield.class == Reg
7825 || i.tm.operand_types[op].bitfield.instance == Accum)
7826 && (i.tm.operand_types[op].bitfield.word
7827 || i.tm.operand_types[op].bitfield.dword))
7828 {
7829 as_bad (_("`%s%s' not allowed with `%s%c'"),
7830 register_prefix,
7831 i.op[op].regs->reg_name,
7832 insn_name (&i.tm),
7833 i.suffix);
7834 return 0;
7835 }
7836 /* Error if the r prefix on a general reg is missing. */
7837 else if ((i.types[op].bitfield.word
7838 || i.types[op].bitfield.dword)
7839 && (i.tm.operand_types[op].bitfield.class == Reg
7840 || i.tm.operand_types[op].bitfield.instance == Accum)
7841 && i.tm.operand_types[op].bitfield.qword)
7842 {
7843 /* Prohibit these changes in 64-bit mode, since the
7844 lowering is more complicated. */
7845 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7846 register_prefix, i.op[op].regs->reg_name, i.suffix);
7847 return 0;
7848 }
7849 return 1;
7850 }
7851
7852 static int
7853 check_word_reg (void)
7854 {
7855 int op;
7856 for (op = i.operands; --op >= 0;)
7857 /* Skip non-register operands. */
7858 if (i.types[op].bitfield.class != Reg)
7859 continue;
7860 /* Reject eight bit registers, except where the template requires
7861 them. (e.g. movzb) */
7862 else if (i.types[op].bitfield.byte
7863 && (i.tm.operand_types[op].bitfield.class == Reg
7864 || i.tm.operand_types[op].bitfield.instance == Accum)
7865 && (i.tm.operand_types[op].bitfield.word
7866 || i.tm.operand_types[op].bitfield.dword))
7867 {
7868 as_bad (_("`%s%s' not allowed with `%s%c'"),
7869 register_prefix,
7870 i.op[op].regs->reg_name,
7871 insn_name (&i.tm),
7872 i.suffix);
7873 return 0;
7874 }
7875 /* Error if the e or r prefix on a general reg is present. */
7876 else if ((i.types[op].bitfield.dword
7877 || i.types[op].bitfield.qword)
7878 && (i.tm.operand_types[op].bitfield.class == Reg
7879 || i.tm.operand_types[op].bitfield.instance == Accum)
7880 && i.tm.operand_types[op].bitfield.word)
7881 {
7882 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7883 register_prefix, i.op[op].regs->reg_name,
7884 i.suffix);
7885 return 0;
7886 }
7887 return 1;
7888 }
7889
7890 static int
7891 update_imm (unsigned int j)
7892 {
7893 i386_operand_type overlap = i.types[j];
7894
7895 if (i.tm.operand_types[j].bitfield.imm8
7896 && i.tm.operand_types[j].bitfield.imm8s
7897 && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
7898 {
7899 /* This combination is used on 8-bit immediates where e.g. $~0 is
7900 desirable to permit. We're past operand type matching, so simply
7901 put things back in the shape they were before introducing the
7902 distinction between Imm8, Imm8S, and Imm8|Imm8S. */
7903 overlap.bitfield.imm8s = 0;
7904 }
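/* For instance, `$~0' (i.e. -1) matches such a template only via Imm8S,
while the byte actually emitted, 0xff, is the same as for a plain Imm8. */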
7905
7906 if (overlap.bitfield.imm8
7907 + overlap.bitfield.imm8s
7908 + overlap.bitfield.imm16
7909 + overlap.bitfield.imm32
7910 + overlap.bitfield.imm32s
7911 + overlap.bitfield.imm64 > 1)
7912 {
7913 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
7914 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
7915 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
7916 static const i386_operand_type imm16_32 = { .bitfield =
7917 { .imm16 = 1, .imm32 = 1 }
7918 };
7919 static const i386_operand_type imm16_32s = { .bitfield =
7920 { .imm16 = 1, .imm32s = 1 }
7921 };
7922 static const i386_operand_type imm16_32_32s = { .bitfield =
7923 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
7924 };
7925
7926 if (i.suffix)
7927 {
7928 i386_operand_type temp;
7929
7930 operand_type_set (&temp, 0);
7931 if (i.suffix == BYTE_MNEM_SUFFIX)
7932 {
7933 temp.bitfield.imm8 = overlap.bitfield.imm8;
7934 temp.bitfield.imm8s = overlap.bitfield.imm8s;
7935 }
7936 else if (i.suffix == WORD_MNEM_SUFFIX)
7937 temp.bitfield.imm16 = overlap.bitfield.imm16;
7938 else if (i.suffix == QWORD_MNEM_SUFFIX)
7939 {
7940 temp.bitfield.imm64 = overlap.bitfield.imm64;
7941 temp.bitfield.imm32s = overlap.bitfield.imm32s;
7942 }
7943 else
7944 temp.bitfield.imm32 = overlap.bitfield.imm32;
7945 overlap = temp;
7946 }
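/* The suffix path just above: e.g. `addw $0x1234, %ax' retains only the
Imm16 bit of an Imm16|Imm32|Imm32S overlap because of the `w' suffix. */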
7947 else if (operand_type_equal (&overlap, &imm16_32_32s)
7948 || operand_type_equal (&overlap, &imm16_32)
7949 || operand_type_equal (&overlap, &imm16_32s))
7950 {
7951 if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7952 overlap = imm16;
7953 else
7954 overlap = imm32s;
7955 }
7956 else if (i.prefix[REX_PREFIX] & REX_W)
7957 overlap = operand_type_and (overlap, imm32s);
7958 else if (i.prefix[DATA_PREFIX])
7959 overlap = operand_type_and (overlap,
7960 flag_code != CODE_16BIT ? imm16 : imm32);
7961 if (overlap.bitfield.imm8
7962 + overlap.bitfield.imm8s
7963 + overlap.bitfield.imm16
7964 + overlap.bitfield.imm32
7965 + overlap.bitfield.imm32s
7966 + overlap.bitfield.imm64 != 1)
7967 {
7968 as_bad (_("no instruction mnemonic suffix given; "
7969 "can't determine immediate size"));
7970 return 0;
7971 }
7972 }
7973 i.types[j] = overlap;
7974
7975 return 1;
7976 }
7977
7978 static int
7979 finalize_imm (void)
7980 {
7981 unsigned int j, n;
7982
7983 /* Update the first 2 immediate operands. */
7984 n = i.operands > 2 ? 2 : i.operands;
7985 if (n)
7986 {
7987 for (j = 0; j < n; j++)
7988 if (update_imm (j) == 0)
7989 return 0;
7990
7991 /* The 3rd operand can't be an immediate operand. */
7992 gas_assert (operand_type_check (i.types[2], imm) == 0);
7993 }
7994
7995 return 1;
7996 }
7997
7998 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
7999 bool do_sse2avx)
8000 {
8001 if (r->reg_flags & RegRex)
8002 {
8003 if (i.rex & rex_bit)
8004 as_bad (_("same type of prefix used twice"));
8005 i.rex |= rex_bit;
8006 }
8007 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8008 {
8009 gas_assert (i.vex.register_specifier == r);
8010 i.vex.register_specifier += 8;
8011 }
8012
8013 if (r->reg_flags & RegVRex)
8014 i.vrex |= rex_bit;
8015 }
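/* For example, encoding %r9 in ModR/M.rm sets REX_B via RegRex, while an
upper-16 SIMD register like %xmm17 sets the same bit in i.vrex via
RegVRex, to be emitted in the EVEX prefix. */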
8016
8017 static int
8018 process_operands (void)
8019 {
8020 /* Default segment register this instruction will use for memory
8021 accesses. 0 means unknown. This is only for optimizing out
8022 unnecessary segment overrides. */
8023 const reg_entry *default_seg = NULL;
8024
8025 /* We only need to check those implicit registers for instructions
8026 with 3 or fewer operands. */
8027 if (i.operands <= 3)
8028 for (unsigned int j = 0; j < i.operands; j++)
8029 if (i.types[j].bitfield.instance != InstanceNone)
8030 i.reg_operands--;
8031
8032 if (i.tm.opcode_modifier.sse2avx)
8033 {
8034 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
8035 need converting. */
8036 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
8037 i.prefix[REX_PREFIX] = 0;
8038 i.rex_encoding = 0;
8039 }
8040 /* ImmExt should be processed after SSE2AVX. */
8041 else if (i.tm.opcode_modifier.immext)
8042 process_immext ();
8043
8044 /* TILEZERO is unusual in that it has a single operand encoded in ModR/M.reg,
8045 not ModR/M.rm. To avoid special casing this in build_modrm_byte(), fake a
8046 new destination operand here, while converting the source one to register
8047 number 0. */
8048 if (i.tm.mnem_off == MN_tilezero)
8049 {
8050 i.op[1].regs = i.op[0].regs;
8051 i.op[0].regs -= i.op[0].regs->reg_num;
8052 i.types[1] = i.types[0];
8053 i.tm.operand_types[1] = i.tm.operand_types[0];
8054 i.flags[1] = i.flags[0];
8055 i.operands++;
8056 i.reg_operands++;
8057 i.tm.operands++;
8058 }
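/* E.g. `tilezero %tmm3' thus becomes a fake two-operand insn, encoded
with ModR/M.reg = 3 and ModR/M.rm = 0. */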
8059
8060 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
8061 {
8062 static const i386_operand_type regxmm = {
8063 .bitfield = { .class = RegSIMD, .xmmword = 1 }
8064 };
8065 unsigned int dupl = i.operands;
8066 unsigned int dest = dupl - 1;
8067 unsigned int j;
8068
8069 /* The destination must be an xmm register. */
8070 gas_assert (i.reg_operands
8071 && MAX_OPERANDS > dupl
8072 && operand_type_equal (&i.types[dest], &regxmm));
8073
8074 if (i.tm.operand_types[0].bitfield.instance == Accum
8075 && i.tm.operand_types[0].bitfield.xmmword)
8076 {
8077 /* Keep xmm0 for instructions with VEX prefix and 3
8078 sources. */
8079 i.tm.operand_types[0].bitfield.instance = InstanceNone;
8080 i.tm.operand_types[0].bitfield.class = RegSIMD;
8081 i.reg_operands++;
8082 goto duplicate;
8083 }
8084
8085 if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
8086 {
8087 gas_assert ((MAX_OPERANDS - 1) > dupl);
8088
8089 /* Add the implicit xmm0 for instructions with VEX prefix
8090 and 3 sources. */
8091 for (j = i.operands; j > 0; j--)
8092 {
8093 i.op[j] = i.op[j - 1];
8094 i.types[j] = i.types[j - 1];
8095 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
8096 i.flags[j] = i.flags[j - 1];
8097 }
8098 i.op[0].regs
8099 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
8100 i.types[0] = regxmm;
8101 i.tm.operand_types[0] = regxmm;
8102
8103 i.operands += 2;
8104 i.reg_operands += 2;
8105 i.tm.operands += 2;
8106
8107 dupl++;
8108 dest++;
8109 i.op[dupl] = i.op[dest];
8110 i.types[dupl] = i.types[dest];
8111 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8112 i.flags[dupl] = i.flags[dest];
8113 }
8114 else
8115 {
8116 duplicate:
8117 i.operands++;
8118 i.reg_operands++;
8119 i.tm.operands++;
8120
8121 i.op[dupl] = i.op[dest];
8122 i.types[dupl] = i.types[dest];
8123 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8124 i.flags[dupl] = i.flags[dest];
8125 }
8126
8127 if (i.tm.opcode_modifier.immext)
8128 process_immext ();
8129 }
8130 else if (i.tm.operand_types[0].bitfield.instance == Accum
8131 && i.tm.opcode_modifier.modrm)
8132 {
8133 unsigned int j;
8134
8135 for (j = 1; j < i.operands; j++)
8136 {
8137 i.op[j - 1] = i.op[j];
8138 i.types[j - 1] = i.types[j];
8139
8140 /* We need to adjust fields in i.tm since they are used by
8141 build_modrm_byte. */
8142 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
8143
8144 i.flags[j - 1] = i.flags[j];
8145 }
8146
8147 /* No adjustment to i.reg_operands: This was already done at the top
8148 of the function. */
8149 i.operands--;
8150 i.tm.operands--;
8151 }
8152 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
8153 {
8154 unsigned int regnum, first_reg_in_group, last_reg_in_group;
8155
8156 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
8157 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
8158 regnum = register_number (i.op[1].regs);
8159 first_reg_in_group = regnum & ~3;
8160 last_reg_in_group = first_reg_in_group + 3;
8161 if (regnum != first_reg_in_group)
8162 as_warn (_("source register `%s%s' implicitly denotes"
8163 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
8164 register_prefix, i.op[1].regs->reg_name,
8165 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
8166 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
8167 insn_name (&i.tm));
8168 }
8169 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
8170 {
8171 /* The imul $imm, %reg instruction is converted into
8172 imul $imm, %reg, %reg, and the clr %reg instruction
8173 is converted into xor %reg, %reg. */
8174
8175 unsigned int first_reg_op;
8176
8177 if (operand_type_check (i.types[0], reg))
8178 first_reg_op = 0;
8179 else
8180 first_reg_op = 1;
8181 /* Pretend we saw the extra register operand. */
8182 gas_assert (i.reg_operands == 1
8183 && i.op[first_reg_op + 1].regs == 0);
8184 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
8185 i.types[first_reg_op + 1] = i.types[first_reg_op];
8186 i.operands++;
8187 i.reg_operands++;
8188 }
8189
8190 if (i.tm.opcode_modifier.modrm)
8191 {
8192 /* The opcode is completed (modulo i.tm.extension_opcode which
8193 must be put into the modrm byte). Now, we make the modrm and
8194 index base bytes based on all the info we've collected. */
8195
8196 default_seg = build_modrm_byte ();
8197
8198 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8199 {
8200 /* Warn about some common errors, but press on regardless. */
8201 if (i.operands == 2)
8202 {
8203 /* Reversed arguments on faddp or fmulp. */
8204 as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
8205 register_prefix, i.op[!intel_syntax].regs->reg_name,
8206 register_prefix, i.op[intel_syntax].regs->reg_name);
8207 }
8208 else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
8209 {
8210 /* Extraneous `l' suffix on fp insn. */
8211 as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
8212 register_prefix, i.op[0].regs->reg_name);
8213 }
8214 }
8215 }
8216 else if (i.types[0].bitfield.class == SReg)
8217 {
8218 if (flag_code != CODE_64BIT
8219 ? i.tm.base_opcode == POP_SEG_SHORT
8220 && i.op[0].regs->reg_num == 1
8221 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
8222 && i.op[0].regs->reg_num < 4)
8223 {
8224 as_bad (_("you can't `%s %s%s'"),
8225 insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
8226 return 0;
8227 }
8228 if (i.op[0].regs->reg_num > 3
8229 && i.tm.opcode_space == SPACE_BASE )
8230 {
8231 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
8232 i.tm.opcode_space = SPACE_0F;
8233 }
8234 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
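/* E.g. `pop %ds' becomes opcode 0x1f, while `pop %fs' is moved into
the 0x0f space above, yielding 0x0f 0xa1. */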
8235 }
8236 else if (i.tm.opcode_space == SPACE_BASE
8237 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
8238 {
8239 default_seg = reg_ds;
8240 }
8241 else if (i.tm.opcode_modifier.isstring)
8242 {
8243 /* For the string instructions that allow a segment override
8244 on one of their operands, the default segment is ds. */
8245 default_seg = reg_ds;
8246 }
8247 else if (i.short_form)
8248 {
8249 /* The register operand is in operand 0 or 1. */
8250 const reg_entry *r = i.op[0].regs;
8251
8252 if (i.imm_operands
8253 || (r->reg_type.bitfield.instance == Accum && i.op[1].regs))
8254 r = i.op[1].regs;
8255 /* Register goes in low 3 bits of opcode. */
8256 i.tm.base_opcode |= r->reg_num;
8257 set_rex_vrex (r, REX_B, false);
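/* E.g. `push %rbx' ORs register number 3 into 0x50, giving 0x53,
while `push %r11' additionally picks up REX_B. */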
8258 }
8259
8260 if ((i.seg[0] || i.prefix[SEG_PREFIX])
8261 && i.tm.mnem_off == MN_lea)
8262 {
8263 if (!quiet_warnings)
8264 as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
8265 if (optimize && !i.no_optimize)
8266 {
8267 i.seg[0] = NULL;
8268 i.prefix[SEG_PREFIX] = 0;
8269 }
8270 }
8271
8272 /* If a segment was explicitly specified, and the specified segment
8273 is neither the default nor the one already recorded from a prefix,
8274 use an opcode prefix to select it. If we never figured out what
8275 the default segment is, then default_seg will be zero at this
8276 point, and the specified segment prefix will always be used. */
8277 if (i.seg[0]
8278 && i.seg[0] != default_seg
8279 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8280 {
8281 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8282 return 0;
8283 }
8284 return 1;
8285 }
8286
8287 static const reg_entry *
8288 build_modrm_byte (void)
8289 {
8290 const reg_entry *default_seg = NULL;
8291 unsigned int source = i.imm_operands - i.tm.opcode_modifier.immext
8292 /* Compensate for kludge in md_assemble(). */
8293 + i.tm.operand_types[0].bitfield.imm1;
8294 unsigned int dest = i.operands - 1 - i.tm.opcode_modifier.immext;
8295 unsigned int v, op, reg_slot = ~0;
8296
8297 /* Accumulator (in particular %st), shift count (%cl), and the like need
8298 to be skipped just like immediate operands do. */
8299 if (i.tm.operand_types[source].bitfield.instance)
8300 ++source;
8301 while (i.tm.operand_types[dest].bitfield.instance)
8302 --dest;
8303
8304 for (op = source; op < i.operands; ++op)
8305 if (i.tm.operand_types[op].bitfield.baseindex)
8306 break;
8307
8308 if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None) == 4)
8309 {
8310 expressionS *exp;
8311
8312 /* There are 2 kinds of instructions:
8313 1. 5 operands: 4 register operands or 3 register operands
8314 plus 1 memory operand plus one Imm4 operand, VexXDS, and
8315 VexW0 or VexW1. The destination must be either XMM, YMM or
8316 ZMM register.
8317 2. 4 operands: 4 register operands or 3 register operands
8318 plus 1 memory operand, with VexXDS. */
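/* E.g. in `vblendvps %xmm4, (%rax), %xmm2, %xmm1' the operand %xmm4
may not be a memory operand, so it is the one encoded in Imm8[7:4]. */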
8319 gas_assert (i.tm.opcode_modifier.vexvvvv
8320 && i.tm.opcode_modifier.vexw
8321 && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8322
8323 /* Of the first two non-immediate operands, the one whose template
8324 doesn't allow a memory operand is encoded in the immediate operand. */
8325 if (source == op)
8326 reg_slot = source + 1;
8327 else
8328 reg_slot = source++;
8329
8330 if (i.imm_operands == 0)
8331 {
8332 /* When there is no immediate operand, generate an 8bit
8333 immediate operand to encode the first operand. */
8334 exp = &im_expressions[i.imm_operands++];
8335 i.op[i.operands].imms = exp;
8336 i.types[i.operands].bitfield.imm8 = 1;
8337 i.operands++;
8338
8339 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8340 exp->X_op = O_constant;
8341 exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8342 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8343 }
8344 else
8345 {
8346 gas_assert (i.imm_operands == 1);
8347 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8348 gas_assert (!i.tm.opcode_modifier.immext);
8349
8350 /* Turn on Imm8 again so that output_imm will generate it. */
8351 i.types[0].bitfield.imm8 = 1;
8352
8353 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8354 i.op[0].imms->X_add_number
8355 |= register_number (i.op[reg_slot].regs) << 4;
8356 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8357 }
8358 }
8359
8360 for (v = source + 1; v < dest; ++v)
8361 if (v != reg_slot)
8362 break;
8363 if (v >= dest)
8364 v = ~0;
8365 if (i.tm.extension_opcode != None)
8366 {
8367 if (dest != source)
8368 v = dest;
8369 dest = ~0;
8370 }
8371 gas_assert (source < dest);
8372 if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
8373 && source != op)
8374 {
8375 unsigned int tmp = source;
8376
8377 source = v;
8378 v = tmp;
8379 }
8380
8381 if (v < MAX_OPERANDS)
8382 {
8383 gas_assert (i.tm.opcode_modifier.vexvvvv);
8384 i.vex.register_specifier = i.op[v].regs;
8385 }
8386
8387 if (op < i.operands)
8388 {
8389 if (i.mem_operands)
8390 {
8391 unsigned int fake_zero_displacement = 0;
8392
8393 gas_assert (i.flags[op] & Operand_Mem);
8394
8395 if (i.tm.opcode_modifier.sib)
8396 {
8397 /* The index register of VSIB shouldn't be RegIZ. */
8398 if (i.tm.opcode_modifier.sib != SIBMEM
8399 && i.index_reg->reg_num == RegIZ)
8400 abort ();
8401
8402 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8403 if (!i.base_reg)
8404 {
8405 i.sib.base = NO_BASE_REGISTER;
8406 i.sib.scale = i.log2_scale_factor;
8407 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8408 i.types[op].bitfield.disp32 = 1;
8409 }
8410
8411 /* Since a mandatory SIB always has an index register,
8412 the code logic remains unchanged. A non-mandatory SIB
8413 without an index register is allowed and will be handled
8414 later. */
8415 if (i.index_reg)
8416 {
8417 if (i.index_reg->reg_num == RegIZ)
8418 i.sib.index = NO_INDEX_REGISTER;
8419 else
8420 i.sib.index = i.index_reg->reg_num;
8421 set_rex_vrex (i.index_reg, REX_X, false);
8422 }
8423 }
8424
8425 default_seg = reg_ds;
8426
8427 if (i.base_reg == 0)
8428 {
8429 i.rm.mode = 0;
8430 if (!i.disp_operands)
8431 fake_zero_displacement = 1;
8432 if (i.index_reg == 0)
8433 {
8434 /* This checks for both VSIB and the mandatory non-vector SIB. */
8435 gas_assert (!i.tm.opcode_modifier.sib
8436 || i.tm.opcode_modifier.sib == SIBMEM);
8437 /* Operand is just <disp> */
8438 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8439 if (flag_code == CODE_64BIT)
8440 {
8441 /* In 64-bit mode RIP-relative addressing replaces
8442 32-bit absolute addressing, so absolute
8443 addressing has to be encoded via one of the
8444 redundant SIB forms. */
8445 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8446 i.sib.base = NO_BASE_REGISTER;
8447 i.sib.index = NO_INDEX_REGISTER;
8448 i.types[op].bitfield.disp32 = 1;
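/* E.g. `movl %eax, 0x1234' is thus emitted as ModR/M rm=100 plus
SIB base=101, index=100 (i.e. none), disp32: 89 04 25 34 12 00 00. */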
8449 }
8450 else if ((flag_code == CODE_16BIT)
8451 ^ (i.prefix[ADDR_PREFIX] != 0))
8452 {
8453 i.rm.regmem = NO_BASE_REGISTER_16;
8454 i.types[op].bitfield.disp16 = 1;
8455 }
8456 else
8457 {
8458 i.rm.regmem = NO_BASE_REGISTER;
8459 i.types[op].bitfield.disp32 = 1;
8460 }
8461 }
8462 else if (!i.tm.opcode_modifier.sib)
8463 {
8464 /* !i.base_reg && i.index_reg */
8465 if (i.index_reg->reg_num == RegIZ)
8466 i.sib.index = NO_INDEX_REGISTER;
8467 else
8468 i.sib.index = i.index_reg->reg_num;
8469 i.sib.base = NO_BASE_REGISTER;
8470 i.sib.scale = i.log2_scale_factor;
8471 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8472 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8473 i.types[op].bitfield.disp32 = 1;
8474 if ((i.index_reg->reg_flags & RegRex) != 0)
8475 i.rex |= REX_X;
8476 }
8477 }
8478 /* RIP addressing for 64bit mode. */
8479 else if (i.base_reg->reg_num == RegIP)
8480 {
8481 gas_assert (!i.tm.opcode_modifier.sib);
8482 i.rm.regmem = NO_BASE_REGISTER;
8483 i.types[op].bitfield.disp8 = 0;
8484 i.types[op].bitfield.disp16 = 0;
8485 i.types[op].bitfield.disp32 = 1;
8486 i.types[op].bitfield.disp64 = 0;
8487 i.flags[op] |= Operand_PCrel;
8488 if (! i.disp_operands)
8489 fake_zero_displacement = 1;
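/* E.g. `mov foo(%rip), %eax' uses mod=00/rm=101 with a 32-bit
PC-relative displacement; a bare `(%rip)' operand gets the fake
zero displacement set up further down. */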
8490 }
8491 else if (i.base_reg->reg_type.bitfield.word)
8492 {
8493 gas_assert (!i.tm.opcode_modifier.sib);
8494 switch (i.base_reg->reg_num)
8495 {
8496 case 3: /* (%bx) */
8497 if (i.index_reg == 0)
8498 i.rm.regmem = 7;
8499 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
8500 i.rm.regmem = i.index_reg->reg_num - 6;
8501 break;
8502 case 5: /* (%bp) */
8503 default_seg = reg_ss;
8504 if (i.index_reg == 0)
8505 {
8506 i.rm.regmem = 6;
8507 if (operand_type_check (i.types[op], disp) == 0)
8508 {
8509 /* fake (%bp) into 0(%bp) */
8510 if (i.disp_encoding == disp_encoding_16bit)
8511 i.types[op].bitfield.disp16 = 1;
8512 else
8513 i.types[op].bitfield.disp8 = 1;
8514 fake_zero_displacement = 1;
8515 }
8516 }
8517 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
8518 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8519 break;
8520 default: /* (%si) -> 4 or (%di) -> 5 */
8521 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8522 }
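/* Recap of the 16-bit ModR/M r/m values assigned above:
0 (%bx,%si) 1 (%bx,%di) 2 (%bp,%si) 3 (%bp,%di)
4 (%si) 5 (%di) 6 (%bp) 7 (%bx). */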
8523 if (!fake_zero_displacement
8524 && !i.disp_operands
8525 && i.disp_encoding)
8526 {
8527 fake_zero_displacement = 1;
8528 if (i.disp_encoding == disp_encoding_8bit)
8529 i.types[op].bitfield.disp8 = 1;
8530 else
8531 i.types[op].bitfield.disp16 = 1;
8532 }
8533 i.rm.mode = mode_from_disp_size (i.types[op]);
8534 }
8535 else /* i.base_reg and 32/64 bit mode */
8536 {
8537 if (operand_type_check (i.types[op], disp))
8538 {
8539 i.types[op].bitfield.disp16 = 0;
8540 i.types[op].bitfield.disp64 = 0;
8541 i.types[op].bitfield.disp32 = 1;
8542 }
8543
8544 if (!i.tm.opcode_modifier.sib)
8545 i.rm.regmem = i.base_reg->reg_num;
8546 if ((i.base_reg->reg_flags & RegRex) != 0)
8547 i.rex |= REX_B;
8548 i.sib.base = i.base_reg->reg_num;
8549 /* x86-64 ignores REX prefix bit here to avoid decoder
8550 complications. */
8551 if (!(i.base_reg->reg_flags & RegRex)
8552 && (i.base_reg->reg_num == EBP_REG_NUM
8553 || i.base_reg->reg_num == ESP_REG_NUM))
8554 default_seg = reg_ss;
8555 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8556 {
8557 fake_zero_displacement = 1;
8558 if (i.disp_encoding == disp_encoding_32bit)
8559 i.types[op].bitfield.disp32 = 1;
8560 else
8561 i.types[op].bitfield.disp8 = 1;
8562 }
8563 i.sib.scale = i.log2_scale_factor;
8564 if (i.index_reg == 0)
8565 {
8566 /* Only check for VSIB. */
8567 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8568 && i.tm.opcode_modifier.sib != VECSIB256
8569 && i.tm.opcode_modifier.sib != VECSIB512);
8570
8571 /* <disp>(%esp) becomes two byte modrm with no index
8572 register. We've already stored the code for esp
8573 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8574 Any base register besides %esp will not use the
8575 extra modrm byte. */
8576 i.sib.index = NO_INDEX_REGISTER;
8577 }
8578 else if (!i.tm.opcode_modifier.sib)
8579 {
8580 if (i.index_reg->reg_num == RegIZ)
8581 i.sib.index = NO_INDEX_REGISTER;
8582 else
8583 i.sib.index = i.index_reg->reg_num;
8584 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8585 if ((i.index_reg->reg_flags & RegRex) != 0)
8586 i.rex |= REX_X;
8587 }
8588
8589 if (i.disp_operands
8590 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8591 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8592 i.rm.mode = 0;
8593 else
8594 {
8595 if (!fake_zero_displacement
8596 && !i.disp_operands
8597 && i.disp_encoding)
8598 {
8599 fake_zero_displacement = 1;
8600 if (i.disp_encoding == disp_encoding_8bit)
8601 i.types[op].bitfield.disp8 = 1;
8602 else
8603 i.types[op].bitfield.disp32 = 1;
8604 }
8605 i.rm.mode = mode_from_disp_size (i.types[op]);
8606 }
8607 }
8608
8609 if (fake_zero_displacement)
8610 {
8611 /* Fakes a zero displacement assuming that i.types[op]
8612 holds the correct displacement size. */
8613 expressionS *exp;
8614
8615 gas_assert (i.op[op].disps == 0);
8616 exp = &disp_expressions[i.disp_operands++];
8617 i.op[op].disps = exp;
8618 exp->X_op = O_constant;
8619 exp->X_add_number = 0;
8620 exp->X_add_symbol = (symbolS *) 0;
8621 exp->X_op_symbol = (symbolS *) 0;
8622 }
8623 }
8624 else
8625 {
8626 i.rm.mode = 3;
8627 i.rm.regmem = i.op[op].regs->reg_num;
8628 set_rex_vrex (i.op[op].regs, REX_B, false);
8629 }
8630
8631 if (op == dest)
8632 dest = ~0;
8633 if (op == source)
8634 source = ~0;
8635 }
8636 else
8637 {
8638 i.rm.mode = 3;
8639 if (!i.tm.opcode_modifier.regmem)
8640 {
8641 gas_assert (source < MAX_OPERANDS);
8642 i.rm.regmem = i.op[source].regs->reg_num;
8643 set_rex_vrex (i.op[source].regs, REX_B,
8644 dest >= MAX_OPERANDS && i.tm.opcode_modifier.sse2avx);
8645 source = ~0;
8646 }
8647 else
8648 {
8649 gas_assert (dest < MAX_OPERANDS);
8650 i.rm.regmem = i.op[dest].regs->reg_num;
8651 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8652 dest = ~0;
8653 }
8654 }
8655
8656 /* Fill in i.rm.reg field with extension opcode (if any) or the
8657 appropriate register. */
8658 if (i.tm.extension_opcode != None)
8659 i.rm.reg = i.tm.extension_opcode;
8660 else if (!i.tm.opcode_modifier.regmem && dest < MAX_OPERANDS)
8661 {
8662 i.rm.reg = i.op[dest].regs->reg_num;
8663 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8664 }
8665 else
8666 {
8667 gas_assert (source < MAX_OPERANDS);
8668 i.rm.reg = i.op[source].regs->reg_num;
8669 set_rex_vrex (i.op[source].regs, REX_R, false);
8670 }
8671
8672 if (flag_code != CODE_64BIT && (i.rex & REX_R))
8673 {
8674 gas_assert (i.types[!i.tm.opcode_modifier.regmem].bitfield.class == RegCR);
8675 i.rex &= ~REX_R;
8676 add_prefix (LOCK_PREFIX_OPCODE);
8677 }
8678
8679 return default_seg;
8680 }
8681
8682 static INLINE void
8683 frag_opcode_byte (unsigned char byte)
8684 {
8685 if (now_seg != absolute_section)
8686 FRAG_APPEND_1_CHAR (byte);
8687 else
8688 ++abs_section_offset;
8689 }
8690
8691 static unsigned int
8692 flip_code16 (unsigned int code16)
8693 {
8694 gas_assert (i.tm.operands == 1);
8695
8696 return !(i.prefix[REX_PREFIX] & REX_W)
8697 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8698 : i.tm.operand_types[0].bitfield.disp16)
8699 ? CODE16 : 0;
8700 }
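/* E.g. for `data16 jmp target' in 32-bit code the displacement shrinks
to 16 bits, so output_branch () below flips CODE16 on via this helper. */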
8701
8702 static void
8703 output_branch (void)
8704 {
8705 char *p;
8706 int size;
8707 int code16;
8708 int prefix;
8709 relax_substateT subtype;
8710 symbolS *sym;
8711 offsetT off;
8712
8713 if (now_seg == absolute_section)
8714 {
8715 as_bad (_("relaxable branches not supported in absolute section"));
8716 return;
8717 }
8718
8719 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8720 size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8721
8722 prefix = 0;
8723 if (i.prefix[DATA_PREFIX] != 0)
8724 {
8725 prefix = 1;
8726 i.prefixes -= 1;
8727 code16 ^= flip_code16(code16);
8728 }
8729 /* Pentium4 branch hints. */
8730 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8731 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8732 {
8733 prefix++;
8734 i.prefixes--;
8735 }
8736 if (i.prefix[REX_PREFIX] != 0)
8737 {
8738 prefix++;
8739 i.prefixes--;
8740 }
8741
8742 /* BND prefixed jump. */
8743 if (i.prefix[BND_PREFIX] != 0)
8744 {
8745 prefix++;
8746 i.prefixes--;
8747 }
8748
8749 if (i.prefixes != 0)
8750 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
8751
8752 /* It's always a symbol; end the frag & set up for relax.
8753 Make sure there is enough room in this frag for the largest
8754 instruction we may generate in md_convert_frag. This is 2
8755 bytes for the opcode and room for the prefix and largest
8756 displacement. */
8757 frag_grow (prefix + 2 + 4);
8758 /* Prefix and 1 opcode byte go in fr_fix. */
8759 p = frag_more (prefix + 1);
8760 if (i.prefix[DATA_PREFIX] != 0)
8761 *p++ = DATA_PREFIX_OPCODE;
8762 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8763 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8764 *p++ = i.prefix[SEG_PREFIX];
8765 if (i.prefix[BND_PREFIX] != 0)
8766 *p++ = BND_PREFIX_OPCODE;
8767 if (i.prefix[REX_PREFIX] != 0)
8768 *p++ = i.prefix[REX_PREFIX];
8769 *p = i.tm.base_opcode;
8770
8771 if ((unsigned char) *p == JUMP_PC_RELATIVE)
8772 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8773 else if (cpu_arch_flags.bitfield.cpui386)
8774 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8775 else
8776 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8777 subtype |= code16;
8778
8779 sym = i.op[0].disps->X_add_symbol;
8780 off = i.op[0].disps->X_add_number;
8781
8782 if (i.op[0].disps->X_op != O_constant
8783 && i.op[0].disps->X_op != O_symbol)
8784 {
8785 /* Handle complex expressions. */
8786 sym = make_expr_symbol (i.op[0].disps);
8787 off = 0;
8788 }
8789
8790 frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8791
8792 /* 1 possible extra opcode + 4 byte displacement go in var part.
8793 Pass reloc in fr_var. */
8794 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8795 }
8796
8797 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8798 /* Return TRUE iff PLT32 relocation should be used for branching to
8799 symbol S. */
8800
8801 static bool
8802 need_plt32_p (symbolS *s)
8803 {
8804 /* PLT32 relocation is ELF only. */
8805 if (!IS_ELF)
8806 return false;
8807
8808 #ifdef TE_SOLARIS
8809 /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8810 krtld support it. */
8811 return false;
8812 #endif
8813
8814 /* Since there is no need to prepare for a PLT branch on x86-64, we
8815 can generate R_X86_64_PLT32 instead of R_X86_64_PC32; PLT32 can
8816 then be used as a marker for 32-bit PC-relative branches. */
8817 if (!object_64bit)
8818 return false;
8819
8820 if (s == NULL)
8821 return false;
8822
8823 /* Weak or undefined symbols need PLT32 relocation. */
8824 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8825 return true;
8826
8827 /* A non-global symbol doesn't need PLT32 relocation. */
8828 if (! S_IS_EXTERNAL (s))
8829 return false;
8830
8831 /* Other global symbols need PLT32 relocation. NB: Symbols with
8832 non-default visibility are treated as normal global symbols
8833 so that PLT32 relocation can be used as a marker for 32-bit
8834 PC-relative branches. It is useful for linker relaxation. */
8835 return true;
8836 }
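/* E.g. a 64-bit `call foo' against a global or undefined `foo' is hence
emitted with R_X86_64_PLT32 rather than R_X86_64_PC32. */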
8837 #endif
8838
8839 static void
8840 output_jump (void)
8841 {
8842 char *p;
8843 int size;
8844 fixS *fixP;
8845 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8846
8847 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8848 {
8849 /* This is a loop or jecxz type instruction. */
8850 size = 1;
8851 if (i.prefix[ADDR_PREFIX] != 0)
8852 {
8853 frag_opcode_byte (ADDR_PREFIX_OPCODE);
8854 i.prefixes -= 1;
8855 }
8856 /* Pentium4 branch hints. */
8857 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8858 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8859 {
8860 frag_opcode_byte (i.prefix[SEG_PREFIX]);
8861 i.prefixes--;
8862 }
8863 }
8864 else
8865 {
8866 int code16;
8867
8868 code16 = 0;
8869 if (flag_code == CODE_16BIT)
8870 code16 = CODE16;
8871
8872 if (i.prefix[DATA_PREFIX] != 0)
8873 {
8874 frag_opcode_byte (DATA_PREFIX_OPCODE);
8875 i.prefixes -= 1;
8876 code16 ^= flip_code16(code16);
8877 }
8878
8879 size = 4;
8880 if (code16)
8881 size = 2;
8882 }
8883
8884 /* BND prefixed jump. */
8885 if (i.prefix[BND_PREFIX] != 0)
8886 {
8887 frag_opcode_byte (i.prefix[BND_PREFIX]);
8888 i.prefixes -= 1;
8889 }
8890
8891 if (i.prefix[REX_PREFIX] != 0)
8892 {
8893 frag_opcode_byte (i.prefix[REX_PREFIX]);
8894 i.prefixes -= 1;
8895 }
8896
8897 if (i.prefixes != 0)
8898 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
8899
8900 if (now_seg == absolute_section)
8901 {
8902 abs_section_offset += i.opcode_length + size;
8903 return;
8904 }
8905
8906 p = frag_more (i.opcode_length + size);
8907 switch (i.opcode_length)
8908 {
8909 case 2:
8910 *p++ = i.tm.base_opcode >> 8;
8911 /* Fall through. */
8912 case 1:
8913 *p++ = i.tm.base_opcode;
8914 break;
8915 default:
8916 abort ();
8917 }
8918
8919 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8920 if (flag_code == CODE_64BIT && size == 4
8921 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
8922 && need_plt32_p (i.op[0].disps->X_add_symbol))
8923 jump_reloc = BFD_RELOC_X86_64_PLT32;
8924 #endif
8925
8926 jump_reloc = reloc (size, 1, 1, jump_reloc);
8927
8928 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8929 i.op[0].disps, 1, jump_reloc);
8930
8931 /* All jumps handled here are signed, but don't unconditionally use a
8932 signed limit check for 32 and 16 bit jumps as we want to allow wrap
8933 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
8934 respectively. */
8935 switch (size)
8936 {
8937 case 1:
8938 fixP->fx_signed = 1;
8939 break;
8940
8941 case 2:
8942 if (i.tm.mnem_off == MN_xbegin)
8943 fixP->fx_signed = 1;
8944 break;
8945
8946 case 4:
8947 if (flag_code == CODE_64BIT)
8948 fixP->fx_signed = 1;
8949 break;
8950 }
8951 }
8952
8953 static void
8954 output_interseg_jump (void)
8955 {
8956 char *p;
8957 int size;
8958 int prefix;
8959 int code16;
8960
8961 code16 = 0;
8962 if (flag_code == CODE_16BIT)
8963 code16 = CODE16;
8964
8965 prefix = 0;
8966 if (i.prefix[DATA_PREFIX] != 0)
8967 {
8968 prefix = 1;
8969 i.prefixes -= 1;
8970 code16 ^= CODE16;
8971 }
8972
8973 gas_assert (!i.prefix[REX_PREFIX]);
8974
8975 size = 4;
8976 if (code16)
8977 size = 2;
8978
8979 if (i.prefixes != 0)
8980 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
8981
8982 if (now_seg == absolute_section)
8983 {
8984 abs_section_offset += prefix + 1 + 2 + size;
8985 return;
8986 }
8987
8988 /* 1 opcode byte; 2 segment bytes; `size' offset bytes. */
8989 p = frag_more (prefix + 1 + 2 + size);
8990
8991 if (i.prefix[DATA_PREFIX] != 0)
8992 *p++ = DATA_PREFIX_OPCODE;
8993
8994 if (i.prefix[REX_PREFIX] != 0)
8995 *p++ = i.prefix[REX_PREFIX];
8996
8997 *p++ = i.tm.base_opcode;
8998 if (i.op[1].imms->X_op == O_constant)
8999 {
9000 offsetT n = i.op[1].imms->X_add_number;
9001
9002 if (size == 2
9003 && !fits_in_unsigned_word (n)
9004 && !fits_in_signed_word (n))
9005 {
9006 as_bad (_("16-bit jump out of range"));
9007 return;
9008 }
9009 md_number_to_chars (p, n, size);
9010 }
9011 else
9012 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9013 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9014
9015 p += size;
9016 if (i.op[0].imms->X_op == O_constant)
9017 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9018 else
9019 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9020 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9021 }
9022
9023 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9024 void
9025 x86_cleanup (void)
9026 {
9027 char *p;
9028 asection *seg = now_seg;
9029 subsegT subseg = now_subseg;
9030 asection *sec;
9031 unsigned int alignment, align_size_1;
9032 unsigned int isa_1_descsz, feature_2_descsz, descsz;
9033 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9034 unsigned int padding;
9035
9036 if (!IS_ELF || !x86_used_note)
9037 return;
9038
9039 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9040
9041 /* The .note.gnu.property section layout:
9042
9043 Field Length Contents
9044 ---- ---- ----
9045 n_namsz 4 4
9046 n_descsz 4 The note descriptor size
9047 n_type 4 NT_GNU_PROPERTY_TYPE_0
9048 n_name 4 "GNU"
9049 n_desc n_descsz The program property array
9050 .... .... ....
9051 */
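/* E.g. for ELFCLASS64, each property is padded to 8 bytes: the
GNU_PROPERTY_X86_ISA_1_USED entry grows from 12 to 16 bytes, the
GNU_PROPERTY_X86_FEATURE_2_USED entry likewise, so n_descsz is 32
and the whole note takes 16 + 32 = 48 bytes. */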
9052
9053 /* Create the .note.gnu.property section. */
9054 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9055 bfd_set_section_flags (sec,
9056 (SEC_ALLOC
9057 | SEC_LOAD
9058 | SEC_DATA
9059 | SEC_HAS_CONTENTS
9060 | SEC_READONLY));
9061
9062 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9063 {
9064 align_size_1 = 7;
9065 alignment = 3;
9066 }
9067 else
9068 {
9069 align_size_1 = 3;
9070 alignment = 2;
9071 }
9072
9073 bfd_set_section_alignment (sec, alignment);
9074 elf_section_type (sec) = SHT_NOTE;
9075
9076 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9077 + 4-byte data */
9078 isa_1_descsz_raw = 4 + 4 + 4;
9079 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
9080 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9081
9082 feature_2_descsz_raw = isa_1_descsz;
9083 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9084 + 4-byte data */
9085 feature_2_descsz_raw += 4 + 4 + 4;
9086 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
9087 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9088 & ~align_size_1);
9089
9090 descsz = feature_2_descsz;
9091 /* Section size: n_namsz + n_descsz + n_type + n_name + n_desc. */
9092 p = frag_more (4 + 4 + 4 + 4 + descsz);
9093
9094 /* Write n_namsz. */
9095 md_number_to_chars (p, (valueT) 4, 4);
9096
9097 /* Write n_descsz. */
9098 md_number_to_chars (p + 4, (valueT) descsz, 4);
9099
9100 /* Write n_type. */
9101 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9102
9103 /* Write n_name. */
9104 memcpy (p + 4 * 3, "GNU", 4);
9105
9106 /* Write 4-byte type. */
9107 md_number_to_chars (p + 4 * 4,
9108 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9109
9110 /* Write 4-byte data size. */
9111 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9112
9113 /* Write 4-byte data. */
9114 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9115
9116 /* Zero out padding. */
9117 padding = isa_1_descsz - isa_1_descsz_raw;
9118 if (padding)
9119 memset (p + 4 * 7, 0, padding);
9120
9121 /* Write 4-byte type. */
9122 md_number_to_chars (p + isa_1_descsz + 4 * 4,
9123 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9124
9125 /* Write 4-byte data size. */
9126 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9127
9128 /* Write 4-byte data. */
9129 md_number_to_chars (p + isa_1_descsz + 4 * 6,
9130 (valueT) x86_feature_2_used, 4);
9131
9132 /* Zero out padding. */
9133 padding = feature_2_descsz - feature_2_descsz_raw;
9134 if (padding)
9135 memset (p + isa_1_descsz + 4 * 7, 0, padding);
9136
9137 /* We probably can't restore the current segment, for there likely
9138 isn't one yet... */
9139 if (seg && subseg)
9140 subseg_set (seg, subseg);
9141 }
9142
9143 bool
9144 x86_support_sframe_p (void)
9145 {
9146 /* At this time, SFrame stack trace is supported for AMD64 ABI only. */
9147 return (x86_elf_abi == X86_64_ABI);
9148 }
9149
9150 bool
9151 x86_sframe_ra_tracking_p (void)
9152 {
9153 /* In AMD64, return address is always stored on the stack at a fixed offset
9154 from the CFA (provided via x86_sframe_cfa_ra_offset ()).
9155 Do not track explicitly via an SFrame Frame Row Entry. */
9156 return false;
9157 }
9158
9159 offsetT
9160 x86_sframe_cfa_ra_offset (void)
9161 {
9162 gas_assert (x86_elf_abi == X86_64_ABI);
9163 return (offsetT) -8;
9164 }
9165
9166 unsigned char
9167 x86_sframe_get_abi_arch (void)
9168 {
9169 unsigned char sframe_abi_arch = 0;
9170
9171 if (x86_support_sframe_p ())
9172 {
9173 gas_assert (!target_big_endian);
9174 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9175 }
9176
9177 return sframe_abi_arch;
9178 }
9179
9180 #endif
9181
9182 static unsigned int
9183 encoding_length (const fragS *start_frag, offsetT start_off,
9184 const char *frag_now_ptr)
9185 {
9186 unsigned int len = 0;
9187
9188 if (start_frag != frag_now)
9189 {
9190 const fragS *fr = start_frag;
9191
9192 do {
9193 len += fr->fr_fix;
9194 fr = fr->fr_next;
9195 } while (fr && fr != frag_now);
9196 }
9197
9198 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9199 }
9200
9201 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9202 be macro-fused with conditional jumps.
9203 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP-relative address,
9204 or is in one of the following forms:
9205
9206 cmp m, imm
9207 add m, imm
9208 sub m, imm
9209 test m, imm
9210 and m, imm
9211 inc m
9212 dec m
9213
9214 it is not fusible. */
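/* E.g. `cmp %rax, %rbx' followed by `je 1f' may fuse, while
`cmpl $1, (%rax)' followed by `je 1f' (cmp m, imm) may not. */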
9215
9216 static int
9217 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9218 {
9219 /* No RIP address. */
9220 if (i.base_reg && i.base_reg->reg_num == RegIP)
9221 return 0;
9222
9223 /* No opcodes outside of base encoding space. */
9224 if (i.tm.opcode_space != SPACE_BASE)
9225 return 0;
9226
9227 /* add, sub without add/sub m, imm. */
9228 if (i.tm.base_opcode <= 5
9229 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9230 || ((i.tm.base_opcode | 3) == 0x83
9231 && (i.tm.extension_opcode == 0x5
9232 || i.tm.extension_opcode == 0x0)))
9233 {
9234 *mf_cmp_p = mf_cmp_alu_cmp;
9235 return !(i.mem_operands && i.imm_operands);
9236 }
9237
9238 /* and without and m, imm. */
9239 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9240 || ((i.tm.base_opcode | 3) == 0x83
9241 && i.tm.extension_opcode == 0x4))
9242 {
9243 *mf_cmp_p = mf_cmp_test_and;
9244 return !(i.mem_operands && i.imm_operands);
9245 }
9246
9247 /* test without test m imm. */
9248 if ((i.tm.base_opcode | 1) == 0x85
9249 || (i.tm.base_opcode | 1) == 0xa9
9250 || ((i.tm.base_opcode | 1) == 0xf7
9251 && i.tm.extension_opcode == 0))
9252 {
9253 *mf_cmp_p = mf_cmp_test_and;
9254 return !(i.mem_operands && i.imm_operands);
9255 }
9256
9257 /* cmp without cmp m, imm. */
9258 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9259 || ((i.tm.base_opcode | 3) == 0x83
9260 && (i.tm.extension_opcode == 0x7)))
9261 {
9262 *mf_cmp_p = mf_cmp_alu_cmp;
9263 return !(i.mem_operands && i.imm_operands);
9264 }
9265
9266 /* inc, dec without inc/dec m. */
9267 if ((i.tm.cpu_flags.bitfield.cpuno64
9268 && (i.tm.base_opcode | 0xf) == 0x4f)
9269 || ((i.tm.base_opcode | 1) == 0xff
9270 && i.tm.extension_opcode <= 0x1))
9271 {
9272 *mf_cmp_p = mf_cmp_incdec;
9273 return !i.mem_operands;
9274 }
9275
9276 return 0;
9277 }
9278
9279 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9280
9281 static int
9282 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9283 {
9284 /* NB: Doesn't work with COND_JUMP86 without i386. */
9285 if (!align_branch_power
9286 || now_seg == absolute_section
9287 || !cpu_arch_flags.bitfield.cpui386
9288 || !(align_branch & align_branch_fused_bit))
9289 return 0;
9290
9291 if (maybe_fused_with_jcc_p (mf_cmp_p))
9292 {
9293 if (last_insn.kind == last_insn_other
9294 || last_insn.seg != now_seg)
9295 return 1;
9296 if (flag_debug)
9297 as_warn_where (last_insn.file, last_insn.line,
9298 _("`%s` skips -malign-branch-boundary on `%s`"),
9299 last_insn.name, insn_name (&i.tm));
9300 }
9301
9302 return 0;
9303 }
9304
9305 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9306
9307 static int
9308 add_branch_prefix_frag_p (void)
9309 {
9310 /* NB: Doesn't work with COND_JUMP86 without i386. Don't add a prefix
9311 to PadLock instructions since they include prefixes in the opcode. */
9312 if (!align_branch_power
9313 || !align_branch_prefix_size
9314 || now_seg == absolute_section
9315 || i.tm.cpu_flags.bitfield.cpupadlock
9316 || !cpu_arch_flags.bitfield.cpui386)
9317 return 0;
9318
9319 /* Don't add a prefix if this insn is itself a prefix, or if it has
9320 no operands (a segment prefix could then have special meaning). */
9321 if (!i.operands || i.tm.opcode_modifier.isprefix)
9322 return 0;
9323
9324 if (last_insn.kind == last_insn_other
9325 || last_insn.seg != now_seg)
9326 return 1;
9327
9328 if (flag_debug)
9329 as_warn_where (last_insn.file, last_insn.line,
9330 _("`%s` skips -malign-branch-boundary on `%s`"),
9331 last_insn.name, insn_name (&i.tm));
9332
9333 return 0;
9334 }
9335
9336 /* Return 1 if a BRANCH_PADDING frag should be generated. */
9337
9338 static int
9339 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9340 enum mf_jcc_kind *mf_jcc_p)
9341 {
9342 int add_padding;
9343
9344 /* NB: Doesn't work with COND_JUMP86 without i386. */
9345 if (!align_branch_power
9346 || now_seg == absolute_section
9347 || !cpu_arch_flags.bitfield.cpui386
9348 || i.tm.opcode_space != SPACE_BASE)
9349 return 0;
9350
9351 add_padding = 0;
9352
9353 /* Check for jcc and direct jmp. */
9354 if (i.tm.opcode_modifier.jump == JUMP)
9355 {
9356 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9357 {
9358 *branch_p = align_branch_jmp;
9359 add_padding = align_branch & align_branch_jmp_bit;
9360 }
9361 else
9362 {
9363 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
9364 table, ignore the lowest bit. */
9365 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9366 *branch_p = align_branch_jcc;
9367 if ((align_branch & align_branch_jcc_bit))
9368 add_padding = 1;
9369 }
9370 }
9371 else if ((i.tm.base_opcode | 1) == 0xc3)
9372 {
9373 /* Near ret. */
9374 *branch_p = align_branch_ret;
9375 if ((align_branch & align_branch_ret_bit))
9376 add_padding = 1;
9377 }
9378 else
9379 {
9380 /* Check for indirect jmp, direct and indirect calls. */
9381 if (i.tm.base_opcode == 0xe8)
9382 {
9383 /* Direct call. */
9384 *branch_p = align_branch_call;
9385 if ((align_branch & align_branch_call_bit))
9386 add_padding = 1;
9387 }
9388 else if (i.tm.base_opcode == 0xff
9389 && (i.tm.extension_opcode == 2
9390 || i.tm.extension_opcode == 4))
9391 {
9392 /* Indirect call and jmp. */
9393 *branch_p = align_branch_indirect;
9394 if ((align_branch & align_branch_indirect_bit))
9395 add_padding = 1;
9396 }
9397
9398 if (add_padding
9399 && i.disp_operands
9400 && tls_get_addr
9401 && (i.op[0].disps->X_op == O_symbol
9402 || (i.op[0].disps->X_op == O_subtract
9403 && i.op[0].disps->X_op_symbol == GOT_symbol)))
9404 {
9405 symbolS *s = i.op[0].disps->X_add_symbol;
9406 /* No padding for a call to a global or undefined tls_get_addr. */
9407 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9408 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9409 return 0;
9410 }
9411 }
9412
9413 if (add_padding
9414 && last_insn.kind != last_insn_other
9415 && last_insn.seg == now_seg)
9416 {
9417 if (flag_debug)
9418 as_warn_where (last_insn.file, last_insn.line,
9419 _("`%s` skips -malign-branch-boundary on `%s`"),
9420 last_insn.name, insn_name (&i.tm));
9421 return 0;
9422 }
9423
9424 return add_padding;
9425 }
9426
9427 static void
9428 output_insn (void)
9429 {
9430 fragS *insn_start_frag;
9431 offsetT insn_start_off;
9432 fragS *fragP = NULL;
9433 enum align_branch_kind branch = align_branch_none;
9434 /* The initializer is arbitrary, just to avoid an uninitialized-use
9435 error; it's actually either assigned in add_branch_padding_frag_p
9436 or never used. */
9437 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9438
9439 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9440 if (IS_ELF && x86_used_note && now_seg != absolute_section)
9441 {
9442 if ((i.xstate & xstate_tmm) == xstate_tmm
9443 || i.tm.cpu_flags.bitfield.cpuamx_tile)
9444 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9445
9446 if (i.tm.cpu_flags.bitfield.cpu8087
9447 || i.tm.cpu_flags.bitfield.cpu287
9448 || i.tm.cpu_flags.bitfield.cpu387
9449 || i.tm.cpu_flags.bitfield.cpu687
9450 || i.tm.cpu_flags.bitfield.cpufisttp)
9451 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9452
9453 if ((i.xstate & xstate_mmx)
9454 || i.tm.mnem_off == MN_emms
9455 || i.tm.mnem_off == MN_femms)
9456 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9457
9458 if (i.index_reg)
9459 {
9460 if (i.index_reg->reg_type.bitfield.zmmword)
9461 i.xstate |= xstate_zmm;
9462 else if (i.index_reg->reg_type.bitfield.ymmword)
9463 i.xstate |= xstate_ymm;
9464 else if (i.index_reg->reg_type.bitfield.xmmword)
9465 i.xstate |= xstate_xmm;
9466 }
9467
9468 /* vzeroall / vzeroupper */
9469 if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9470 i.xstate |= xstate_ymm;
9471
9472 if ((i.xstate & xstate_xmm)
9473 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9474 || (i.tm.base_opcode == 0xae
9475 && (i.tm.cpu_flags.bitfield.cpusse
9476 || i.tm.cpu_flags.bitfield.cpuavx))
9477 || i.tm.cpu_flags.bitfield.cpuwidekl
9478 || i.tm.cpu_flags.bitfield.cpukl)
9479 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9480
9481 if ((i.xstate & xstate_ymm) == xstate_ymm)
9482 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9483 if ((i.xstate & xstate_zmm) == xstate_zmm)
9484 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9485 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9486 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9487 if (i.tm.cpu_flags.bitfield.cpufxsr)
9488 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9489 if (i.tm.cpu_flags.bitfield.cpuxsave)
9490 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9491 if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9492 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9493 if (i.tm.cpu_flags.bitfield.cpuxsavec)
9494 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9495
9496 if (x86_feature_2_used
9497 || i.tm.cpu_flags.bitfield.cpucmov
9498 || i.tm.cpu_flags.bitfield.cpusyscall
9499 || i.tm.mnem_off == MN_cmpxchg8b)
9500 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9501 if (i.tm.cpu_flags.bitfield.cpusse3
9502 || i.tm.cpu_flags.bitfield.cpussse3
9503 || i.tm.cpu_flags.bitfield.cpusse4_1
9504 || i.tm.cpu_flags.bitfield.cpusse4_2
9505 || i.tm.cpu_flags.bitfield.cpucx16
9506 || i.tm.cpu_flags.bitfield.cpupopcnt
9507 /* LAHF-SAHF insns in 64-bit mode. */
9508 || (flag_code == CODE_64BIT
9509 && (i.tm.base_opcode | 1) == 0x9f
9510 && i.tm.opcode_space == SPACE_BASE))
9511 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9512 if (i.tm.cpu_flags.bitfield.cpuavx
9513 || i.tm.cpu_flags.bitfield.cpuavx2
9514 /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
9515 XOP, FMA4, LWP, TBM, and AMX. */
9516 || (i.tm.opcode_modifier.vex
9517 && !i.tm.cpu_flags.bitfield.cpuavx512f
9518 && !i.tm.cpu_flags.bitfield.cpuavx512bw
9519 && !i.tm.cpu_flags.bitfield.cpuavx512dq
9520 && !i.tm.cpu_flags.bitfield.cpuxop
9521 && !i.tm.cpu_flags.bitfield.cpufma4
9522 && !i.tm.cpu_flags.bitfield.cpulwp
9523 && !i.tm.cpu_flags.bitfield.cputbm
9524 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9525 || i.tm.cpu_flags.bitfield.cpuf16c
9526 || i.tm.cpu_flags.bitfield.cpufma
9527 || i.tm.cpu_flags.bitfield.cpulzcnt
9528 || i.tm.cpu_flags.bitfield.cpumovbe
9529 || i.tm.cpu_flags.bitfield.cpuxsaves
9530 || (x86_feature_2_used
9531 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9532 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9533 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9534 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9535 if (i.tm.cpu_flags.bitfield.cpuavx512f
9536 || i.tm.cpu_flags.bitfield.cpuavx512bw
9537 || i.tm.cpu_flags.bitfield.cpuavx512dq
9538 || i.tm.cpu_flags.bitfield.cpuavx512vl
9539 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9540 AVX512-4FMAPS, and AVX512-4VNNIW. */
9541 || (i.tm.opcode_modifier.evex
9542 && !i.tm.cpu_flags.bitfield.cpuavx512er
9543 && !i.tm.cpu_flags.bitfield.cpuavx512pf
9544 && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
9545 && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9546 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9547 }
9548 #endif
9549
9550 /* Tie dwarf2 debug info to the address at the start of the insn.
9551 We can't do this after the insn has been output as the current
9552 frag may have been closed off, e.g. by frag_var. */
9553 dwarf2_emit_insn (0);
9554
9555 insn_start_frag = frag_now;
9556 insn_start_off = frag_now_fix ();
9557
9558 if (add_branch_padding_frag_p (&branch, &mf_jcc))
9559 {
9560 char *p;
9561 /* Branch can be 8 bytes. Leave some room for prefixes. */
9562 unsigned int max_branch_padding_size = 14;
9563
9564 /* Align section to boundary. */
9565 record_alignment (now_seg, align_branch_power);
9566
9567 /* Make room for padding. */
9568 frag_grow (max_branch_padding_size);
9569
9570 /* Start of the padding. */
9571 p = frag_more (0);
9572
9573 fragP = frag_now;
9574
9575 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9576 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9577 NULL, 0, p);
9578
9579 fragP->tc_frag_data.mf_type = mf_jcc;
9580 fragP->tc_frag_data.branch_type = branch;
9581 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9582 }
9583
9584 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9585 && !pre_386_16bit_warned)
9586 {
9587 as_warn (_("use .code16 to ensure correct addressing mode"));
9588 pre_386_16bit_warned = true;
9589 }
9590
9591 /* Output jumps. */
9592 if (i.tm.opcode_modifier.jump == JUMP)
9593 output_branch ();
9594 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9595 || i.tm.opcode_modifier.jump == JUMP_DWORD)
9596 output_jump ();
9597 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9598 output_interseg_jump ();
9599 else
9600 {
9601 /* Output normal instructions here. */
9602 char *p;
9603 unsigned char *q;
9604 unsigned int j;
9605 enum mf_cmp_kind mf_cmp;
9606
9607 if (avoid_fence
9608 && (i.tm.base_opcode == 0xaee8
9609 || i.tm.base_opcode == 0xaef0
9610 || i.tm.base_opcode == 0xaef8))
9611 {
9612 /* Encode lfence, mfence, and sfence as
9613 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
9614 if (flag_code == CODE_16BIT)
9615 as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
9616 else if (omit_lock_prefix)
9617 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9618 insn_name (&i.tm));
9619 else if (now_seg != absolute_section)
9620 {
9621 offsetT val = 0x240483f0ULL;
9622
9623 p = frag_more (5);
9624 md_number_to_chars (p, val, 5);
9625 }
9626 else
9627 abs_section_offset += 5;
9628 return;
9629 }
9630
9631 /* Some processors fail on the LOCK prefix. This option makes the
9632 assembler ignore the LOCK prefix and serves as a workaround. */
9633 if (omit_lock_prefix)
9634 {
9635 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9636 && i.tm.opcode_modifier.isprefix)
9637 return;
9638 i.prefix[LOCK_PREFIX] = 0;
9639 }
9640
9641 if (branch)
9642 /* Skip if this is a branch. */
9643 ;
9644 else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9645 {
9646 /* Make room for padding. */
9647 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9648 p = frag_more (0);
9649
9650 fragP = frag_now;
9651
9652 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9653 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9654 NULL, 0, p);
9655
9656 fragP->tc_frag_data.mf_type = mf_cmp;
9657 fragP->tc_frag_data.branch_type = align_branch_fused;
9658 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9659 }
9660 else if (add_branch_prefix_frag_p ())
9661 {
9662 unsigned int max_prefix_size = align_branch_prefix_size;
9663
9664 /* Make room for padding. */
9665 frag_grow (max_prefix_size);
9666 p = frag_more (0);
9667
9668 fragP = frag_now;
9669
9670 frag_var (rs_machine_dependent, max_prefix_size, 0,
9671 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9672 NULL, 0, p);
9673
9674 fragP->tc_frag_data.max_bytes = max_prefix_size;
9675 }
9676
9677 /* Since the VEX/EVEX prefix encodes the implied legacy prefix, we
9678 don't need to emit an explicit prefix byte. */
9679 if (!is_any_vex_encoding (&i.tm))
9680 {
9681 switch (i.tm.opcode_modifier.opcodeprefix)
9682 {
9683 case PREFIX_0X66:
9684 add_prefix (0x66);
9685 break;
9686 case PREFIX_0XF2:
9687 add_prefix (0xf2);
9688 break;
9689 case PREFIX_0XF3:
9690 if (!i.tm.cpu_flags.bitfield.cpupadlock
9691 || (i.prefix[REP_PREFIX] != 0xf3))
9692 add_prefix (0xf3);
9693 break;
9694 case PREFIX_NONE:
9695 switch (i.opcode_length)
9696 {
9697 case 2:
9698 break;
9699 case 1:
9700 /* Check for pseudo prefixes. */
9701 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9702 break;
9703 as_bad_where (insn_start_frag->fr_file,
9704 insn_start_frag->fr_line,
9705 _("pseudo prefix without instruction"));
9706 return;
9707 default:
9708 abort ();
9709 }
9710 break;
9711 default:
9712 abort ();
9713 }
9714
9715 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9716 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9717 R_X86_64_GOTTPOFF relocation so that linker can safely
9718 perform IE->LE optimization. A dummy REX_OPCODE prefix
9719 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9720 relocation for GDesc -> IE/LE optimization. */
9721 if (x86_elf_abi == X86_64_X32_ABI
9722 && i.operands == 2
9723 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9724 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9725 && i.prefix[REX_PREFIX] == 0)
9726 add_prefix (REX_OPCODE);
9727 #endif
9728
9729 /* The prefix bytes. */
9730 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9731 if (*q)
9732 frag_opcode_byte (*q);
9733 }
9734 else
9735 {
9736 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9737 if (*q)
9738 switch (j)
9739 {
9740 case SEG_PREFIX:
9741 case ADDR_PREFIX:
9742 frag_opcode_byte (*q);
9743 break;
9744 default:
9745 /* There should be no other prefixes for instructions
9746 with a VEX prefix. */
9747 abort ();
9748 }
9749
9750 /* For EVEX instructions i.vrex should become 0 after
9751 build_evex_prefix. For VEX instructions the upper 16 registers
9752 aren't available, so VREX should be 0. */
9753 if (i.vrex)
9754 abort ();
9755 /* Now the VEX prefix. */
9756 if (now_seg != absolute_section)
9757 {
9758 p = frag_more (i.vex.length);
9759 for (j = 0; j < i.vex.length; j++)
9760 p[j] = i.vex.bytes[j];
9761 }
9762 else
9763 abs_section_offset += i.vex.length;
9764 }
9765
9766 /* Now the opcode; be careful about word order here! */
9767 j = i.opcode_length;
9768 if (!i.vex.length)
9769 switch (i.tm.opcode_space)
9770 {
9771 case SPACE_BASE:
9772 break;
9773 case SPACE_0F:
9774 ++j;
9775 break;
9776 case SPACE_0F38:
9777 case SPACE_0F3A:
9778 j += 2;
9779 break;
9780 default:
9781 abort ();
9782 }
9783
9784 if (now_seg == absolute_section)
9785 abs_section_offset += j;
9786 else if (j == 1)
9787 {
9788 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9789 }
9790 else
9791 {
9792 p = frag_more (j);
9793 if (!i.vex.length
9794 && i.tm.opcode_space != SPACE_BASE)
9795 {
9796 *p++ = 0x0f;
9797 if (i.tm.opcode_space != SPACE_0F)
9798 *p++ = i.tm.opcode_space == SPACE_0F38
9799 ? 0x38 : 0x3a;
9800 }
9801
9802 switch (i.opcode_length)
9803 {
9804 case 2:
9805 /* Put out high byte first: can't use md_number_to_chars! */
9806 *p++ = (i.tm.base_opcode >> 8) & 0xff;
9807 /* Fall through. */
9808 case 1:
9809 *p = i.tm.base_opcode & 0xff;
9810 break;
9811 default:
9812 abort ();
9813 break;
9814 }
9815
9816 }
9817
9818 /* Now the modrm byte and sib byte (if present). */
9819 if (i.tm.opcode_modifier.modrm)
9820 {
9821 frag_opcode_byte ((i.rm.regmem << 0)
9822 | (i.rm.reg << 3)
9823 | (i.rm.mode << 6));
9824 /* If i.rm.regmem == ESP (4)
9825 && i.rm.mode != (Register mode)
9826 && not 16 bit
9827 ==> need a SIB byte. */
9828 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9829 && i.rm.mode != 3
9830 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9831 frag_opcode_byte ((i.sib.base << 0)
9832 | (i.sib.index << 3)
9833 | (i.sib.scale << 6));
9834 }
9835
9836 if (i.disp_operands)
9837 output_disp (insn_start_frag, insn_start_off);
9838
9839 if (i.imm_operands)
9840 output_imm (insn_start_frag, insn_start_off);
9841
9842 /*
9843 * In the absolute section, frag_now_fix () returns plain abs_section_offset,
9844 * and abs_section_offset isn't updated as data gets added to the frag;
9845 * together these would break the logic below.
9846 */
9847 if (now_seg != absolute_section)
9848 {
9849 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9850 if (j > 15)
9851 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
9852 j);
9853 else if (fragP)
9854 {
9855 /* NB: Don't add a prefix with a GOTPC relocation since
9856 output_disp() above depends on the fixed encoding
9857 length. A prefix can't be added with a TLS relocation
9858 either, since that breaks TLS linker optimization. */
9859 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
9860 /* Prefix count on the current instruction. */
9861 unsigned int count = i.vex.length;
9862 unsigned int k;
9863 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
9864 /* REX byte is encoded in VEX/EVEX prefix. */
9865 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
9866 count++;
9867
9868 /* Count prefixes for extended opcode maps. */
9869 if (!i.vex.length)
9870 switch (i.tm.opcode_space)
9871 {
9872 case SPACE_BASE:
9873 break;
9874 case SPACE_0F:
9875 count++;
9876 break;
9877 case SPACE_0F38:
9878 case SPACE_0F3A:
9879 count += 2;
9880 break;
9881 default:
9882 abort ();
9883 }
9884
9885 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9886 == BRANCH_PREFIX)
9887 {
9888 /* Set the maximum prefix size in BRANCH_PREFIX
9889 frag. */
9890 if (fragP->tc_frag_data.max_bytes > max)
9891 fragP->tc_frag_data.max_bytes = max;
9892 if (fragP->tc_frag_data.max_bytes > count)
9893 fragP->tc_frag_data.max_bytes -= count;
9894 else
9895 fragP->tc_frag_data.max_bytes = 0;
9896 }
9897 else
9898 {
9899 /* Remember the maximum prefix size in FUSED_JCC_PADDING
9900 frag. */
9901 unsigned int max_prefix_size;
9902 if (align_branch_prefix_size > max)
9903 max_prefix_size = max;
9904 else
9905 max_prefix_size = align_branch_prefix_size;
9906 if (max_prefix_size > count)
9907 fragP->tc_frag_data.max_prefix_length
9908 = max_prefix_size - count;
9909 }
9910
9911 /* Use the existing segment prefix if possible. Otherwise use the
9912 CS segment prefix in 64-bit mode. In 32-bit mode, use the SS
9913 segment prefix with an ESP/EBP base register and the DS segment
9914 prefix without one. */
9915 if (i.prefix[SEG_PREFIX])
9916 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9917 else if (flag_code == CODE_64BIT)
9918 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9919 else if (i.base_reg
9920 && (i.base_reg->reg_num == 4
9921 || i.base_reg->reg_num == 5))
9922 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9923 else
9924 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9925 }
9926 }
9927 }
9928
9929 /* NB: This doesn't work with COND_JUMP86 without i386. */
9930 if (align_branch_power
9931 && now_seg != absolute_section
9932 && cpu_arch_flags.bitfield.cpui386)
9933 {
9934 /* Terminate each frag so that we can add prefixes and check for
9935 fused jcc. */
9936 frag_wane (frag_now);
9937 frag_new (0);
9938 }
9939
9940 #ifdef DEBUG386
9941 if (flag_debug)
9942 {
9943 pi ("" /*line*/, &i);
9944 }
9945 #endif /* DEBUG386 */
9946 }
9947
9948 /* Return the size of the displacement operand N. */
9949
9950 static int
9951 disp_size (unsigned int n)
9952 {
9953 int size = 4;
9954
9955 if (i.types[n].bitfield.disp64)
9956 size = 8;
9957 else if (i.types[n].bitfield.disp8)
9958 size = 1;
9959 else if (i.types[n].bitfield.disp16)
9960 size = 2;
9961 return size;
9962 }
9963
9964 /* Return the size of the immediate operand N. */
9965
9966 static int
9967 imm_size (unsigned int n)
9968 {
9969 int size = 4;
9970 if (i.types[n].bitfield.imm64)
9971 size = 8;
9972 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9973 size = 1;
9974 else if (i.types[n].bitfield.imm16)
9975 size = 2;
9976 return size;
9977 }
9978
9979 static void
9980 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
9981 {
9982 char *p;
9983 unsigned int n;
9984
9985 for (n = 0; n < i.operands; n++)
9986 {
9987 if (operand_type_check (i.types[n], disp))
9988 {
9989 int size = disp_size (n);
9990
9991 if (now_seg == absolute_section)
9992 abs_section_offset += size;
9993 else if (i.op[n].disps->X_op == O_constant)
9994 {
9995 offsetT val = i.op[n].disps->X_add_number;
9996
9997 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
9998 size);
9999 p = frag_more (size);
10000 md_number_to_chars (p, val, size);
10001 }
10002 else
10003 {
10004 enum bfd_reloc_code_real reloc_type;
10005 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10006 bool sign = (flag_code == CODE_64BIT && size == 4
10007 && (!want_disp32 (&i.tm)
10008 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10009 && !i.types[n].bitfield.baseindex)))
10010 || pcrel;
10011 fixS *fixP;
10012
10013 /* We can't have an 8-bit displacement here. */
10014 gas_assert (!i.types[n].bitfield.disp8);
10015
10016 /* The PC relative address is computed relative
10017 to the instruction boundary, so if immediate
10018 fields follow, we need to adjust the value. */
10019 if (pcrel && i.imm_operands)
10020 {
10021 unsigned int n1;
10022 int sz = 0;
10023
10024 for (n1 = 0; n1 < i.operands; n1++)
10025 if (operand_type_check (i.types[n1], imm))
10026 {
10027 /* Only one immediate is allowed for a
10028 PC-relative address. */
10029 gas_assert (sz == 0);
10030 sz = imm_size (n1);
10031 i.op[n].disps->X_add_number -= sz;
10032 }
10033 /* We should find the immediate. */
10034 gas_assert (sz != 0);
10035 }
10036
10037 p = frag_more (size);
10038 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10039 if (GOT_symbol
10040 && GOT_symbol == i.op[n].disps->X_add_symbol
10041 && (((reloc_type == BFD_RELOC_32
10042 || reloc_type == BFD_RELOC_X86_64_32S
10043 || (reloc_type == BFD_RELOC_64
10044 && object_64bit))
10045 && (i.op[n].disps->X_op == O_symbol
10046 || (i.op[n].disps->X_op == O_add
10047 && ((symbol_get_value_expression
10048 (i.op[n].disps->X_op_symbol)->X_op)
10049 == O_subtract))))
10050 || reloc_type == BFD_RELOC_32_PCREL))
10051 {
10052 if (!object_64bit)
10053 {
10054 reloc_type = BFD_RELOC_386_GOTPC;
10055 i.has_gotpc_tls_reloc = true;
10056 i.op[n].disps->X_add_number +=
10057 encoding_length (insn_start_frag, insn_start_off, p);
10058 }
10059 else if (reloc_type == BFD_RELOC_64)
10060 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10061 else
10062 /* Don't do the adjustment for x86-64, as there
10063 the pcrel addressing is relative to the _next_
10064 insn, and that is taken care of in other code. */
10065 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10066 }
10067 else if (align_branch_power)
10068 {
10069 switch (reloc_type)
10070 {
10071 case BFD_RELOC_386_TLS_GD:
10072 case BFD_RELOC_386_TLS_LDM:
10073 case BFD_RELOC_386_TLS_IE:
10074 case BFD_RELOC_386_TLS_IE_32:
10075 case BFD_RELOC_386_TLS_GOTIE:
10076 case BFD_RELOC_386_TLS_GOTDESC:
10077 case BFD_RELOC_386_TLS_DESC_CALL:
10078 case BFD_RELOC_X86_64_TLSGD:
10079 case BFD_RELOC_X86_64_TLSLD:
10080 case BFD_RELOC_X86_64_GOTTPOFF:
10081 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10082 case BFD_RELOC_X86_64_TLSDESC_CALL:
10083 i.has_gotpc_tls_reloc = true;
10084 default:
10085 break;
10086 }
10087 }
10088 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10089 size, i.op[n].disps, pcrel,
10090 reloc_type);
10091
10092 if (flag_code == CODE_64BIT && size == 4 && pcrel
10093 && !i.prefix[ADDR_PREFIX])
10094 fixP->fx_signed = 1;
10095
10096 /* Check for "call/jmp *mem", "mov mem, %reg",
10097 "test %reg, mem" and "binop mem, %reg" where binop
10098 is one of adc, add, and, cmp, or, sbb, sub, xor
10099 instructions without a data prefix. Always generate
10100 R_386_GOT32X for "sym@GOT" operand in 32-bit mode. */
10101 if (i.prefix[DATA_PREFIX] == 0
10102 && (generate_relax_relocations
10103 || (!object_64bit
10104 && i.rm.mode == 0
10105 && i.rm.regmem == 5))
10106 && (i.rm.mode == 2
10107 || (i.rm.mode == 0 && i.rm.regmem == 5))
10108 && i.tm.opcode_space == SPACE_BASE
10109 && ((i.operands == 1
10110 && i.tm.base_opcode == 0xff
10111 && (i.rm.reg == 2 || i.rm.reg == 4))
10112 || (i.operands == 2
10113 && (i.tm.base_opcode == 0x8b
10114 || i.tm.base_opcode == 0x85
10115 || (i.tm.base_opcode & ~0x38) == 0x03))))
10116 {
10117 if (object_64bit)
10118 {
10119 fixP->fx_tcbit = i.rex != 0;
10120 if (i.base_reg
10121 && (i.base_reg->reg_num == RegIP))
10122 fixP->fx_tcbit2 = 1;
10123 }
10124 else
10125 fixP->fx_tcbit2 = 1;
10126 }
10127 }
10128 }
10129 }
10130 }
10131
10132 static void
10133 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10134 {
10135 char *p;
10136 unsigned int n;
10137
10138 for (n = 0; n < i.operands; n++)
10139 {
10140 if (operand_type_check (i.types[n], imm))
10141 {
10142 int size = imm_size (n);
10143
10144 if (now_seg == absolute_section)
10145 abs_section_offset += size;
10146 else if (i.op[n].imms->X_op == O_constant)
10147 {
10148 offsetT val;
10149
10150 val = offset_in_range (i.op[n].imms->X_add_number,
10151 size);
10152 p = frag_more (size);
10153 md_number_to_chars (p, val, size);
10154 }
10155 else
10156 {
10157 /* Not absolute_section.
10158 Need a 32-bit fixup (8-bit
10159 non-absolute immediates aren't supported). Try to support
10160 other sizes ... */
10161 enum bfd_reloc_code_real reloc_type;
10162 int sign;
10163
10164 if (i.types[n].bitfield.imm32s
10165 && (i.suffix == QWORD_MNEM_SUFFIX
10166 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10167 sign = 1;
10168 else
10169 sign = 0;
10170
10171 p = frag_more (size);
10172 reloc_type = reloc (size, 0, sign, i.reloc[n]);
10173
10174 /* This is tough to explain. We end up with this one if we
10175 * have operands that look like
10176 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
10177 * obtain the absolute address of the GOT, and it is strongly
10178 * preferable from a performance point of view to avoid using
10179 * a runtime relocation for this. The actual sequence of
10180 * instructions often looks something like:
10181 *
10182 * call .L66
10183 * .L66:
10184 * popl %ebx
10185 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10186 *
10187 * The call and pop essentially return the absolute address
10188 * of the label .L66 and store it in %ebx. The linker itself
10189 * will ultimately change the first operand of the addl so
10190 * that %ebx points to the GOT, but to keep things simple, the
10191 * .o file must have this operand set so that it generates not
10192 * the absolute address of .L66, but the absolute address of
10193 * itself. This allows the linker to simply treat a GOTPC
10194 * relocation as asking for a pcrel offset to the GOT to be
10195 * added in, and the addend of the relocation is stored in the
10196 * operand field for the instruction itself.
10197 *
10198 * Our job here is to fix the operand so that it would add
10199 * the correct offset so that %ebx would point to itself. The
10200 * thing that is tricky is that .-.L66 will point to the
10201 * beginning of the instruction, so we need to further modify
10202 * the operand so that it will point to itself. There are
10203 * other cases where you have something like:
10204 *
10205 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10206 *
10207 * and here no correction would be required. Internally in
10208 * the assembler we treat operands of this form as not being
10209 * pcrel since the '.' is explicitly mentioned, and I wonder
10210 * whether it would simplify matters to do it this way. Who
10211 * knows. In earlier versions of the PIC patches, the
10212 * pcrel_adjust field was used to store the correction, but
10213 * since the expression is not pcrel, I felt it would be
10214 * confusing to do it this way. */
10215
10216 if ((reloc_type == BFD_RELOC_32
10217 || reloc_type == BFD_RELOC_X86_64_32S
10218 || reloc_type == BFD_RELOC_64)
10219 && GOT_symbol
10220 && GOT_symbol == i.op[n].imms->X_add_symbol
10221 && (i.op[n].imms->X_op == O_symbol
10222 || (i.op[n].imms->X_op == O_add
10223 && ((symbol_get_value_expression
10224 (i.op[n].imms->X_op_symbol)->X_op)
10225 == O_subtract))))
10226 {
10227 if (!object_64bit)
10228 reloc_type = BFD_RELOC_386_GOTPC;
10229 else if (size == 4)
10230 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10231 else if (size == 8)
10232 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10233 i.has_gotpc_tls_reloc = true;
10234 i.op[n].imms->X_add_number +=
10235 encoding_length (insn_start_frag, insn_start_off, p);
10236 }
10237 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10238 i.op[n].imms, 0, reloc_type);
10239 }
10240 }
10241 }
10242 }
10243 \f
10244 /* x86_cons_fix_new is called via the expression parsing code when a
10245 reloc is needed. We use this hook to get the correct .got reloc. */
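/* E.g. (illustrative): ".long foo@GOTOFF" in 32-bit output reaches this
   hook with LEN 4 and ends up producing an R_386_GOTOFF relocation
   against foo. */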
10246 static int cons_sign = -1;
10247
10248 void
10249 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10250 expressionS *exp, bfd_reloc_code_real_type r)
10251 {
10252 r = reloc (len, 0, cons_sign, r);
10253
10254 #ifdef TE_PE
10255 if (exp->X_op == O_secrel)
10256 {
10257 exp->X_op = O_symbol;
10258 r = BFD_RELOC_32_SECREL;
10259 }
10260 else if (exp->X_op == O_secidx)
10261 r = BFD_RELOC_16_SECIDX;
10262 #endif
10263
10264 fix_new_exp (frag, off, len, exp, 0, r);
10265 }
10266
10267 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10268 purpose of the `.dc.a' internal pseudo-op. */
10269
10270 int
10271 x86_address_bytes (void)
10272 {
10273 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10274 return 4;
10275 return stdoutput->arch_info->bits_per_address / 8;
10276 }
10277
10278 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10279 || defined (LEX_AT)) && !defined (TE_PE)
10280 # define lex_got(reloc, adjust, types) NULL
10281 #else
10282 /* Parse operands of the form
10283 <symbol>@GOTOFF+<nnn>
10284 and similar .plt or .got references.
10285
10286 If we find one, set up the correct relocation in REL and copy the
10287 input string, minus the `@GOTOFF', into a malloc'd buffer for
10288 parsing by the calling routine. Return this buffer, and if ADJUST
10289 is non-null set it to the length of the string we removed from the
10290 input line. Otherwise return NULL. */
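/* An illustrative walk-through: given "foo@GOTOFF+4" on the input line,
   a buffer containing "foo +4" is returned, *REL is set to
   BFD_RELOC_386_GOTOFF (BFD_RELOC_X86_64_GOTOFF64 for 64-bit output),
   and *ADJUST to the number of characters removed (6 here, "@GOTOFF"
   having been replaced by a single blank). */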
10291 static char *
10292 lex_got (enum bfd_reloc_code_real *rel,
10293 int *adjust,
10294 i386_operand_type *types)
10295 {
10296 /* Some of the relocations depend on the size of the field to be
10297 relocated. But in our callers i386_immediate and i386_displacement
10298 we don't yet know the operand size (this will be set by insn
10299 matching). Hence we record the word32 relocation here,
10300 and adjust the reloc according to the real size in reloc(). */
10301 static const struct
10302 {
10303 const char *str;
10304 int len;
10305 const enum bfd_reloc_code_real rel[2];
10306 const i386_operand_type types64;
10307 bool need_GOT_symbol;
10308 }
10309 gotrel[] =
10310 {
10311
10312 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
10313 { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
10314 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
10315 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
10316 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
10317 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
10318 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
10319 { .imm64 = 1, .disp64 = 1 } }
10320
10321 #ifndef TE_PE
10322 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10323 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
10324 BFD_RELOC_SIZE32 },
10325 { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
10326 #endif
10327 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
10328 BFD_RELOC_X86_64_PLTOFF64 },
10329 { .bitfield = { .imm64 = 1 } }, true },
10330 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
10331 BFD_RELOC_X86_64_PLT32 },
10332 OPERAND_TYPE_IMM32_32S_DISP32, false },
10333 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
10334 BFD_RELOC_X86_64_GOTPLT64 },
10335 OPERAND_TYPE_IMM64_DISP64, true },
10336 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
10337 BFD_RELOC_X86_64_GOTOFF64 },
10338 OPERAND_TYPE_IMM64_DISP64, true },
10339 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10340 BFD_RELOC_X86_64_GOTPCREL },
10341 OPERAND_TYPE_IMM32_32S_DISP32, true },
10342 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
10343 BFD_RELOC_X86_64_TLSGD },
10344 OPERAND_TYPE_IMM32_32S_DISP32, true },
10345 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
10346 _dummy_first_bfd_reloc_code_real },
10347 OPERAND_TYPE_NONE, true },
10348 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
10349 BFD_RELOC_X86_64_TLSLD },
10350 OPERAND_TYPE_IMM32_32S_DISP32, true },
10351 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10352 BFD_RELOC_X86_64_GOTTPOFF },
10353 OPERAND_TYPE_IMM32_32S_DISP32, true },
10354 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
10355 BFD_RELOC_X86_64_TPOFF32 },
10356 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10357 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
10358 _dummy_first_bfd_reloc_code_real },
10359 OPERAND_TYPE_NONE, true },
10360 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
10361 BFD_RELOC_X86_64_DTPOFF32 },
10362 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10363 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10364 _dummy_first_bfd_reloc_code_real },
10365 OPERAND_TYPE_NONE, true },
10366 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10367 _dummy_first_bfd_reloc_code_real },
10368 OPERAND_TYPE_NONE, true },
10369 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
10370 BFD_RELOC_X86_64_GOT32 },
10371 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10372 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
10373 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10374 OPERAND_TYPE_IMM32_32S_DISP32, true },
10375 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
10376 BFD_RELOC_X86_64_TLSDESC_CALL },
10377 OPERAND_TYPE_IMM32_32S_DISP32, true },
10378 #else /* TE_PE */
10379 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10380 BFD_RELOC_32_SECREL },
10381 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10382 #endif
10383
10384 #undef OPERAND_TYPE_IMM32_32S_DISP32
10385 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
10386 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
10387 #undef OPERAND_TYPE_IMM64_DISP64
10388
10389 };
10390 char *cp;
10391 unsigned int j;
10392
10393 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10394 if (!IS_ELF)
10395 return NULL;
10396 #endif
10397
10398 for (cp = input_line_pointer; *cp != '@'; cp++)
10399 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10400 return NULL;
10401
10402 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10403 {
10404 int len = gotrel[j].len;
10405 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10406 {
10407 if (gotrel[j].rel[object_64bit] != 0)
10408 {
10409 int first, second;
10410 char *tmpbuf, *past_reloc;
10411
10412 *rel = gotrel[j].rel[object_64bit];
10413
10414 if (types)
10415 {
10416 if (flag_code != CODE_64BIT)
10417 {
10418 types->bitfield.imm32 = 1;
10419 types->bitfield.disp32 = 1;
10420 }
10421 else
10422 *types = gotrel[j].types64;
10423 }
10424
10425 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10426 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10427
10428 /* The length of the first part of our input line. */
10429 first = cp - input_line_pointer;
10430
10431 /* The second part goes from after the reloc token until
10432 (and including) an end_of_line char or comma. */
10433 past_reloc = cp + 1 + len;
10434 cp = past_reloc;
10435 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10436 ++cp;
10437 second = cp + 1 - past_reloc;
10438
10439 /* Allocate and copy string. The trailing NUL shouldn't
10440 be necessary, but be safe. */
10441 tmpbuf = XNEWVEC (char, first + second + 2);
10442 memcpy (tmpbuf, input_line_pointer, first);
10443 if (second != 0 && *past_reloc != ' ')
10444 /* Replace the relocation token with ' ', so that
10445 errors like foo@GOTOFF1 will be detected. */
10446 tmpbuf[first++] = ' ';
10447 else
10448 /* Increment length by 1 if the relocation token is
10449 removed. */
10450 len++;
10451 if (adjust)
10452 *adjust = len;
10453 memcpy (tmpbuf + first, past_reloc, second);
10454 tmpbuf[first + second] = '\0';
10455 return tmpbuf;
10456 }
10457
10458 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10459 gotrel[j].str, 1 << (5 + object_64bit));
10460 return NULL;
10461 }
10462 }
10463
10464 /* Might be a symbol version string. Don't as_bad here. */
10465 return NULL;
10466 }
10467 #endif
10468
10469 bfd_reloc_code_real_type
10470 x86_cons (expressionS *exp, int size)
10471 {
10472 bfd_reloc_code_real_type got_reloc = NO_RELOC;
10473
10474 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10475 && !defined (LEX_AT)) \
10476 || defined (TE_PE)
10477 intel_syntax = -intel_syntax;
10478
10479 exp->X_md = 0;
10480 if (size == 4 || (object_64bit && size == 8))
10481 {
10482 /* Handle @GOTOFF and the like in an expression. */
10483 char *save;
10484 char *gotfree_input_line;
10485 int adjust = 0;
10486
10487 save = input_line_pointer;
10488 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10489 if (gotfree_input_line)
10490 input_line_pointer = gotfree_input_line;
10491
10492 expression (exp);
10493
10494 if (gotfree_input_line)
10495 {
10496 /* expression () has merrily parsed up to the end of line,
10497 or a comma - in the wrong buffer. Transfer how far
10498 input_line_pointer has moved to the right buffer. */
10499 input_line_pointer = (save
10500 + (input_line_pointer - gotfree_input_line)
10501 + adjust);
10502 free (gotfree_input_line);
10503 if (exp->X_op == O_constant
10504 || exp->X_op == O_absent
10505 || exp->X_op == O_illegal
10506 || exp->X_op == O_register
10507 || exp->X_op == O_big)
10508 {
10509 char c = *input_line_pointer;
10510 *input_line_pointer = 0;
10511 as_bad (_("missing or invalid expression `%s'"), save);
10512 *input_line_pointer = c;
10513 }
10514 else if ((got_reloc == BFD_RELOC_386_PLT32
10515 || got_reloc == BFD_RELOC_X86_64_PLT32)
10516 && exp->X_op != O_symbol)
10517 {
10518 char c = *input_line_pointer;
10519 *input_line_pointer = 0;
10520 as_bad (_("invalid PLT expression `%s'"), save);
10521 *input_line_pointer = c;
10522 }
10523 }
10524 }
10525 else
10526 expression (exp);
10527
10528 intel_syntax = -intel_syntax;
10529
10530 if (intel_syntax)
10531 i386_intel_simplify (exp);
10532 #else
10533 expression (exp);
10534 #endif
10535
10536 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
10537 if (size == 4 && exp->X_op == O_constant && !object_64bit)
10538 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10539
10540 return got_reloc;
10541 }
10542
10543 static void
10544 signed_cons (int size)
10545 {
10546 if (object_64bit)
10547 cons_sign = 1;
10548 cons (size);
10549 cons_sign = -1;
10550 }
10551
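/* Handle the .insn pseudo-op. An illustrative sketch of input accepted
   at this stage (an encoding specifier plus the major opcode; operands
   are not dealt with here):

       .insn 0x0f01f8                  three-byte legacy opcode (swapgs)
       .insn VEX.L0.66.0F.W0 0x58      VEX-encoded opcode byte 0x58

   The optional VEX / XOP08..XOP1F / EVEX specifier selects the encoding,
   with dot-separated length (L0/L1/128/256/512/LIG), embedded prefix
   (NP/66/F3/F2), opcode space (0F/0F38/0F3A/M<n>), and W (W0/W1/WIG)
   components parsed below. */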
10552 static void
10553 s_insn (int dummy ATTRIBUTE_UNUSED)
10554 {
10555 char mnemonic[MAX_MNEM_SIZE], *line = input_line_pointer;
10556 char *saved_ilp = find_end_of_line (line, false), saved_char;
10557 const char *end;
10558 unsigned int j;
10559 valueT val;
10560 bool vex = false, xop = false, evex = false;
10561 static const templates tt = { &i.tm, &i.tm + 1 };
10562
10563 init_globals ();
10564
10565 saved_char = *saved_ilp;
10566 *saved_ilp = 0;
10567
10568 end = parse_insn (line, mnemonic, true);
10569 if (end == NULL)
10570 {
10571 bad:
10572 *saved_ilp = saved_char;
10573 ignore_rest_of_line ();
10574 i.tm.mnem_off = 0;
10575 return;
10576 }
10577 line += end - line;
10578
10579 current_templates = &tt;
10580 i.tm.mnem_off = MN__insn;
10581
10582 if (startswith (line, "VEX")
10583 && (line[3] == '.' || is_space_char (line[3])))
10584 {
10585 vex = true;
10586 line += 3;
10587 }
10588 else if (startswith (line, "XOP") && ISDIGIT (line[3]))
10589 {
10590 char *e;
10591 unsigned long n = strtoul (line + 3, &e, 16);
10592
10593 if (e == line + 5 && n >= 0x08 && n <= 0x1f
10594 && (*e == '.' || is_space_char (*e)))
10595 {
10596 xop = true;
10597 /* Arrange for build_vex_prefix() to emit 0x8f. */
10598 i.tm.opcode_space = SPACE_XOP08;
10599 i.insn_opcode_space = n;
10600 line = e;
10601 }
10602 }
10603 else if (startswith (line, "EVEX")
10604 && (line[4] == '.' || is_space_char (line[4])))
10605 {
10606 evex = true;
10607 line += 4;
10608 }
10609
10610 if (vex || xop
10611 ? i.vec_encoding == vex_encoding_evex
10612 : evex
10613 ? i.vec_encoding == vex_encoding_vex
10614 || i.vec_encoding == vex_encoding_vex3
10615 : i.vec_encoding != vex_encoding_default)
10616 {
10617 as_bad (_("pseudo-prefix conflicts with encoding specifier"));
10618 goto bad;
10619 }
10620
10621 if (line > end && *line == '.')
10622 {
10623 /* Length specifier (VEX.L, XOP.L, EVEX.L'L). */
10624 switch (line[1])
10625 {
10626 case 'L':
10627 switch (line[2])
10628 {
10629 case '0':
10630 if (evex)
10631 i.tm.opcode_modifier.evex = EVEX128;
10632 else
10633 i.tm.opcode_modifier.vex = VEX128;
10634 break;
10635
10636 case '1':
10637 if (evex)
10638 i.tm.opcode_modifier.evex = EVEX256;
10639 else
10640 i.tm.opcode_modifier.vex = VEX256;
10641 break;
10642
10643 case '2':
10644 if (evex)
10645 i.tm.opcode_modifier.evex = EVEX512;
10646 break;
10647
10648 case '3':
10649 if (evex)
10650 i.tm.opcode_modifier.evex = EVEX_L3;
10651 break;
10652
10653 case 'I':
10654 if (line[3] == 'G')
10655 {
10656 if (evex)
10657 i.tm.opcode_modifier.evex = EVEXLIG;
10658 else
10659 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
10660 ++line;
10661 }
10662 break;
10663 }
10664
10665 if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
10666 line += 3;
10667 break;
10668
10669 case '1':
10670 if (line[2] == '2' && line[3] == '8')
10671 {
10672 if (evex)
10673 i.tm.opcode_modifier.evex = EVEX128;
10674 else
10675 i.tm.opcode_modifier.vex = VEX128;
10676 line += 4;
10677 }
10678 break;
10679
10680 case '2':
10681 if (line[2] == '5' && line[3] == '6')
10682 {
10683 if (evex)
10684 i.tm.opcode_modifier.evex = EVEX256;
10685 else
10686 i.tm.opcode_modifier.vex = VEX256;
10687 line += 4;
10688 }
10689 break;
10690
10691 case '5':
10692 if (evex && line[2] == '1' && line[3] == '2')
10693 {
10694 i.tm.opcode_modifier.evex = EVEX512;
10695 line += 4;
10696 }
10697 break;
10698 }
10699 }
10700
10701 if (line > end && *line == '.')
10702 {
10703 /* Embedded prefix (VEX.pp, XOP.pp, EVEX.pp). */
10704 switch (line[1])
10705 {
10706 case 'N':
10707 if (line[2] == 'P')
10708 line += 3;
10709 break;
10710
10711 case '6':
10712 if (line[2] == '6')
10713 {
10714 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
10715 line += 3;
10716 }
10717 break;
10718
10719 case 'F': case 'f':
10720 if (line[2] == '3')
10721 {
10722 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
10723 line += 3;
10724 }
10725 else if (line[2] == '2')
10726 {
10727 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
10728 line += 3;
10729 }
10730 break;
10731 }
10732 }
10733
10734 if (line > end && !xop && *line == '.')
10735 {
10736 /* Encoding space (VEX.mmmmm, EVEX.mmmm). */
10737 switch (line[1])
10738 {
10739 case '0':
10740 if (TOUPPER (line[2]) != 'F')
10741 break;
10742 if (line[3] == '.' || is_space_char (line[3]))
10743 {
10744 i.insn_opcode_space = SPACE_0F;
10745 line += 3;
10746 }
10747 else if (line[3] == '3'
10748 && (line[4] == '8' || TOUPPER (line[4]) == 'A')
10749 && (line[5] == '.' || is_space_char (line[5])))
10750 {
10751 i.insn_opcode_space = line[4] == '8' ? SPACE_0F38 : SPACE_0F3A;
10752 line += 5;
10753 }
10754 break;
10755
10756 case 'M':
10757 if (ISDIGIT (line[2]) && line[2] != '0')
10758 {
10759 char *e;
10760 unsigned long n = strtoul (line + 2, &e, 10);
10761
10762 if (n <= (evex ? 15 : 31)
10763 && (*e == '.' || is_space_char (*e)))
10764 {
10765 i.insn_opcode_space = n;
10766 line = e;
10767 }
10768 }
10769 break;
10770 }
10771 }
10772
10773 if (line > end && *line == '.' && line[1] == 'W')
10774 {
10775 /* VEX.W, XOP.W, EVEX.W */
10776 switch (line[2])
10777 {
10778 case '0':
10779 i.tm.opcode_modifier.vexw = VEXW0;
10780 break;
10781
10782 case '1':
10783 i.tm.opcode_modifier.vexw = VEXW1;
10784 break;
10785
10786 case 'I':
10787 if (line[3] == 'G')
10788 {
10789 i.tm.opcode_modifier.vexw = VEXWIG;
10790 ++line;
10791 }
10792 break;
10793 }
10794
10795 if (i.tm.opcode_modifier.vexw)
10796 line += 3;
10797 }
10798
10799 if (line > end && *line && !is_space_char (*line))
10800 {
10801 /* Improve diagnostic a little. */
10802 if (*line == '.' && line[1] && !is_space_char (line[1]))
10803 ++line;
10804 goto done;
10805 }
10806
10807 input_line_pointer = line;
10808 val = get_absolute_expression ();
10809 line = input_line_pointer;
10810
10811 for (j = 1; j < sizeof(val); ++j)
10812 if (!(val >> (j * 8)))
10813 break;
10814
10815 /* Trim off a prefix if present. */
10816 if (j > 1 && !vex && !xop && !evex)
10817 {
10818 uint8_t byte = val >> ((j - 1) * 8);
10819
10820 switch (byte)
10821 {
10822 case DATA_PREFIX_OPCODE:
10823 case REPE_PREFIX_OPCODE:
10824 case REPNE_PREFIX_OPCODE:
10825 if (!add_prefix (byte))
10826 goto bad;
10827 val &= ((uint64_t)1 << (--j * 8)) - 1;
10828 break;
10829 }
10830 }
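  /* E.g. (an illustrative trace) for ".insn 0x660f58": the leading 0x66
     became an explicit prefix above, and the remaining 0x0f58 is split
     below into the 0F opcode space and the single opcode byte 0x58. */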
10831
10832 /* Trim off encoding space. */
10833 if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
10834 {
10835 uint8_t byte = val >> ((--j - 1) * 8);
10836
10837 i.insn_opcode_space = SPACE_0F;
10838 switch (byte & -(j > 1))
10839 {
10840 case 0x38:
10841 i.insn_opcode_space = SPACE_0F38;
10842 --j;
10843 break;
10844 case 0x3a:
10845 i.insn_opcode_space = SPACE_0F3A;
10846 --j;
10847 break;
10848 }
10849 i.tm.opcode_space = i.insn_opcode_space;
10850 val &= ((uint64_t)1 << (j * 8)) - 1;
10851 }
10852 if (!i.tm.opcode_space && (vex || evex))
10853 /* Arrange for build_vex_prefix() to properly emit 0xC4/0xC5.
10854 Also avoid hitting abort() there or in build_evex_prefix(). */
10855 i.tm.opcode_space = i.insn_opcode_space == SPACE_0F ? SPACE_0F
10856 : SPACE_0F38;
10857
10858 if (j > 2)
10859 {
10860 as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
10861 goto bad;
10862 }
10863 i.opcode_length = j;
10864 i.tm.base_opcode = val;
10865
10866 if (vex || xop)
10867 {
10868 if (!i.tm.opcode_modifier.vex)
10869 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
10870
10871 build_vex_prefix (NULL);
10872 i.rex &= REX_OPCODE;
10873 }
10874 else if (evex)
10875 {
10876 if (!i.tm.opcode_modifier.evex)
10877 i.tm.opcode_modifier.evex = EVEXLIG;
10878
10879 build_evex_prefix ();
10880 i.rex &= REX_OPCODE;
10881 }
10882
10883 output_insn ();
10884
10885 done:
10886 *saved_ilp = saved_char;
10887 input_line_pointer = line;
10888
10889 demand_empty_rest_of_line ();
10890
10891 /* Make sure dot_insn() won't yield "true" anymore. */
10892 i.tm.mnem_off = 0;
10893 }
10894
10895 #ifdef TE_PE
10896 static void
10897 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10898 {
10899 expressionS exp;
10900
10901 do
10902 {
10903 expression (&exp);
10904 if (exp.X_op == O_symbol)
10905 exp.X_op = O_secrel;
10906
10907 emit_expr (&exp, 4);
10908 }
10909 while (*input_line_pointer++ == ',');
10910
10911 input_line_pointer--;
10912 demand_empty_rest_of_line ();
10913 }
10914
10915 static void
10916 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
10917 {
10918 expressionS exp;
10919
10920 do
10921 {
10922 expression (&exp);
10923 if (exp.X_op == O_symbol)
10924 exp.X_op = O_secidx;
10925
10926 emit_expr (&exp, 2);
10927 }
10928 while (*input_line_pointer++ == ',');
10929
10930 input_line_pointer--;
10931 demand_empty_rest_of_line ();
10932 }
10933 #endif
10934
10935 /* Handle Rounding Control / SAE specifiers. */
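/* For example (illustrative): handed the tail "rn-sae}" of an Intel
   syntax "{rn-sae}" operand, the matching RC_NamesTable entry records
   round-to-nearest in i.rounding.type, and the returned pointer points
   at the closing brace. */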
10936
10937 static char *
10938 RC_SAE_specifier (const char *pstr)
10939 {
10940 unsigned int j;
10941
10942 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10943 {
10944 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10945 {
10946 if (i.rounding.type != rc_none)
10947 {
10948 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
10949 return NULL;
10950 }
10951
10952 i.rounding.type = RC_NamesTable[j].type;
10953
10954 return (char *)(pstr + RC_NamesTable[j].len);
10955 }
10956 }
10957
10958 return NULL;
10959 }
10960
10961 /* Handle Vector operations. */
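/* Illustrative examples of the brace-enclosed specifiers handled here
   (AT&T syntax):

       vaddps (%rax){1to16}, %zmm1, %zmm2      broadcast
       vmovaps %zmm0, %zmm1{%k3}{z}            write mask plus zeroing  */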
10962
10963 static char *
10964 check_VecOperations (char *op_string)
10965 {
10966 const reg_entry *mask;
10967 const char *saved;
10968 char *end_op;
10969
10970 while (*op_string)
10971 {
10972 saved = op_string;
10973 if (*op_string == '{')
10974 {
10975 op_string++;
10976
10977 /* Check broadcasts. */
10978 if (startswith (op_string, "1to"))
10979 {
10980 unsigned int bcst_type;
10981
10982 if (i.broadcast.type)
10983 goto duplicated_vec_op;
10984
10985 op_string += 3;
10986 if (*op_string == '8')
10987 bcst_type = 8;
10988 else if (*op_string == '4')
10989 bcst_type = 4;
10990 else if (*op_string == '2')
10991 bcst_type = 2;
10992 else if (*op_string == '1'
10993 && *(op_string+1) == '6')
10994 {
10995 bcst_type = 16;
10996 op_string++;
10997 }
10998 else if (*op_string == '3'
10999 && *(op_string+1) == '2')
11000 {
11001 bcst_type = 32;
11002 op_string++;
11003 }
11004 else
11005 {
11006 as_bad (_("Unsupported broadcast: `%s'"), saved);
11007 return NULL;
11008 }
11009 op_string++;
11010
11011 i.broadcast.type = bcst_type;
11012 i.broadcast.operand = this_operand;
11013 }
11014 /* Check masking operation. */
11015 else if ((mask = parse_register (op_string, &end_op)) != NULL)
11016 {
11017 if (mask == &bad_reg)
11018 return NULL;
11019
11020 /* k0 can't be used for write mask. */
11021 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
11022 {
11023 as_bad (_("`%s%s' can't be used for write mask"),
11024 register_prefix, mask->reg_name);
11025 return NULL;
11026 }
11027
11028 if (!i.mask.reg)
11029 {
11030 i.mask.reg = mask;
11031 i.mask.operand = this_operand;
11032 }
11033 else if (i.mask.reg->reg_num)
11034 goto duplicated_vec_op;
11035 else
11036 {
11037 i.mask.reg = mask;
11038
11039 /* Only "{z}" is allowed here. No need to check
11040 zeroing mask explicitly. */
11041 if (i.mask.operand != (unsigned int) this_operand)
11042 {
11043 as_bad (_("invalid write mask `%s'"), saved);
11044 return NULL;
11045 }
11046 }
11047
11048 op_string = end_op;
11049 }
11050 /* Check zeroing-flag for masking operation. */
11051 else if (*op_string == 'z')
11052 {
11053 if (!i.mask.reg)
11054 {
11055 i.mask.reg = reg_k0;
11056 i.mask.zeroing = 1;
11057 i.mask.operand = this_operand;
11058 }
11059 else
11060 {
11061 if (i.mask.zeroing)
11062 {
11063 duplicated_vec_op:
11064 as_bad (_("duplicated `%s'"), saved);
11065 return NULL;
11066 }
11067
11068 i.mask.zeroing = 1;
11069
11070 /* Only "{%k}" is allowed here. No need to check mask
11071 register explicitly. */
11072 if (i.mask.operand != (unsigned int) this_operand)
11073 {
11074 as_bad (_("invalid zeroing-masking `%s'"),
11075 saved);
11076 return NULL;
11077 }
11078 }
11079
11080 op_string++;
11081 }
11082 else if (intel_syntax
11083 && (op_string = RC_SAE_specifier (op_string)) != NULL)
11084 i.rounding.modifier = true;
11085 else
11086 goto unknown_vec_op;
11087
11088 if (*op_string != '}')
11089 {
11090 as_bad (_("missing `}' in `%s'"), saved);
11091 return NULL;
11092 }
11093 op_string++;
11094
11095 /* Strip whitespace since the addition of pseudo prefixes
11096 changed how the scrubber treats '{'. */
11097 if (is_space_char (*op_string))
11098 ++op_string;
11099
11100 continue;
11101 }
11102 unknown_vec_op:
11103 /* We don't know this one. */
11104 as_bad (_("unknown vector operation: `%s'"), saved);
11105 return NULL;
11106 }
11107
11108 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
11109 {
11110 as_bad (_("zeroing-masking only allowed with write mask"));
11111 return NULL;
11112 }
11113
11114 return op_string;
11115 }
11116
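/* Parse the immediate operand at IMM_START (just past the '$' in AT&T
   syntax) into the next im_expressions[] slot. Returns zero on error. */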
11117 static int
11118 i386_immediate (char *imm_start)
11119 {
11120 char *save_input_line_pointer;
11121 char *gotfree_input_line;
11122 segT exp_seg = 0;
11123 expressionS *exp;
11124 i386_operand_type types;
11125
11126 operand_type_set (&types, ~0);
11127
11128 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
11129 {
11130 as_bad (_("at most %d immediate operands are allowed"),
11131 MAX_IMMEDIATE_OPERANDS);
11132 return 0;
11133 }
11134
11135 exp = &im_expressions[i.imm_operands++];
11136 i.op[this_operand].imms = exp;
11137
11138 if (is_space_char (*imm_start))
11139 ++imm_start;
11140
11141 save_input_line_pointer = input_line_pointer;
11142 input_line_pointer = imm_start;
11143
11144 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11145 if (gotfree_input_line)
11146 input_line_pointer = gotfree_input_line;
11147
11148 exp_seg = expression (exp);
11149
11150 SKIP_WHITESPACE ();
11151 if (*input_line_pointer)
11152 as_bad (_("junk `%s' after expression"), input_line_pointer);
11153
11154 input_line_pointer = save_input_line_pointer;
11155 if (gotfree_input_line)
11156 {
11157 free (gotfree_input_line);
11158
11159 if (exp->X_op == O_constant)
11160 exp->X_op = O_illegal;
11161 }
11162
11163 if (exp_seg == reg_section)
11164 {
11165 as_bad (_("illegal immediate register operand %s"), imm_start);
11166 return 0;
11167 }
11168
11169 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
11170 }
11171
11172 static int
11173 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11174 i386_operand_type types, const char *imm_start)
11175 {
11176 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
11177 {
11178 if (imm_start)
11179 as_bad (_("missing or invalid immediate expression `%s'"),
11180 imm_start);
11181 return 0;
11182 }
11183 else if (exp->X_op == O_constant)
11184 {
11185 /* Size it properly later. */
11186 i.types[this_operand].bitfield.imm64 = 1;
11187
11188 /* If not 64bit, sign/zero extend val, to account for wraparound
11189 when !BFD64. */
11190 if (flag_code != CODE_64BIT)
11191 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11192 }
11193 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11194 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11195 && exp_seg != absolute_section
11196 && exp_seg != text_section
11197 && exp_seg != data_section
11198 && exp_seg != bss_section
11199 && exp_seg != undefined_section
11200 && !bfd_is_com_section (exp_seg))
11201 {
11202 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11203 return 0;
11204 }
11205 #endif
11206 else
11207 {
11208 /* This is an address. The size of the address will be
11209 determined later, depending on destination register,
11210 suffix, or the default for the section. */
11211 i.types[this_operand].bitfield.imm8 = 1;
11212 i.types[this_operand].bitfield.imm16 = 1;
11213 i.types[this_operand].bitfield.imm32 = 1;
11214 i.types[this_operand].bitfield.imm32s = 1;
11215 i.types[this_operand].bitfield.imm64 = 1;
11216 i.types[this_operand] = operand_type_and (i.types[this_operand],
11217 types);
11218 }
11219
11220 return 1;
11221 }
11222
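/* Parse the scale factor of a memory operand. A minimal illustrative
   case: for "(%eax,%ebx,4)" this is handed the "4" and sets
   i.log2_scale_factor to 2 (log2 of 4). */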
11223 static char *
11224 i386_scale (char *scale)
11225 {
11226 offsetT val;
11227 char *save = input_line_pointer;
11228
11229 input_line_pointer = scale;
11230 val = get_absolute_expression ();
11231
11232 switch (val)
11233 {
11234 case 1:
11235 i.log2_scale_factor = 0;
11236 break;
11237 case 2:
11238 i.log2_scale_factor = 1;
11239 break;
11240 case 4:
11241 i.log2_scale_factor = 2;
11242 break;
11243 case 8:
11244 i.log2_scale_factor = 3;
11245 break;
11246 default:
11247 {
11248 char sep = *input_line_pointer;
11249
11250 *input_line_pointer = '\0';
11251 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
11252 scale);
11253 *input_line_pointer = sep;
11254 input_line_pointer = save;
11255 return NULL;
11256 }
11257 }
11258 if (i.log2_scale_factor != 0 && i.index_reg == 0)
11259 {
11260 as_warn (_("scale factor of %d without an index register"),
11261 1 << i.log2_scale_factor);
11262 i.log2_scale_factor = 0;
11263 }
11264 scale = input_line_pointer;
11265 input_line_pointer = save;
11266 return scale;
11267 }
11268
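/* Parse the displacement delimited by DISP_START and DISP_END into the
   next disp_expressions[] slot. Returns zero on error. */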
11269 static int
11270 i386_displacement (char *disp_start, char *disp_end)
11271 {
11272 expressionS *exp;
11273 segT exp_seg = 0;
11274 char *save_input_line_pointer;
11275 char *gotfree_input_line;
11276 int override;
11277 i386_operand_type bigdisp, types = anydisp;
11278 int ret;
11279
11280 if (i.disp_operands == MAX_MEMORY_OPERANDS)
11281 {
11282 as_bad (_("at most %d displacement operands are allowed"),
11283 MAX_MEMORY_OPERANDS);
11284 return 0;
11285 }
11286
11287 operand_type_set (&bigdisp, 0);
11288 if (i.jumpabsolute
11289 || i.types[this_operand].bitfield.baseindex
11290 || (current_templates->start->opcode_modifier.jump != JUMP
11291 && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
11292 {
11293 i386_addressing_mode ();
11294 override = (i.prefix[ADDR_PREFIX] != 0);
11295 if (flag_code == CODE_64BIT)
11296 {
11297 bigdisp.bitfield.disp32 = 1;
11298 if (!override)
11299 bigdisp.bitfield.disp64 = 1;
11300 }
11301 else if ((flag_code == CODE_16BIT) ^ override)
11302 bigdisp.bitfield.disp16 = 1;
11303 else
11304 bigdisp.bitfield.disp32 = 1;
11305 }
11306 else
11307 {
11308 /* For PC-relative branches, the width of the displacement may be
11309 dependent upon data size, but is never dependent upon address size.
11310 Also make sure to not unintentionally match against a non-PC-relative
11311 branch template. */
11312 static templates aux_templates;
11313 const insn_template *t = current_templates->start;
11314 bool has_intel64 = false;
11315
11316 aux_templates.start = t;
11317 while (++t < current_templates->end)
11318 {
11319 if (t->opcode_modifier.jump
11320 != current_templates->start->opcode_modifier.jump)
11321 break;
11322 if ((t->opcode_modifier.isa64 >= INTEL64))
11323 has_intel64 = true;
11324 }
11325 if (t < current_templates->end)
11326 {
11327 aux_templates.end = t;
11328 current_templates = &aux_templates;
11329 }
11330
11331 override = (i.prefix[DATA_PREFIX] != 0);
11332 if (flag_code == CODE_64BIT)
11333 {
11334 if ((override || i.suffix == WORD_MNEM_SUFFIX)
11335 && (!intel64 || !has_intel64))
11336 bigdisp.bitfield.disp16 = 1;
11337 else
11338 bigdisp.bitfield.disp32 = 1;
11339 }
11340 else
11341 {
11342 if (!override)
11343 override = (i.suffix == (flag_code != CODE_16BIT
11344 ? WORD_MNEM_SUFFIX
11345 : LONG_MNEM_SUFFIX));
11346 bigdisp.bitfield.disp32 = 1;
11347 if ((flag_code == CODE_16BIT) ^ override)
11348 {
11349 bigdisp.bitfield.disp32 = 0;
11350 bigdisp.bitfield.disp16 = 1;
11351 }
11352 }
11353 }
11354 i.types[this_operand] = operand_type_or (i.types[this_operand],
11355 bigdisp);
11356
11357 exp = &disp_expressions[i.disp_operands];
11358 i.op[this_operand].disps = exp;
11359 i.disp_operands++;
11360 save_input_line_pointer = input_line_pointer;
11361 input_line_pointer = disp_start;
11362 END_STRING_AND_SAVE (disp_end);
11363
11364 #ifndef GCC_ASM_O_HACK
11365 #define GCC_ASM_O_HACK 0
11366 #endif
11367 #if GCC_ASM_O_HACK
11368 END_STRING_AND_SAVE (disp_end + 1);
11369 if (i.types[this_operand].bitfield.baseindex
11370 && disp_end[-1] == '+')
11371 {
11372 /* This hack is to avoid a warning when using the "o"
11373 constraint within gcc asm statements.
11374 For instance:
11375
11376 #define _set_tssldt_desc(n,addr,limit,type) \
11377 __asm__ __volatile__ ( \
11378 "movw %w2,%0\n\t" \
11379 "movw %w1,2+%0\n\t" \
11380 "rorl $16,%1\n\t" \
11381 "movb %b1,4+%0\n\t" \
11382 "movb %4,5+%0\n\t" \
11383 "movb $0,6+%0\n\t" \
11384 "movb %h1,7+%0\n\t" \
11385 "rorl $16,%1" \
11386 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
11387
11388 This works great except that the output assembler ends
11389 up looking a bit weird if it turns out that there is
11390 no offset. You end up producing code that looks like:
11391
11392 #APP
11393 movw $235,(%eax)
11394 movw %dx,2+(%eax)
11395 rorl $16,%edx
11396 movb %dl,4+(%eax)
11397 movb $137,5+(%eax)
11398 movb $0,6+(%eax)
11399 movb %dh,7+(%eax)
11400 rorl $16,%edx
11401 #NO_APP
11402
11403 So here we provide the missing zero. */
11404
11405 *disp_end = '0';
11406 }
11407 #endif
11408 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11409 if (gotfree_input_line)
11410 input_line_pointer = gotfree_input_line;
11411
11412 exp_seg = expression (exp);
11413
11414 SKIP_WHITESPACE ();
11415 if (*input_line_pointer)
11416 as_bad (_("junk `%s' after expression"), input_line_pointer);
11417 #if GCC_ASM_O_HACK
11418 RESTORE_END_STRING (disp_end + 1);
11419 #endif
11420 input_line_pointer = save_input_line_pointer;
11421 if (gotfree_input_line)
11422 {
11423 free (gotfree_input_line);
11424
11425 if (exp->X_op == O_constant || exp->X_op == O_register)
11426 exp->X_op = O_illegal;
11427 }
11428
11429 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
11430
11431 RESTORE_END_STRING (disp_end);
11432
11433 return ret;
11434 }
11435
11436 static int
11437 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11438 i386_operand_type types, const char *disp_start)
11439 {
11440 int ret = 1;
11441
11442 /* We do this to make sure that the section symbol is in
11443 the symbol table. We will ultimately change the relocation
11444 to be relative to the beginning of the section. */
11445 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11446 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11447 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11448 {
11449 if (exp->X_op != O_symbol)
11450 goto inv_disp;
11451
11452 if (S_IS_LOCAL (exp->X_add_symbol)
11453 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11454 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11455 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11456 exp->X_op = O_subtract;
11457 exp->X_op_symbol = GOT_symbol;
11458 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11459 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11460 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11461 i.reloc[this_operand] = BFD_RELOC_64;
11462 else
11463 i.reloc[this_operand] = BFD_RELOC_32;
11464 }
11465
11466 else if (exp->X_op == O_absent
11467 || exp->X_op == O_illegal
11468 || exp->X_op == O_big)
11469 {
11470 inv_disp:
11471 as_bad (_("missing or invalid displacement expression `%s'"),
11472 disp_start);
11473 ret = 0;
11474 }
11475
11476 else if (exp->X_op == O_constant)
11477 {
11478 /* Sizing gets taken care of by optimize_disp().
11479
11480 If not 64bit, sign/zero extend val, to account for wraparound
11481 when !BFD64. */
11482 if (flag_code != CODE_64BIT)
11483 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11484 }
11485
11486 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11487 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11488 && exp_seg != absolute_section
11489 && exp_seg != text_section
11490 && exp_seg != data_section
11491 && exp_seg != bss_section
11492 && exp_seg != undefined_section
11493 && !bfd_is_com_section (exp_seg))
11494 {
11495 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11496 ret = 0;
11497 }
11498 #endif
11499
11500 else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
11501 i.types[this_operand].bitfield.disp8 = 1;
11502
11503 /* Check if this is a displacement only operand. */
11504 if (!i.types[this_operand].bitfield.baseindex)
11505 i.types[this_operand] =
11506 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
11507 operand_type_and (i.types[this_operand], types));
11508
11509 return ret;
11510 }
11511
11512 /* Return the active addressing mode, taking address override and
11513 registers forming the address into consideration. Update the
11514 address override prefix if necessary. */
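/* For example (illustrative): assembling "mov (%bx,%si), %ax" under
   .code32 infers 16-bit addressing from the word-sized base register,
   so CODE_16BIT is returned and the 0x67 address size prefix gets
   added. */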
11515
11516 static enum flag_code
11517 i386_addressing_mode (void)
11518 {
11519 enum flag_code addr_mode;
11520
11521 if (i.prefix[ADDR_PREFIX])
11522 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11523 else if (flag_code == CODE_16BIT
11524 && current_templates->start->cpu_flags.bitfield.cpumpx
11525 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11526 from md_assemble() by "is not a valid base/index expression"
11527 when there is a base and/or index. */
11528 && !i.types[this_operand].bitfield.baseindex)
11529 {
11530 /* MPX insn memory operands with neither base nor index must be forced
11531 to use 32-bit addressing in 16-bit mode. */
11532 addr_mode = CODE_32BIT;
11533 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11534 ++i.prefixes;
11535 gas_assert (!i.types[this_operand].bitfield.disp16);
11536 gas_assert (!i.types[this_operand].bitfield.disp32);
11537 }
11538 else
11539 {
11540 addr_mode = flag_code;
11541
11542 #if INFER_ADDR_PREFIX
11543 if (i.mem_operands == 0)
11544 {
11545 /* Infer address prefix from the first memory operand. */
11546 const reg_entry *addr_reg = i.base_reg;
11547
11548 if (addr_reg == NULL)
11549 addr_reg = i.index_reg;
11550
11551 if (addr_reg)
11552 {
11553 if (addr_reg->reg_type.bitfield.dword)
11554 addr_mode = CODE_32BIT;
11555 else if (flag_code != CODE_64BIT
11556 && addr_reg->reg_type.bitfield.word)
11557 addr_mode = CODE_16BIT;
11558
11559 if (addr_mode != flag_code)
11560 {
11561 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11562 i.prefixes += 1;
11563 /* Change the size of any displacement too. At most one
11564 of Disp16 or Disp32 is set.
11565 FIXME. There doesn't seem to be any real need for
11566 separate Disp16 and Disp32 flags. The same goes for
11567 Imm16 and Imm32. Removing them would probably clean
11568 up the code quite a lot. */
11569 if (flag_code != CODE_64BIT
11570 && (i.types[this_operand].bitfield.disp16
11571 || i.types[this_operand].bitfield.disp32))
11572 {
11573 static const i386_operand_type disp16_32 = {
11574 .bitfield = { .disp16 = 1, .disp32 = 1 }
11575 };
11576
11577 i.types[this_operand]
11578 = operand_type_xor (i.types[this_operand], disp16_32);
11579 }
11580 }
11581 }
11582 }
11583 #endif
11584 }
11585
11586 return addr_mode;
11587 }
11588
11589 /* Make sure the memory operand we've been dealt is valid.
11590 Return 1 on success, 0 on a failure. */
11591
11592 static int
11593 i386_index_check (const char *operand_string)
11594 {
11595 const char *kind = "base/index";
11596 enum flag_code addr_mode = i386_addressing_mode ();
11597 const insn_template *t = current_templates->end - 1;
11598
11599 if (t->opcode_modifier.isstring)
11600 {
11601 /* Memory operands of string insns are special in that they only allow
11602 a single register (rDI, rSI, or rBX) as their memory address. */
11603 const reg_entry *expected_reg;
11604 static const char *di_si[][2] =
11605 {
11606 { "esi", "edi" },
11607 { "si", "di" },
11608 { "rsi", "rdi" }
11609 };
11610 static const char *bx[] = { "ebx", "bx", "rbx" };
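      /* E.g. (illustrative): in 32-bit mode "scas" accepts only
         %es:(%edi) as its memory operand and "xlat" only (%ebx). */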
11611
11612 kind = "string address";
11613
11614 if (t->opcode_modifier.prefixok == PrefixRep)
11615 {
11616 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
11617 int op = 0;
11618
11619 if (!t->operand_types[0].bitfield.baseindex
11620 || ((!i.mem_operands != !intel_syntax)
11621 && t->operand_types[1].bitfield.baseindex))
11622 op = 1;
11623 expected_reg
11624 = (const reg_entry *) str_hash_find (reg_hash,
11625 di_si[addr_mode][op == es_op]);
11626 }
11627 else
11628 expected_reg
11629 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
11630
11631 if (i.base_reg != expected_reg
11632 || i.index_reg
11633 || operand_type_check (i.types[this_operand], disp))
11634 {
11635 /* The second memory operand must have the same size as
11636 the first one. */
11637 if (i.mem_operands
11638 && i.base_reg
11639 && !((addr_mode == CODE_64BIT
11640 && i.base_reg->reg_type.bitfield.qword)
11641 || (addr_mode == CODE_32BIT
11642 ? i.base_reg->reg_type.bitfield.dword
11643 : i.base_reg->reg_type.bitfield.word)))
11644 goto bad_address;
11645
11646 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
11647 operand_string,
11648 intel_syntax ? '[' : '(',
11649 register_prefix,
11650 expected_reg->reg_name,
11651 intel_syntax ? ']' : ')');
11652 return 1;
11653 }
11654 else
11655 return 1;
11656
11657 bad_address:
11658 as_bad (_("`%s' is not a valid %s expression"),
11659 operand_string, kind);
11660 return 0;
11661 }
11662 else
11663 {
11664 t = current_templates->start;
11665
11666 if (addr_mode != CODE_16BIT)
11667 {
11668 /* 32-bit/64-bit checks. */
11669 if (i.disp_encoding == disp_encoding_16bit)
11670 {
11671 bad_disp:
11672 as_bad (_("invalid `%s' prefix"),
11673 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
11674 return 0;
11675 }
11676
11677 if ((i.base_reg
11678 && ((addr_mode == CODE_64BIT
11679 ? !i.base_reg->reg_type.bitfield.qword
11680 : !i.base_reg->reg_type.bitfield.dword)
11681 || (i.index_reg && i.base_reg->reg_num == RegIP)
11682 || i.base_reg->reg_num == RegIZ))
11683 || (i.index_reg
11684 && !i.index_reg->reg_type.bitfield.xmmword
11685 && !i.index_reg->reg_type.bitfield.ymmword
11686 && !i.index_reg->reg_type.bitfield.zmmword
11687 && ((addr_mode == CODE_64BIT
11688 ? !i.index_reg->reg_type.bitfield.qword
11689 : !i.index_reg->reg_type.bitfield.dword)
11690 || !i.index_reg->reg_type.bitfield.baseindex)))
11691 goto bad_address;
11692
11693 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
11694 if (t->mnem_off == MN_bndmk
11695 || t->mnem_off == MN_bndldx
11696 || t->mnem_off == MN_bndstx
11697 || t->opcode_modifier.sib == SIBMEM)
11698 {
11699 /* They cannot use RIP-relative addressing. */
11700 if (i.base_reg && i.base_reg->reg_num == RegIP)
11701 {
11702 as_bad (_("`%s' cannot be used here"), operand_string);
11703 return 0;
11704 }
11705
11706 /* bndldx and bndstx ignore their scale factor. */
11707 if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
11708 && i.log2_scale_factor)
11709 as_warn (_("register scaling is being ignored here"));
11710 }
11711 }
11712 else
11713 {
11714 /* 16-bit checks. */
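 /* The only valid 16-bit forms pair a base of %bx or %bp with an
 optional index of %si or %di, unscaled, e.g. "(%bx,%si)". */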
11715 if (i.disp_encoding == disp_encoding_32bit)
11716 goto bad_disp;
11717
11718 if ((i.base_reg
11719 && (!i.base_reg->reg_type.bitfield.word
11720 || !i.base_reg->reg_type.bitfield.baseindex))
11721 || (i.index_reg
11722 && (!i.index_reg->reg_type.bitfield.word
11723 || !i.index_reg->reg_type.bitfield.baseindex
11724 || !(i.base_reg
11725 && i.base_reg->reg_num < 6
11726 && i.index_reg->reg_num >= 6
11727 && i.log2_scale_factor == 0))))
11728 goto bad_address;
11729 }
11730 }
11731 return 1;
11732 }
11733
11734 /* Handle vector immediates. */
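/* A rounding-control / suppress-all-exceptions pseudo-operand is spelled
 like "{rn-sae}" or "{sae}"; the specifier itself is recognized by
 RC_SAE_specifier(), while the check below merely insists that it makes
 up the entire operand. */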
11735
11736 static int
11737 RC_SAE_immediate (const char *imm_start)
11738 {
11739 const char *pstr = imm_start;
11740
11741 if (*pstr != '{')
11742 return 0;
11743
11744 pstr = RC_SAE_specifier (pstr + 1);
11745 if (pstr == NULL)
11746 return 0;
11747
11748 if (*pstr++ != '}')
11749 {
11750 as_bad (_("Missing '}': '%s'"), imm_start);
11751 return 0;
11752 }
11753 /* RC/SAE immediate string should contain nothing more. */
11754 if (*pstr != 0)
11755 {
11756 as_bad (_("Junk after '}': '%s'"), imm_start);
11757 return 0;
11758 }
11759
11760 /* Internally this doesn't count as an operand. */
11761 --i.operands;
11762
11763 return 1;
11764 }
11765
11766 static INLINE bool starts_memory_operand (char c)
11767 {
11768 return ISDIGIT (c)
11769 || is_name_beginner (c)
11770 || strchr ("([\"+-!~", c);
11771 }
11772
11773 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
11774 on error. */
11775
11776 static int
11777 i386_att_operand (char *operand_string)
11778 {
11779 const reg_entry *r;
11780 char *end_op;
11781 char *op_string = operand_string;
11782
11783 if (is_space_char (*op_string))
11784 ++op_string;
11785
11786 /* We check for an absolute prefix (differentiating,
11787 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
11788 if (*op_string == ABSOLUTE_PREFIX
11789 && current_templates->start->opcode_modifier.jump)
11790 {
11791 ++op_string;
11792 if (is_space_char (*op_string))
11793 ++op_string;
11794 i.jumpabsolute = true;
11795 }
11796
11797 /* Check if operand is a register. */
11798 if ((r = parse_register (op_string, &end_op)) != NULL)
11799 {
11800 i386_operand_type temp;
11801
11802 if (r == &bad_reg)
11803 return 0;
11804
11805 /* Check for a segment override by searching for ':' after a
11806 segment register. */
11807 op_string = end_op;
11808 if (is_space_char (*op_string))
11809 ++op_string;
11810 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11811 {
11812 i.seg[i.mem_operands] = r;
11813
11814 /* Skip the ':' and whitespace. */
11815 ++op_string;
11816 if (is_space_char (*op_string))
11817 ++op_string;
11818
11819 /* Handle case of %es:*foo. */
11820 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX
11821 && current_templates->start->opcode_modifier.jump)
11822 {
11823 ++op_string;
11824 if (is_space_char (*op_string))
11825 ++op_string;
11826 i.jumpabsolute = true;
11827 }
11828
11829 if (!starts_memory_operand (*op_string))
11830 {
11831 as_bad (_("bad memory operand `%s'"), op_string);
11832 return 0;
11833 }
11834 goto do_memory_reference;
11835 }
11836
11837 /* Handle vector operations. */
11838 if (*op_string == '{')
11839 {
11840 op_string = check_VecOperations (op_string);
11841 if (op_string == NULL)
11842 return 0;
11843 }
11844
11845 if (*op_string)
11846 {
11847 as_bad (_("junk `%s' after register"), op_string);
11848 return 0;
11849 }
11850 temp = r->reg_type;
11851 temp.bitfield.baseindex = 0;
11852 i.types[this_operand] = operand_type_or (i.types[this_operand],
11853 temp);
11854 i.types[this_operand].bitfield.unspecified = 0;
11855 i.op[this_operand].regs = r;
11856 i.reg_operands++;
11857
11858 /* A GPR may follow an RC or SAE immediate only if a (vector) register
11859 operand was also present earlier on. */
11860 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
11861 && i.reg_operands == 1)
11862 {
11863 unsigned int j;
11864
11865 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
11866 if (i.rounding.type == RC_NamesTable[j].type)
11867 break;
11868 as_bad (_("`%s': misplaced `{%s}'"),
11869 insn_name (current_templates->start), RC_NamesTable[j].name);
11870 return 0;
11871 }
11872 }
11873 else if (*op_string == REGISTER_PREFIX)
11874 {
11875 as_bad (_("bad register name `%s'"), op_string);
11876 return 0;
11877 }
11878 else if (*op_string == IMMEDIATE_PREFIX)
11879 {
11880 ++op_string;
11881 if (i.jumpabsolute)
11882 {
11883 as_bad (_("immediate operand illegal with absolute jump"));
11884 return 0;
11885 }
11886 if (!i386_immediate (op_string))
11887 return 0;
11888 if (i.rounding.type != rc_none)
11889 {
11890 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
11891 insn_name (current_templates->start));
11892 return 0;
11893 }
11894 }
11895 else if (RC_SAE_immediate (operand_string))
11896 {
11897 /* If it is a RC or SAE immediate, do the necessary placement check:
11898 Only another immediate or a GPR may precede it. */
11899 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
11900 || (i.reg_operands == 1
11901 && i.op[0].regs->reg_type.bitfield.class != Reg))
11902 {
11903 as_bad (_("`%s': misplaced `%s'"),
11904 insn_name (current_templates->start), operand_string);
11905 return 0;
11906 }
11907 }
11908 else if (starts_memory_operand (*op_string))
11909 {
11910 /* This is a memory reference of some sort. */
11911 char *base_string;
11912
11913 /* Start and end of displacement string expression (if found). */
11914 char *displacement_string_start;
11915 char *displacement_string_end;
11916
11917 do_memory_reference:
11918 /* Check for base index form. We detect the base index form by
11919 looking for a ')' at the end of the operand, searching
11920 for the '(' matching it, and finding a REGISTER_PREFIX or ','
11921 after the '('. */
11922 base_string = op_string + strlen (op_string);
11923
11924 /* Handle vector operations. */
11925 --base_string;
11926 if (is_space_char (*base_string))
11927 --base_string;
11928
11929 if (*base_string == '}')
11930 {
11931 char *vop_start = NULL;
11932
11933 while (base_string-- > op_string)
11934 {
11935 if (*base_string == '"')
11936 break;
11937 if (*base_string != '{')
11938 continue;
11939
11940 vop_start = base_string;
11941
11942 --base_string;
11943 if (is_space_char (*base_string))
11944 --base_string;
11945
11946 if (*base_string != '}')
11947 break;
11948
11949 vop_start = NULL;
11950 }
11951
11952 if (!vop_start)
11953 {
11954 as_bad (_("unbalanced curly braces"));
11955 return 0;
11956 }
11957
11958 if (check_VecOperations (vop_start) == NULL)
11959 return 0;
11960 }
11961
11962 /* If we only have a displacement, set up for it to be parsed later. */
11963 displacement_string_start = op_string;
11964 displacement_string_end = base_string + 1;
11965
11966 if (*base_string == ')')
11967 {
11968 char *temp_string;
11969 unsigned int parens_not_balanced = 0;
11970 bool in_quotes = false;
11971
11972 /* We've already checked that the numbers of left & right ()'s are
11973 equal, and that there's a matching set of double quotes. */
11974 end_op = base_string;
11975 for (temp_string = op_string; temp_string < end_op; temp_string++)
11976 {
11977 if (*temp_string == '\\' && temp_string[1] == '"')
11978 ++temp_string;
11979 else if (*temp_string == '"')
11980 in_quotes = !in_quotes;
11981 else if (!in_quotes)
11982 {
11983 if (*temp_string == '(' && !parens_not_balanced++)
11984 base_string = temp_string;
11985 if (*temp_string == ')')
11986 --parens_not_balanced;
11987 }
11988 }
11989
11990 temp_string = base_string;
11991
11992 /* Skip past '(' and whitespace. */
11993 if (*base_string != '(')
11994 {
11995 as_bad (_("unbalanced parentheses"));
11996 return 0;
11997 }
11998 ++base_string;
11999 if (is_space_char (*base_string))
12000 ++base_string;
12001
12002 if (*base_string == ','
12003 || ((i.base_reg = parse_register (base_string, &end_op))
12004 != NULL))
12005 {
12006 displacement_string_end = temp_string;
12007
12008 i.types[this_operand].bitfield.baseindex = 1;
12009
12010 if (i.base_reg)
12011 {
12012 if (i.base_reg == &bad_reg)
12013 return 0;
12014 base_string = end_op;
12015 if (is_space_char (*base_string))
12016 ++base_string;
12017 }
12018
12019 /* There may be an index reg or scale factor here. */
12020 if (*base_string == ',')
12021 {
12022 ++base_string;
12023 if (is_space_char (*base_string))
12024 ++base_string;
12025
12026 if ((i.index_reg = parse_register (base_string, &end_op))
12027 != NULL)
12028 {
12029 if (i.index_reg == &bad_reg)
12030 return 0;
12031 base_string = end_op;
12032 if (is_space_char (*base_string))
12033 ++base_string;
12034 if (*base_string == ',')
12035 {
12036 ++base_string;
12037 if (is_space_char (*base_string))
12038 ++base_string;
12039 }
12040 else if (*base_string != ')')
12041 {
12042 as_bad (_("expecting `,' or `)' "
12043 "after index register in `%s'"),
12044 operand_string);
12045 return 0;
12046 }
12047 }
12048 else if (*base_string == REGISTER_PREFIX)
12049 {
12050 end_op = strchr (base_string, ',');
12051 if (end_op)
12052 *end_op = '\0';
12053 as_bad (_("bad register name `%s'"), base_string);
12054 return 0;
12055 }
12056
12057 /* Check for scale factor. */
12058 if (*base_string != ')')
12059 {
12060 char *end_scale = i386_scale (base_string);
12061
12062 if (!end_scale)
12063 return 0;
12064
12065 base_string = end_scale;
12066 if (is_space_char (*base_string))
12067 ++base_string;
12068 if (*base_string != ')')
12069 {
12070 as_bad (_("expecting `)' "
12071 "after scale factor in `%s'"),
12072 operand_string);
12073 return 0;
12074 }
12075 }
12076 else if (!i.index_reg)
12077 {
12078 as_bad (_("expecting index register or scale factor "
12079 "after `,'; got '%c'"),
12080 *base_string);
12081 return 0;
12082 }
12083 }
12084 else if (*base_string != ')')
12085 {
12086 as_bad (_("expecting `,' or `)' "
12087 "after base register in `%s'"),
12088 operand_string);
12089 return 0;
12090 }
12091 }
12092 else if (*base_string == REGISTER_PREFIX)
12093 {
12094 end_op = strchr (base_string, ',');
12095 if (end_op)
12096 *end_op = '\0';
12097 as_bad (_("bad register name `%s'"), base_string);
12098 return 0;
12099 }
12100 }
12101
12102 /* If there's an expression beginning the operand, parse it,
12103 assuming displacement_string_start and
12104 displacement_string_end are meaningful. */
12105 if (displacement_string_start != displacement_string_end)
12106 {
12107 if (!i386_displacement (displacement_string_start,
12108 displacement_string_end))
12109 return 0;
12110 }
12111
12112 /* Special case for (%dx) while doing input/output op. */
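 /* E.g. in "in (%dx), %al" the "(%dx)" really denotes the register
 %dx rather than a memory reference, so record it as such. */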
12113 if (i.base_reg
12114 && i.base_reg->reg_type.bitfield.instance == RegD
12115 && i.base_reg->reg_type.bitfield.word
12116 && i.index_reg == 0
12117 && i.log2_scale_factor == 0
12118 && i.seg[i.mem_operands] == 0
12119 && !operand_type_check (i.types[this_operand], disp))
12120 {
12121 i.types[this_operand] = i.base_reg->reg_type;
12122 i.input_output_operand = true;
12123 return 1;
12124 }
12125
12126 if (i386_index_check (operand_string) == 0)
12127 return 0;
12128 i.flags[this_operand] |= Operand_Mem;
12129 i.mem_operands++;
12130 }
12131 else
12132 {
12133 /* It's not a memory operand; argh! */
12134 as_bad (_("invalid char %s beginning operand %d `%s'"),
12135 output_invalid (*op_string),
12136 this_operand + 1,
12137 op_string);
12138 return 0;
12139 }
12140 return 1; /* Normal return. */
12141 }
12142 \f
12143 /* Calculate the maximum variable size (i.e., excluding fr_fix)
12144 that an rs_machine_dependent frag may reach. */
12145
12146 unsigned int
12147 i386_frag_max_var (fragS *frag)
12148 {
12149 /* The only relaxable frags are for jumps.
12150 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
12151 gas_assert (frag->fr_type == rs_machine_dependent);
12152 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
12153 }
12154
12155 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12156 static int
12157 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
12158 {
12159 /* STT_GNU_IFUNC symbol must go through PLT. */
12160 if ((symbol_get_bfdsym (fr_symbol)->flags
12161 & BSF_GNU_INDIRECT_FUNCTION) != 0)
12162 return 0;
12163
12164 if (!S_IS_EXTERNAL (fr_symbol))
12165 /* Symbol may be weak or local. */
12166 return !S_IS_WEAK (fr_symbol);
12167
12168 /* Global symbols with non-default visibility can't be preempted. */
12169 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
12170 return 1;
12171
12172 if (fr_var != NO_RELOC)
12173 switch ((enum bfd_reloc_code_real) fr_var)
12174 {
12175 case BFD_RELOC_386_PLT32:
12176 case BFD_RELOC_X86_64_PLT32:
12177 /* Symbol with PLT relocation may be preempted. */
12178 return 0;
12179 default:
12180 abort ();
12181 }
12182
12183 /* Global symbols with default visibility in a shared library may be
12184 preempted by another definition. */
12185 return !shared;
12186 }
12187 #endif
12188
12189 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
12190 Note: this also works for Skylake and Cascade Lake.
12191 ---------------------------------------------------------------------
12192 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
12193 | ------ | ----------- | ------- | -------- |
12194 | Jo | N | N | Y |
12195 | Jno | N | N | Y |
12196 | Jc/Jb | Y | N | Y |
12197 | Jae/Jnb | Y | N | Y |
12198 | Je/Jz | Y | Y | Y |
12199 | Jne/Jnz | Y | Y | Y |
12200 | Jna/Jbe | Y | N | Y |
12201 | Ja/Jnbe | Y | N | Y |
12202 | Js | N | N | Y |
12203 | Jns | N | N | Y |
12204 | Jp/Jpe | N | N | Y |
12205 | Jnp/Jpo | N | N | Y |
12206 | Jl/Jnge | Y | Y | Y |
12207 | Jge/Jnl | Y | Y | Y |
12208 | Jle/Jng | Y | Y | Y |
12209 | Jg/Jnle | Y | Y | Y |
12210 --------------------------------------------------------------------- */
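/* For example, "cmp %eax, %ebx; je label" can fuse, whereas
 "inc %eax; jc label" cannot, since INC/DEC leave CF untouched. */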
12211 static int
12212 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
12213 {
12214 if (mf_cmp == mf_cmp_alu_cmp)
12215 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
12216 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
12217 if (mf_cmp == mf_cmp_incdec)
12218 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
12219 || mf_jcc == mf_jcc_jle);
12220 if (mf_cmp == mf_cmp_test_and)
12221 return 1;
12222 return 0;
12223 }
12224
12225 /* Return the next non-empty frag. */
12226
12227 static fragS *
12228 i386_next_non_empty_frag (fragS *fragP)
12229 {
12230 /* There may be a frag with a ".fill 0" when there is no room in
12231 the current frag for frag_grow in output_insn. */
12232 for (fragP = fragP->fr_next;
12233 (fragP != NULL
12234 && fragP->fr_type == rs_fill
12235 && fragP->fr_fix == 0);
12236 fragP = fragP->fr_next)
12237 ;
12238 return fragP;
12239 }
12240
12241 /* Return the next jcc frag after BRANCH_PADDING. */
12242
12243 static fragS *
12244 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
12245 {
12246 fragS *branch_fragP;
12247 if (!pad_fragP)
12248 return NULL;
12249
12250 if (pad_fragP->fr_type == rs_machine_dependent
12251 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
12252 == BRANCH_PADDING))
12253 {
12254 branch_fragP = i386_next_non_empty_frag (pad_fragP);
12255 if (branch_fragP->fr_type != rs_machine_dependent)
12256 return NULL;
12257 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
12258 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
12259 pad_fragP->tc_frag_data.mf_type))
12260 return branch_fragP;
12261 }
12262
12263 return NULL;
12264 }
12265
12266 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
12267
12268 static void
12269 i386_classify_machine_dependent_frag (fragS *fragP)
12270 {
12271 fragS *cmp_fragP;
12272 fragS *pad_fragP;
12273 fragS *branch_fragP;
12274 fragS *next_fragP;
12275 unsigned int max_prefix_length;
12276
12277 if (fragP->tc_frag_data.classified)
12278 return;
12279
12280 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
12281 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
12282 for (next_fragP = fragP;
12283 next_fragP != NULL;
12284 next_fragP = next_fragP->fr_next)
12285 {
12286 next_fragP->tc_frag_data.classified = 1;
12287 if (next_fragP->fr_type == rs_machine_dependent)
12288 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
12289 {
12290 case BRANCH_PADDING:
12291 /* The BRANCH_PADDING frag must be followed by a branch
12292 frag. */
12293 branch_fragP = i386_next_non_empty_frag (next_fragP);
12294 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12295 break;
12296 case FUSED_JCC_PADDING:
12297 /* Check if this is a fused jcc:
12298 FUSED_JCC_PADDING
12299 CMP like instruction
12300 BRANCH_PADDING
12301 COND_JUMP
12302 */
12303 cmp_fragP = i386_next_non_empty_frag (next_fragP);
12304 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
12305 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
12306 if (branch_fragP)
12307 {
12308 /* The BRANCH_PADDING frag is merged with the
12309 FUSED_JCC_PADDING frag. */
12310 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12311 /* CMP like instruction size. */
12312 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
12313 frag_wane (pad_fragP);
12314 /* Skip to branch_fragP. */
12315 next_fragP = branch_fragP;
12316 }
12317 else if (next_fragP->tc_frag_data.max_prefix_length)
12318 {
12319 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
12320 a fused jcc. */
12321 next_fragP->fr_subtype
12322 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
12323 next_fragP->tc_frag_data.max_bytes
12324 = next_fragP->tc_frag_data.max_prefix_length;
12325 /* This will be updated in the BRANCH_PREFIX scan. */
12326 next_fragP->tc_frag_data.max_prefix_length = 0;
12327 }
12328 else
12329 frag_wane (next_fragP);
12330 break;
12331 }
12332 }
12333
12334 /* Stop if there is no BRANCH_PREFIX. */
12335 if (!align_branch_prefix_size)
12336 return;
12337
12338 /* Scan for BRANCH_PREFIX. */
12339 for (; fragP != NULL; fragP = fragP->fr_next)
12340 {
12341 if (fragP->fr_type != rs_machine_dependent
12342 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12343 != BRANCH_PREFIX))
12344 continue;
12345
12346 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12347 FUSED_JCC_PADDING. */
12348 max_prefix_length = 0;
12349 for (next_fragP = fragP;
12350 next_fragP != NULL;
12351 next_fragP = next_fragP->fr_next)
12352 {
12353 if (next_fragP->fr_type == rs_fill)
12354 /* Skip rs_fill frags. */
12355 continue;
12356 else if (next_fragP->fr_type != rs_machine_dependent)
12357 /* Stop for all other frags. */
12358 break;
12359
12360 /* rs_machine_dependent frags. */
12361 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12362 == BRANCH_PREFIX)
12363 {
12364 /* Count BRANCH_PREFIX frags. */
12365 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12366 {
12367 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12368 frag_wane (next_fragP);
12369 }
12370 else
12371 max_prefix_length
12372 += next_fragP->tc_frag_data.max_bytes;
12373 }
12374 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12375 == BRANCH_PADDING)
12376 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12377 == FUSED_JCC_PADDING))
12378 {
12379 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
12380 fragP->tc_frag_data.u.padding_fragP = next_fragP;
12381 break;
12382 }
12383 else
12384 /* Stop for other rs_machine_dependent frags. */
12385 break;
12386 }
12387
12388 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12389
12390 /* Skip to the next frag. */
12391 fragP = next_fragP;
12392 }
12393 }
12394
12395 /* Compute padding size for
12396
12397 FUSED_JCC_PADDING
12398 CMP like instruction
12399 BRANCH_PADDING
12400 COND_JUMP/UNCOND_JUMP
12401
12402 or
12403
12404 BRANCH_PADDING
12405 COND_JUMP/UNCOND_JUMP
12406 */
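/* For example, with align_branch_power == 5 (32-byte windows) a 4-byte
 branch whose first byte lands at offset 30 of a window would cross the
 boundary, so 32 - 30 == 2 bytes of padding push it into the next
 window. */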
12407
12408 static int
12409 i386_branch_padding_size (fragS *fragP, offsetT address)
12410 {
12411 unsigned int offset, size, padding_size;
12412 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12413
12414 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
12415 if (!address)
12416 address = fragP->fr_address;
12417 address += fragP->fr_fix;
12418
12419 /* CMP like instruction size. */
12420 size = fragP->tc_frag_data.cmp_size;
12421
12422 /* The base size of the branch frag. */
12423 size += branch_fragP->fr_fix;
12424
12425 /* Add opcode and displacement bytes for the rs_machine_dependent
12426 branch frag. */
12427 if (branch_fragP->fr_type == rs_machine_dependent)
12428 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12429
12430 /* Check if branch is within boundary and doesn't end at the last
12431 byte. */
12432 offset = address & ((1U << align_branch_power) - 1);
12433 if ((offset + size) >= (1U << align_branch_power))
12434 /* Padding needed to avoid crossing boundary. */
12435 padding_size = (1U << align_branch_power) - offset;
12436 else
12437 /* No padding needed. */
12438 padding_size = 0;
12439
12440 /* The return value may be saved in tc_frag_data.length, which is
12441 an unsigned byte. */
12442 if (!fits_in_unsigned_byte (padding_size))
12443 abort ();
12444
12445 return padding_size;
12446 }
12447
12448 /* i386_generic_table_relax_frag()
12449
12450 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12451 grow/shrink padding to align branch frags. Hand others to
12452 relax_frag(). */
12453
12454 long
12455 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12456 {
12457 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12458 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12459 {
12460 long padding_size = i386_branch_padding_size (fragP, 0);
12461 long grow = padding_size - fragP->tc_frag_data.length;
12462
12463 /* When the BRANCH_PREFIX frag is used, the computed address
12464 must match the actual address and there should be no padding. */
12465 if (fragP->tc_frag_data.padding_address
12466 && (fragP->tc_frag_data.padding_address != fragP->fr_address
12467 || padding_size))
12468 abort ();
12469
12470 /* Update the padding size. */
12471 if (grow)
12472 fragP->tc_frag_data.length = padding_size;
12473
12474 return grow;
12475 }
12476 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12477 {
12478 fragS *padding_fragP, *next_fragP;
12479 long padding_size, left_size, last_size;
12480
12481 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12482 if (!padding_fragP)
12483 /* Use the padding set by the leading BRANCH_PREFIX frag. */
12484 return (fragP->tc_frag_data.length
12485 - fragP->tc_frag_data.last_length);
12486
12487 /* Compute the relative address of the padding frag the very first
12488 time, when the BRANCH_PREFIX frag sizes are zero. */
12489 if (!fragP->tc_frag_data.padding_address)
12490 fragP->tc_frag_data.padding_address
12491 = padding_fragP->fr_address - (fragP->fr_address - stretch);
12492
12493 /* First update the last length from the previous iteration. */
12494 left_size = fragP->tc_frag_data.prefix_length;
12495 for (next_fragP = fragP;
12496 next_fragP != padding_fragP;
12497 next_fragP = next_fragP->fr_next)
12498 if (next_fragP->fr_type == rs_machine_dependent
12499 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12500 == BRANCH_PREFIX))
12501 {
12502 if (left_size)
12503 {
12504 int max = next_fragP->tc_frag_data.max_bytes;
12505 if (max)
12506 {
12507 int size;
12508 if (max > left_size)
12509 size = left_size;
12510 else
12511 size = max;
12512 left_size -= size;
12513 next_fragP->tc_frag_data.last_length = size;
12514 }
12515 }
12516 else
12517 next_fragP->tc_frag_data.last_length = 0;
12518 }
12519
12520 /* Check the padding size for the padding frag. */
12521 padding_size = i386_branch_padding_size
12522 (padding_fragP, (fragP->fr_address
12523 + fragP->tc_frag_data.padding_address));
12524
12525 last_size = fragP->tc_frag_data.prefix_length;
12526 /* Check if there is a change from the last iteration. */
12527 if (padding_size == last_size)
12528 {
12529 /* Update the expected address of the padding frag. */
12530 padding_fragP->tc_frag_data.padding_address
12531 = (fragP->fr_address + padding_size
12532 + fragP->tc_frag_data.padding_address);
12533 return 0;
12534 }
12535
12536 if (padding_size > fragP->tc_frag_data.max_prefix_length)
12537 {
12538 /* No padding if there is not sufficient room. Clear the
12539 expected address of the padding frag. */
12540 padding_fragP->tc_frag_data.padding_address = 0;
12541 padding_size = 0;
12542 }
12543 else
12544 /* Store the expected address of the padding frag. */
12545 padding_fragP->tc_frag_data.padding_address
12546 = (fragP->fr_address + padding_size
12547 + fragP->tc_frag_data.padding_address);
12548
12549 fragP->tc_frag_data.prefix_length = padding_size;
12550
12551 /* Update the length for the current iteration. */
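 /* The prefix bytes are apportioned over the BRANCH_PREFIX frags that
 precede the padding frag, each taking up to its max_bytes. */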
12552 left_size = padding_size;
12553 for (next_fragP = fragP;
12554 next_fragP != padding_fragP;
12555 next_fragP = next_fragP->fr_next)
12556 if (next_fragP->fr_type == rs_machine_dependent
12557 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12558 == BRANCH_PREFIX))
12559 {
12560 if (left_size)
12561 {
12562 int max = next_fragP->tc_frag_data.max_bytes;
12563 if (max)
12564 {
12565 int size;
12566 if (max > left_size)
12567 size = left_size;
12568 else
12569 size = max;
12570 left_size -= size;
12571 next_fragP->tc_frag_data.length = size;
12572 }
12573 }
12574 else
12575 next_fragP->tc_frag_data.length = 0;
12576 }
12577
12578 return (fragP->tc_frag_data.length
12579 - fragP->tc_frag_data.last_length);
12580 }
12581 return relax_frag (segment, fragP, stretch);
12582 }
12583
12584 /* md_estimate_size_before_relax()
12585
12586 Called just before relax() for rs_machine_dependent frags. The x86
12587 assembler uses these frags to handle variable size jump
12588 instructions.
12589
12590 Any symbol that is now undefined will not become defined.
12591 Return the correct fr_subtype in the frag.
12592 Return the initial "guess for variable size of frag" to caller.
12593 The guess is actually the growth beyond the fixed part. Whatever
12594 we do to grow the fixed or variable part contributes to our
12595 returned value. */
12596
12597 int
12598 md_estimate_size_before_relax (fragS *fragP, segT segment)
12599 {
12600 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12601 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12602 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12603 {
12604 i386_classify_machine_dependent_frag (fragP);
12605 return fragP->tc_frag_data.length;
12606 }
12607
12608 /* We've already got fragP->fr_subtype right; all we have to do is
12609 check for un-relaxable symbols. On an ELF system, we can't relax
12610 an externally visible symbol, because it may be overridden by a
12611 shared library. */
12612 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12613 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12614 || (IS_ELF
12615 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12616 fragP->fr_var))
12617 #endif
12618 #if defined (OBJ_COFF) && defined (TE_PE)
12619 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12620 && S_IS_WEAK (fragP->fr_symbol))
12621 #endif
12622 )
12623 {
12624 /* Symbol is undefined in this segment, or we need to keep a
12625 reloc so that weak symbols can be overridden. */
12626 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12627 enum bfd_reloc_code_real reloc_type;
12628 unsigned char *opcode;
12629 int old_fr_fix;
12630 fixS *fixP = NULL;
12631
12632 if (fragP->fr_var != NO_RELOC)
12633 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12634 else if (size == 2)
12635 reloc_type = BFD_RELOC_16_PCREL;
12636 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12637 else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
12638 && need_plt32_p (fragP->fr_symbol))
12639 reloc_type = BFD_RELOC_X86_64_PLT32;
12640 #endif
12641 else
12642 reloc_type = BFD_RELOC_32_PCREL;
12643
12644 old_fr_fix = fragP->fr_fix;
12645 opcode = (unsigned char *) fragP->fr_opcode;
12646
12647 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12648 {
12649 case UNCOND_JUMP:
12650 /* Make jmp (0xeb) a (d)word displacement jump. */
12651 opcode[0] = 0xe9;
12652 fragP->fr_fix += size;
12653 fixP = fix_new (fragP, old_fr_fix, size,
12654 fragP->fr_symbol,
12655 fragP->fr_offset, 1,
12656 reloc_type);
12657 break;
12658
12659 case COND_JUMP86:
12660 if (size == 2
12661 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12662 {
12663 /* Negate the condition, and branch past an
12664 unconditional jump. */
12665 opcode[0] ^= 1;
12666 opcode[1] = 3;
12667 /* Insert an unconditional jump. */
12668 opcode[2] = 0xe9;
12669 /* We added two extra opcode bytes, and have a two byte
12670 offset. */
12671 fragP->fr_fix += 2 + 2;
12672 fix_new (fragP, old_fr_fix + 2, 2,
12673 fragP->fr_symbol,
12674 fragP->fr_offset, 1,
12675 reloc_type);
12676 break;
12677 }
12678 /* Fall through. */
12679
12680 case COND_JUMP:
12681 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12682 {
12683 fragP->fr_fix += 1;
12684 fixP = fix_new (fragP, old_fr_fix, 1,
12685 fragP->fr_symbol,
12686 fragP->fr_offset, 1,
12687 BFD_RELOC_8_PCREL);
12688 fixP->fx_signed = 1;
12689 break;
12690 }
12691
12692 /* This changes the byte-displacement jump 0x7N
12693 to the (d)word-displacement jump 0x0f,0x8N. */
12694 opcode[1] = opcode[0] + 0x10;
12695 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12696 /* We've added an opcode byte. */
12697 fragP->fr_fix += 1 + size;
12698 fixP = fix_new (fragP, old_fr_fix + 1, size,
12699 fragP->fr_symbol,
12700 fragP->fr_offset, 1,
12701 reloc_type);
12702 break;
12703
12704 default:
12705 BAD_CASE (fragP->fr_subtype);
12706 break;
12707 }
12708
12709 /* All jumps handled here are signed, but don't unconditionally use a
12710 signed limit check for 32 and 16 bit jumps as we want to allow wrap
12711 around at 4G (outside of 64-bit mode) and 64k. */
12712 if (size == 4 && flag_code == CODE_64BIT)
12713 fixP->fx_signed = 1;
12714
12715 frag_wane (fragP);
12716 return fragP->fr_fix - old_fr_fix;
12717 }
12718
12719 /* Guess size depending on current relax state. Initially the relax
12720 state will correspond to a short jump and we return 1, because
12721 the variable part of the frag (the branch offset) is one byte
12722 long. However, we can relax a section more than once and in that
12723 case we must either set fr_subtype back to the unrelaxed state,
12724 or return the value for the appropriate branch. */
12725 return md_relax_table[fragP->fr_subtype].rlx_length;
12726 }
12727
12728 /* Called after relax() is finished.
12729
12730 In: Address of frag.
12731 fr_type == rs_machine_dependent.
12732 fr_subtype is what the address relaxed to.
12733
12734 Out: Any fixSs and constants are set up.
12735 Caller will turn frag into a ".space 0". */
12736
12737 void
12738 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12739 fragS *fragP)
12740 {
12741 unsigned char *opcode;
12742 unsigned char *where_to_put_displacement = NULL;
12743 offsetT target_address;
12744 offsetT opcode_address;
12745 unsigned int extension = 0;
12746 offsetT displacement_from_opcode_start;
12747
12748 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12749 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12750 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12751 {
12752 /* Generate nop padding. */
12753 unsigned int size = fragP->tc_frag_data.length;
12754 if (size)
12755 {
12756 if (size > fragP->tc_frag_data.max_bytes)
12757 abort ();
12758
12759 if (flag_debug)
12760 {
12761 const char *msg;
12762 const char *branch = "branch";
12763 const char *prefix = "";
12764 fragS *padding_fragP;
12765 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12766 == BRANCH_PREFIX)
12767 {
12768 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12769 switch (fragP->tc_frag_data.default_prefix)
12770 {
12771 default:
12772 abort ();
12773 break;
12774 case CS_PREFIX_OPCODE:
12775 prefix = " cs";
12776 break;
12777 case DS_PREFIX_OPCODE:
12778 prefix = " ds";
12779 break;
12780 case ES_PREFIX_OPCODE:
12781 prefix = " es";
12782 break;
12783 case FS_PREFIX_OPCODE:
12784 prefix = " fs";
12785 break;
12786 case GS_PREFIX_OPCODE:
12787 prefix = " gs";
12788 break;
12789 case SS_PREFIX_OPCODE:
12790 prefix = " ss";
12791 break;
12792 }
12793 if (padding_fragP)
12794 msg = _("%s:%u: add %d%s at 0x%llx to align "
12795 "%s within %d-byte boundary\n");
12796 else
12797 msg = _("%s:%u: add additional %d%s at 0x%llx to "
12798 "align %s within %d-byte boundary\n");
12799 }
12800 else
12801 {
12802 padding_fragP = fragP;
12803 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12804 "%s within %d-byte boundary\n");
12805 }
12806
12807 if (padding_fragP)
12808 switch (padding_fragP->tc_frag_data.branch_type)
12809 {
12810 case align_branch_jcc:
12811 branch = "jcc";
12812 break;
12813 case align_branch_fused:
12814 branch = "fused jcc";
12815 break;
12816 case align_branch_jmp:
12817 branch = "jmp";
12818 break;
12819 case align_branch_call:
12820 branch = "call";
12821 break;
12822 case align_branch_indirect:
12823 branch = "indirect branch";
12824 break;
12825 case align_branch_ret:
12826 branch = "ret";
12827 break;
12828 default:
12829 break;
12830 }
12831
12832 fprintf (stdout, msg,
12833 fragP->fr_file, fragP->fr_line, size, prefix,
12834 (long long) fragP->fr_address, branch,
12835 1 << align_branch_power);
12836 }
12837 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12838 memset (fragP->fr_opcode,
12839 fragP->tc_frag_data.default_prefix, size);
12840 else
12841 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12842 size, 0);
12843 fragP->fr_fix += size;
12844 }
12845 return;
12846 }
12847
12848 opcode = (unsigned char *) fragP->fr_opcode;
12849
12850 /* Address we want to reach in file space. */
12851 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12852
12853 /* Address opcode resides at in file space. */
12854 opcode_address = fragP->fr_address + fragP->fr_fix;
12855
12856 /* Displacement from opcode start to fill into instruction. */
12857 displacement_from_opcode_start = target_address - opcode_address;
12858
12859 if ((fragP->fr_subtype & BIG) == 0)
12860 {
12861 /* Don't have to change opcode. */
12862 extension = 1; /* 1 opcode + 1 displacement */
12863 where_to_put_displacement = &opcode[1];
12864 }
12865 else
12866 {
12867 if (no_cond_jump_promotion
12868 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12869 as_warn_where (fragP->fr_file, fragP->fr_line,
12870 _("long jump required"));
12871
12872 switch (fragP->fr_subtype)
12873 {
12874 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12875 extension = 4; /* 1 opcode + 4 displacement */
12876 opcode[0] = 0xe9;
12877 where_to_put_displacement = &opcode[1];
12878 break;
12879
12880 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12881 extension = 2; /* 1 opcode + 2 displacement */
12882 opcode[0] = 0xe9;
12883 where_to_put_displacement = &opcode[1];
12884 break;
12885
12886 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12887 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12888 extension = 5; /* 2 opcode + 4 displacement */
12889 opcode[1] = opcode[0] + 0x10;
12890 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12891 where_to_put_displacement = &opcode[2];
12892 break;
12893
12894 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12895 extension = 3; /* 2 opcode + 2 displacement */
12896 opcode[1] = opcode[0] + 0x10;
12897 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12898 where_to_put_displacement = &opcode[2];
12899 break;
12900
12901 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
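 /* As in md_estimate_size_before_relax: negate the condition and
 branch past an unconditional jump, i.e. j<!cc> +3; jmp disp16. */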
12902 extension = 4;
12903 opcode[0] ^= 1;
12904 opcode[1] = 3;
12905 opcode[2] = 0xe9;
12906 where_to_put_displacement = &opcode[3];
12907 break;
12908
12909 default:
12910 BAD_CASE (fragP->fr_subtype);
12911 break;
12912 }
12913 }
12914
12915 /* If size is less than four we are sure that the operand fits,
12916 but if it's 4, then it could be that the displacement is larger
12917 than -/+ 2GB. */
12918 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12919 && object_64bit
12920 && ((addressT) (displacement_from_opcode_start - extension
12921 + ((addressT) 1 << 31))
12922 > (((addressT) 2 << 31) - 1)))
12923 {
12924 as_bad_where (fragP->fr_file, fragP->fr_line,
12925 _("jump target out of range"));
12926 /* Make us emit 0. */
12927 displacement_from_opcode_start = extension;
12928 }
12929 /* Now put displacement after opcode. */
12930 md_number_to_chars ((char *) where_to_put_displacement,
12931 (valueT) (displacement_from_opcode_start - extension),
12932 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12933 fragP->fr_fix += extension;
12934 }
12935 \f
12936 /* Apply a fixup (fixP) to segment data, once it has been determined
12937 by our caller that we have all the info we need to fix it up.
12938
12939 Parameter valP is the pointer to the value of the bits.
12940
12941 On the 386, immediates, displacements, and data pointers are all in
12942 the same (little-endian) format, so we don't need to care about which
12943 we are handling. */
12944
12945 void
12946 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12947 {
12948 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12949 valueT value = *valP;
12950
12951 #if !defined (TE_Mach)
12952 if (fixP->fx_pcrel)
12953 {
12954 switch (fixP->fx_r_type)
12955 {
12956 default:
12957 break;
12958
12959 case BFD_RELOC_64:
12960 fixP->fx_r_type = BFD_RELOC_64_PCREL;
12961 break;
12962 case BFD_RELOC_32:
12963 case BFD_RELOC_X86_64_32S:
12964 fixP->fx_r_type = BFD_RELOC_32_PCREL;
12965 break;
12966 case BFD_RELOC_16:
12967 fixP->fx_r_type = BFD_RELOC_16_PCREL;
12968 break;
12969 case BFD_RELOC_8:
12970 fixP->fx_r_type = BFD_RELOC_8_PCREL;
12971 break;
12972 }
12973 }
12974
12975 if (fixP->fx_addsy != NULL
12976 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12977 || fixP->fx_r_type == BFD_RELOC_64_PCREL
12978 || fixP->fx_r_type == BFD_RELOC_16_PCREL
12979 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12980 && !use_rela_relocations)
12981 {
12982 /* This is a hack. There should be a better way to handle this.
12983 This covers for the fact that bfd_install_relocation will
12984 subtract the current location (for partial_inplace, PC relative
12985 relocations); see more below. */
12986 #ifndef OBJ_AOUT
12987 if (IS_ELF
12988 #ifdef TE_PE
12989 || OUTPUT_FLAVOR == bfd_target_coff_flavour
12990 #endif
12991 )
12992 value += fixP->fx_where + fixP->fx_frag->fr_address;
12993 #endif
12994 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12995 if (IS_ELF)
12996 {
12997 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12998
12999 if ((sym_seg == seg
13000 || (symbol_section_p (fixP->fx_addsy)
13001 && sym_seg != absolute_section))
13002 && !generic_force_reloc (fixP))
13003 {
13004 /* Yes, we add the values in twice. This is because
13005 bfd_install_relocation subtracts them out again. I think
13006 bfd_install_relocation is broken, but I don't dare change
13007 it. FIXME. */
13008 value += fixP->fx_where + fixP->fx_frag->fr_address;
13009 }
13010 }
13011 #endif
13012 #if defined (OBJ_COFF) && defined (TE_PE)
13013 /* For some reason, the PE format does not store a
13014 section address offset for a PC relative symbol. */
13015 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
13016 || S_IS_WEAK (fixP->fx_addsy))
13017 value += md_pcrel_from (fixP);
13018 #endif
13019 }
13020 #if defined (OBJ_COFF) && defined (TE_PE)
13021 if (fixP->fx_addsy != NULL
13022 && S_IS_WEAK (fixP->fx_addsy)
13023 /* PR 16858: Do not modify weak function references. */
13024 && ! fixP->fx_pcrel)
13025 {
13026 #if !defined (TE_PEP)
13027 /* For x86 PE weak function symbols are neither PC-relative
13028 nor do they set S_IS_FUNCTION. So the only reliable way
13029 to detect them is to check the flags of their containing
13030 section. */
13031 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
13032 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
13033 ;
13034 else
13035 #endif
13036 value -= S_GET_VALUE (fixP->fx_addsy);
13037 }
13038 #endif
13039
13040 /* Fix a few things - the dynamic linker expects certain values here,
13041 and we must not disappoint it. */
13042 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13043 if (IS_ELF && fixP->fx_addsy)
13044 switch (fixP->fx_r_type)
13045 {
13046 case BFD_RELOC_386_PLT32:
13047 case BFD_RELOC_X86_64_PLT32:
13048 /* Make the jump instruction point to the address of the operand.
13049 At runtime we merely add the offset to the actual PLT entry.
13050 NB: Subtract the offset size only for jump instructions. */
13051 if (fixP->fx_pcrel)
13052 value = -4;
13053 break;
13054
13055 case BFD_RELOC_386_TLS_GD:
13056 case BFD_RELOC_386_TLS_LDM:
13057 case BFD_RELOC_386_TLS_IE_32:
13058 case BFD_RELOC_386_TLS_IE:
13059 case BFD_RELOC_386_TLS_GOTIE:
13060 case BFD_RELOC_386_TLS_GOTDESC:
13061 case BFD_RELOC_X86_64_TLSGD:
13062 case BFD_RELOC_X86_64_TLSLD:
13063 case BFD_RELOC_X86_64_GOTTPOFF:
13064 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
13065 value = 0; /* Fully resolved at runtime. No addend. */
13066 /* Fallthrough */
13067 case BFD_RELOC_386_TLS_LE:
13068 case BFD_RELOC_386_TLS_LDO_32:
13069 case BFD_RELOC_386_TLS_LE_32:
13070 case BFD_RELOC_X86_64_DTPOFF32:
13071 case BFD_RELOC_X86_64_DTPOFF64:
13072 case BFD_RELOC_X86_64_TPOFF32:
13073 case BFD_RELOC_X86_64_TPOFF64:
13074 S_SET_THREAD_LOCAL (fixP->fx_addsy);
13075 break;
13076
13077 case BFD_RELOC_386_TLS_DESC_CALL:
13078 case BFD_RELOC_X86_64_TLSDESC_CALL:
13079 value = 0; /* Fully resolved at runtime. No addend. */
13080 S_SET_THREAD_LOCAL (fixP->fx_addsy);
13081 fixP->fx_done = 0;
13082 return;
13083
13084 case BFD_RELOC_VTABLE_INHERIT:
13085 case BFD_RELOC_VTABLE_ENTRY:
13086 fixP->fx_done = 0;
13087 return;
13088
13089 default:
13090 break;
13091 }
13092 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
13093
13094 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
13095 if (!object_64bit)
13096 value = extend_to_32bit_address (value);
13097
13098 *valP = value;
13099 #endif /* !defined (TE_Mach) */
13100
13101 /* Are we finished with this relocation now? */
13102 if (fixP->fx_addsy == NULL)
13103 {
13104 fixP->fx_done = 1;
13105 switch (fixP->fx_r_type)
13106 {
13107 case BFD_RELOC_X86_64_32S:
13108 fixP->fx_signed = 1;
13109 break;
13110
13111 default:
13112 break;
13113 }
13114 }
13115 #if defined (OBJ_COFF) && defined (TE_PE)
13116 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
13117 {
13118 fixP->fx_done = 0;
13119 /* Remember value for tc_gen_reloc. */
13120 fixP->fx_addnumber = value;
13121 /* Clear out the frag for now. */
13122 value = 0;
13123 }
13124 #endif
13125 else if (use_rela_relocations)
13126 {
13127 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
13128 fixP->fx_no_overflow = 1;
13129 /* Remember value for tc_gen_reloc. */
13130 fixP->fx_addnumber = value;
13131 value = 0;
13132 }
13133
13134 md_number_to_chars (p, value, fixP->fx_size);
13135 }
13136 \f
13137 const char *
13138 md_atof (int type, char *litP, int *sizeP)
13139 {
13140 /* This outputs the LITTLENUMs in REVERSE order;
13141 in accord with the little-endian 386. */
13142 return ieee_md_atof (type, litP, sizeP, false);
13143 }
13144 \f
13145 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
13146
13147 static char *
13148 output_invalid (int c)
13149 {
13150 if (ISPRINT (c))
13151 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
13152 "'%c'", c);
13153 else
13154 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
13155 "(0x%x)", (unsigned char) c);
13156 return output_invalid_buf;
13157 }
13158
13159 /* Verify that @r can be used in the current context. */
13160
13161 static bool check_register (const reg_entry *r)
13162 {
13163 if (allow_pseudo_reg)
13164 return true;
13165
13166 if (operand_type_all_zero (&r->reg_type))
13167 return false;
13168
13169 if ((r->reg_type.bitfield.dword
13170 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
13171 || r->reg_type.bitfield.class == RegCR
13172 || r->reg_type.bitfield.class == RegDR)
13173 && !cpu_arch_flags.bitfield.cpui386)
13174 return false;
13175
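 /* Test registers were only ever implemented by the 386 and 486;
 they disappeared again with the Pentium. */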
13176 if (r->reg_type.bitfield.class == RegTR
13177 && (flag_code == CODE_64BIT
13178 || !cpu_arch_flags.bitfield.cpui386
13179 || cpu_arch_isa_flags.bitfield.cpui586
13180 || cpu_arch_isa_flags.bitfield.cpui686))
13181 return false;
13182
13183 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
13184 return false;
13185
13186 if (!cpu_arch_flags.bitfield.cpuavx512f)
13187 {
13188 if (r->reg_type.bitfield.zmmword
13189 || r->reg_type.bitfield.class == RegMask)
13190 return false;
13191
13192 if (!cpu_arch_flags.bitfield.cpuavx)
13193 {
13194 if (r->reg_type.bitfield.ymmword)
13195 return false;
13196
13197 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
13198 return false;
13199 }
13200 }
13201
13202 if (r->reg_type.bitfield.tmmword
13203 && (!cpu_arch_flags.bitfield.cpuamx_tile
13204 || flag_code != CODE_64BIT))
13205 return false;
13206
13207 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
13208 return false;
13209
13210 /* Only allow the fake index register when allow_index_reg is non-zero. */
13211 if (!allow_index_reg && r->reg_num == RegIZ)
13212 return false;
13213
13214 /* Upper 16 vector registers are only available with VREX in 64bit
13215 mode, and require EVEX encoding. */
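 /* E.g. %xmm16 ... %xmm31 (and their YMM/ZMM counterparts) have no
 VEX encoding at all. */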
13216 if (r->reg_flags & RegVRex)
13217 {
13218 if (!cpu_arch_flags.bitfield.cpuavx512f
13219 || flag_code != CODE_64BIT)
13220 return false;
13221
13222 if (i.vec_encoding == vex_encoding_default)
13223 i.vec_encoding = vex_encoding_evex;
13224 else if (i.vec_encoding != vex_encoding_evex)
13225 i.vec_encoding = vex_encoding_error;
13226 }
13227
13228 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
13229 && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
13230 && flag_code != CODE_64BIT)
13231 return false;
13232
13233 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
13234 && !intel_syntax)
13235 return false;
13236
13237 return true;
13238 }
13239
13240 /* REG_STRING starts *before* REGISTER_PREFIX. */
13241
13242 static const reg_entry *
13243 parse_real_register (char *reg_string, char **end_op)
13244 {
13245 char *s = reg_string;
13246 char *p;
13247 char reg_name_given[MAX_REG_NAME_SIZE + 1];
13248 const reg_entry *r;
13249
13250 /* Skip possible REGISTER_PREFIX and possible whitespace. */
13251 if (*s == REGISTER_PREFIX)
13252 ++s;
13253
13254 if (is_space_char (*s))
13255 ++s;
13256
13257 p = reg_name_given;
13258 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
13259 {
13260 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
13261 return (const reg_entry *) NULL;
13262 s++;
13263 }
13264
13265 if (is_part_of_name (*s))
13266 return (const reg_entry *) NULL;
13267
13268 *end_op = s;
13269
13270 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
13271
13272 /* Handle floating point regs, allowing spaces in the (i) part. */
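 /* E.g. "%st(1)" and "%st ( 1 )" both name FP register 1. */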
13273 if (r == reg_st0)
13274 {
13275 if (!cpu_arch_flags.bitfield.cpu8087
13276 && !cpu_arch_flags.bitfield.cpu287
13277 && !cpu_arch_flags.bitfield.cpu387
13278 && !allow_pseudo_reg)
13279 return (const reg_entry *) NULL;
13280
13281 if (is_space_char (*s))
13282 ++s;
13283 if (*s == '(')
13284 {
13285 ++s;
13286 if (is_space_char (*s))
13287 ++s;
13288 if (*s >= '0' && *s <= '7')
13289 {
13290 int fpr = *s - '0';
13291 ++s;
13292 if (is_space_char (*s))
13293 ++s;
13294 if (*s == ')')
13295 {
13296 *end_op = s + 1;
13297 know (r[fpr].reg_num == fpr);
13298 return r + fpr;
13299 }
13300 }
13301 /* We have "%st(" then garbage. */
13302 return (const reg_entry *) NULL;
13303 }
13304 }
13305
13306 return r && check_register (r) ? r : NULL;
13307 }
13308
13309 /* REG_STRING starts *before* REGISTER_PREFIX. */
13310
13311 static const reg_entry *
13312 parse_register (char *reg_string, char **end_op)
13313 {
13314 const reg_entry *r;
13315
13316 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
13317 r = parse_real_register (reg_string, end_op);
13318 else
13319 r = NULL;
13320 if (!r)
13321 {
13322 char *save = input_line_pointer;
13323 char c;
13324 symbolS *symbolP;
13325
13326 input_line_pointer = reg_string;
13327 c = get_symbol_name (&reg_string);
13328 symbolP = symbol_find (reg_string);
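 /* Look through chains of equated symbols, so that e.g. an alias of
 a register equate ("b = a" after "a = %eax") is recognized too. */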
13329 while (symbolP && S_GET_SEGMENT (symbolP) != reg_section)
13330 {
13331 const expressionS *e = symbol_get_value_expression(symbolP);
13332
13333 if (e->X_op != O_symbol || e->X_add_number)
13334 break;
13335 symbolP = e->X_add_symbol;
13336 }
13337 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
13338 {
13339 const expressionS *e = symbol_get_value_expression (symbolP);
13340
13341 if (e->X_op == O_register)
13342 {
13343 know (e->X_add_number >= 0
13344 && (valueT) e->X_add_number < i386_regtab_size);
13345 r = i386_regtab + e->X_add_number;
13346 *end_op = input_line_pointer;
13347 }
13348 if (r && !check_register (r))
13349 {
13350 as_bad (_("register '%s%s' cannot be used here"),
13351 register_prefix, r->reg_name);
13352 r = &bad_reg;
13353 }
13354 }
13355 *input_line_pointer = c;
13356 input_line_pointer = save;
13357 }
13358 return r;
13359 }
13360
13361 int
13362 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13363 {
13364 const reg_entry *r = NULL;
13365 char *end = input_line_pointer;
13366
13367 *end = *nextcharP;
13368 if (*name == REGISTER_PREFIX || allow_naked_reg)
13369 r = parse_real_register (name, &input_line_pointer);
13370 if (r && end <= input_line_pointer)
13371 {
13372 *nextcharP = *input_line_pointer;
13373 *input_line_pointer = 0;
13374 if (r != &bad_reg)
13375 {
13376 e->X_op = O_register;
13377 e->X_add_number = r - i386_regtab;
13378 }
13379 else
13380 e->X_op = O_illegal;
13381 return 1;
13382 }
13383 input_line_pointer = end;
13384 *end = 0;
13385 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13386 }
13387
13388 void
13389 md_operand (expressionS *e)
13390 {
13391 char *end;
13392 const reg_entry *r;
13393
13394 switch (*input_line_pointer)
13395 {
13396 case REGISTER_PREFIX:
13397 r = parse_real_register (input_line_pointer, &end);
13398 if (r)
13399 {
13400 e->X_op = O_register;
13401 e->X_add_number = r - i386_regtab;
13402 input_line_pointer = end;
13403 }
13404 break;
13405
13406 case '[':
13407 gas_assert (intel_syntax);
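 /* A bracketed sub-expression in Intel syntax; represent it by an
 expression symbol with X_op == O_index. */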
13408 end = input_line_pointer++;
13409 expression (e);
13410 if (*input_line_pointer == ']')
13411 {
13412 ++input_line_pointer;
13413 e->X_op_symbol = make_expr_symbol (e);
13414 e->X_add_symbol = NULL;
13415 e->X_add_number = 0;
13416 e->X_op = O_index;
13417 }
13418 else
13419 {
13420 e->X_op = O_absent;
13421 input_line_pointer = end;
13422 }
13423 break;
13424 }
13425 }
13426
13427 \f
13428 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13429 const char *md_shortopts = "kVQ:sqnO::";
13430 #else
13431 const char *md_shortopts = "qnO::";
13432 #endif
13433
13434 #define OPTION_32 (OPTION_MD_BASE + 0)
13435 #define OPTION_64 (OPTION_MD_BASE + 1)
13436 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
13437 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13438 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
13439 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
13440 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
13441 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
13442 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
13443 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
13444 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
13445 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
13446 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
13447 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
13448 #define OPTION_X32 (OPTION_MD_BASE + 14)
13449 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
13450 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
13451 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
13452 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
13453 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
13454 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
13455 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
13456 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
13457 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
13458 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
13459 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
13460 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
13461 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
13462 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
13463 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
13464 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
13465 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
13466 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
13467 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
13468 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
13469
13470 struct option md_longopts[] =
13471 {
13472 {"32", no_argument, NULL, OPTION_32},
13473 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13474 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13475 {"64", no_argument, NULL, OPTION_64},
13476 #endif
13477 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13478 {"x32", no_argument, NULL, OPTION_X32},
13479 {"mshared", no_argument, NULL, OPTION_MSHARED},
13480 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
13481 #endif
13482 {"divide", no_argument, NULL, OPTION_DIVIDE},
13483 {"march", required_argument, NULL, OPTION_MARCH},
13484 {"mtune", required_argument, NULL, OPTION_MTUNE},
13485 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
13486 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
13487 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
13488 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
13489 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
13490 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
13491 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
13492 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
13493 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
13494 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
13495 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
13496 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
13497 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
13498 # if defined (TE_PE) || defined (TE_PEP)
13499 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
13500 #endif
13501 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
13502 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
13503 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
13504 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
13505 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
13506 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
13507 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
13508 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
13509 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
13510 {"mlfence-before-indirect-branch", required_argument, NULL,
13511 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
13512 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
13513 {"mamd64", no_argument, NULL, OPTION_MAMD64},
13514 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
13515 {NULL, no_argument, NULL, 0}
13516 };
13517 size_t md_longopts_size = sizeof (md_longopts);
13518
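/* Handle one command line option.  C is either a short option letter
   or one of the OPTION_* codes above, ARG its argument if any.
   Returns 1 when the option is consumed here, 0 to let the generic
   option machinery reject it.  */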
13519 int
13520 md_parse_option (int c, const char *arg)
13521 {
13522 unsigned int j;
13523 char *arch, *next, *saved, *type;
13524
13525 switch (c)
13526 {
13527 case 'n':
13528 optimize_align_code = 0;
13529 break;
13530
13531 case 'q':
13532 quiet_warnings = 1;
13533 break;
13534
13535 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13536 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13537 should be emitted or not. FIXME: Not implemented. */
13538 case 'Q':
13539 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13540 return 0;
13541 break;
13542
13543 /* -V: SVR4 argument to print version ID. */
13544 case 'V':
13545 print_version_id ();
13546 break;
13547
13548 /* -k: Ignore for FreeBSD compatibility. */
13549 case 'k':
13550 break;
13551
13552 case 's':
13553 /* -s: On i386 Solaris, this tells the native assembler to use
13554 .stab instead of .stab.excl. We always use .stab anyhow. */
13555 break;
13556
13557 case OPTION_MSHARED:
13558 shared = 1;
13559 break;
13560
13561 case OPTION_X86_USED_NOTE:
13562 if (strcasecmp (arg, "yes") == 0)
13563 x86_used_note = 1;
13564 else if (strcasecmp (arg, "no") == 0)
13565 x86_used_note = 0;
13566 else
13567 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13568 break;
13569
13571 #endif
13572 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13573 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13574 case OPTION_64:
13575 {
13576 const char **list, **l;
13577
13578 list = bfd_target_list ();
13579 for (l = list; *l != NULL; l++)
13580 if (startswith (*l, "elf64-x86-64")
13581 || strcmp (*l, "coff-x86-64") == 0
13582 || strcmp (*l, "pe-x86-64") == 0
13583 || strcmp (*l, "pei-x86-64") == 0
13584 || strcmp (*l, "mach-o-x86-64") == 0)
13585 {
13586 default_arch = "x86_64";
13587 break;
13588 }
13589 if (*l == NULL)
13590 as_fatal (_("no compiled-in support for x86_64"));
13591 free (list);
13592 }
13593 break;
13594 #endif
13595
13596 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13597 case OPTION_X32:
13598 if (IS_ELF)
13599 {
13600 const char **list, **l;
13601
13602 list = bfd_target_list ();
13603 for (l = list; *l != NULL; l++)
13604 if (startswith (*l, "elf32-x86-64"))
13605 {
13606 default_arch = "x86_64:32";
13607 break;
13608 }
13609 if (*l == NULL)
13610 as_fatal (_("no compiled-in support for 32bit x86_64"));
13611 free (list);
13612 }
13613 else
13614 as_fatal (_("32bit x86_64 is only supported for ELF"));
13615 break;
13616 #endif
13617
13618 case OPTION_32:
13619 default_arch = "i386";
13620 break;
13621
13622 case OPTION_DIVIDE:
13623 #ifdef SVR4_COMMENT_CHARS
13624 {
13625 char *n, *t;
13626 const char *s;
13627
13628 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13629 t = n;
13630 for (s = i386_comment_chars; *s != '\0'; s++)
13631 if (*s != '/')
13632 *t++ = *s;
13633 *t = '\0';
13634 i386_comment_chars = n;
13635 }
13636 #endif
13637 break;
13638
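/* -march=: the argument is split at '+' signs; the first component
   may name a processor, and every component may name an ISA
   extension, optionally "no"-prefixed to disable it.  Illustrative
   forms:

       -march=skylake            processor only
       -march=skylake+avx512f    processor plus an extension
       -march=+nosse             extension change only

   A component matching neither kind of table entry is a fatal
   error.  */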
13639 case OPTION_MARCH:
13640 saved = xstrdup (arg);
13641 arch = saved;
13642 /* Allow -march=+nosse. */
13643 if (*arch == '+')
13644 arch++;
13645 do
13646 {
13647 if (*arch == '.')
13648 as_fatal (_("invalid -march= option: `%s'"), arg);
13649 next = strchr (arch, '+');
13650 if (next)
13651 *next++ = '\0';
13652 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13653 {
13654 if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
13655 && strcmp (arch, cpu_arch[j].name) == 0)
13656 {
13657 /* Processor. */
13658 if (! cpu_arch[j].enable.bitfield.cpui386)
13659 continue;
13660
13661 cpu_arch_name = cpu_arch[j].name;
13662 free (cpu_sub_arch_name);
13663 cpu_sub_arch_name = NULL;
13664 cpu_arch_flags = cpu_arch[j].enable;
13665 cpu_arch_isa = cpu_arch[j].type;
13666 cpu_arch_isa_flags = cpu_arch[j].enable;
13667 if (!cpu_arch_tune_set)
13668 {
13669 cpu_arch_tune = cpu_arch_isa;
13670 cpu_arch_tune_flags = cpu_arch_isa_flags;
13671 }
13672 break;
13673 }
13674 else if (cpu_arch[j].type == PROCESSOR_NONE
13675 && strcmp (arch, cpu_arch[j].name) == 0
13676 && !cpu_flags_all_zero (&cpu_arch[j].enable))
13677 {
13678 /* ISA extension. */
13679 i386_cpu_flags flags;
13680
13681 flags = cpu_flags_or (cpu_arch_flags,
13682 cpu_arch[j].enable);
13683
13684 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13685 {
13686 extend_cpu_sub_arch_name (arch);
13687 cpu_arch_flags = flags;
13688 cpu_arch_isa_flags = flags;
13689 }
13690 else
13691 cpu_arch_isa_flags
13692 = cpu_flags_or (cpu_arch_isa_flags,
13693 cpu_arch[j].enable);
13694 break;
13695 }
13696 }
13697
13698 if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
13699 {
13700 /* Disable an ISA extension. */
13701 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13702 if (cpu_arch[j].type == PROCESSOR_NONE
13703 && strcmp (arch + 2, cpu_arch[j].name) == 0)
13704 {
13705 i386_cpu_flags flags;
13706
13707 flags = cpu_flags_and_not (cpu_arch_flags,
13708 cpu_arch[j].disable);
13709 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13710 {
13711 extend_cpu_sub_arch_name (arch);
13712 cpu_arch_flags = flags;
13713 cpu_arch_isa_flags = flags;
13714 }
13715 break;
13716 }
13717 }
13718
13719 if (j >= ARRAY_SIZE (cpu_arch))
13720 as_fatal (_("invalid -march= option: `%s'"), arg);
13721
13722 arch = next;
13723 }
13724 while (next != NULL);
13725 free (saved);
13726 break;
13727
13728 case OPTION_MTUNE:
13729 if (*arg == '.')
13730 as_fatal (_("invalid -mtune= option: `%s'"), arg);
13731 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13732 {
13733 if (cpu_arch[j].type != PROCESSOR_NONE
13734 && strcmp (arg, cpu_arch[j].name) == 0)
13735 {
13736 cpu_arch_tune_set = 1;
13737 cpu_arch_tune = cpu_arch [j].type;
13738 cpu_arch_tune_flags = cpu_arch[j].enable;
13739 break;
13740 }
13741 }
13742 if (j >= ARRAY_SIZE (cpu_arch))
13743 as_fatal (_("invalid -mtune= option: `%s'"), arg);
13744 break;
13745
13746 case OPTION_MMNEMONIC:
13747 if (strcasecmp (arg, "att") == 0)
13748 intel_mnemonic = 0;
13749 else if (strcasecmp (arg, "intel") == 0)
13750 intel_mnemonic = 1;
13751 else
13752 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13753 break;
13754
13755 case OPTION_MSYNTAX:
13756 if (strcasecmp (arg, "att") == 0)
13757 intel_syntax = 0;
13758 else if (strcasecmp (arg, "intel") == 0)
13759 intel_syntax = 1;
13760 else
13761 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13762 break;
13763
13764 case OPTION_MINDEX_REG:
13765 allow_index_reg = 1;
13766 break;
13767
13768 case OPTION_MNAKED_REG:
13769 allow_naked_reg = 1;
13770 break;
13771
13772 case OPTION_MSSE2AVX:
13773 sse2avx = 1;
13774 break;
13775
13776 case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
13777 use_unaligned_vector_move = 1;
13778 break;
13779
13780 case OPTION_MSSE_CHECK:
13781 if (strcasecmp (arg, "error") == 0)
13782 sse_check = check_error;
13783 else if (strcasecmp (arg, "warning") == 0)
13784 sse_check = check_warning;
13785 else if (strcasecmp (arg, "none") == 0)
13786 sse_check = check_none;
13787 else
13788 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13789 break;
13790
13791 case OPTION_MOPERAND_CHECK:
13792 if (strcasecmp (arg, "error") == 0)
13793 operand_check = check_error;
13794 else if (strcasecmp (arg, "warning") == 0)
13795 operand_check = check_warning;
13796 else if (strcasecmp (arg, "none") == 0)
13797 operand_check = check_none;
13798 else
13799 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13800 break;
13801
13802 case OPTION_MAVXSCALAR:
13803 if (strcasecmp (arg, "128") == 0)
13804 avxscalar = vex128;
13805 else if (strcasecmp (arg, "256") == 0)
13806 avxscalar = vex256;
13807 else
13808 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13809 break;
13810
13811 case OPTION_MVEXWIG:
13812 if (strcmp (arg, "0") == 0)
13813 vexwig = vexw0;
13814 else if (strcmp (arg, "1") == 0)
13815 vexwig = vexw1;
13816 else
13817 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13818 break;
13819
13820 case OPTION_MADD_BND_PREFIX:
13821 add_bnd_prefix = 1;
13822 break;
13823
13824 case OPTION_MEVEXLIG:
13825 if (strcmp (arg, "128") == 0)
13826 evexlig = evexl128;
13827 else if (strcmp (arg, "256") == 0)
13828 evexlig = evexl256;
13829 else if (strcmp (arg, "512") == 0)
13830 evexlig = evexl512;
13831 else
13832 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13833 break;
13834
13835 case OPTION_MEVEXRCIG:
13836 if (strcmp (arg, "rne") == 0)
13837 evexrcig = rne;
13838 else if (strcmp (arg, "rd") == 0)
13839 evexrcig = rd;
13840 else if (strcmp (arg, "ru") == 0)
13841 evexrcig = ru;
13842 else if (strcmp (arg, "rz") == 0)
13843 evexrcig = rz;
13844 else
13845 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13846 break;
13847
13848 case OPTION_MEVEXWIG:
13849 if (strcmp (arg, "0") == 0)
13850 evexwig = evexw0;
13851 else if (strcmp (arg, "1") == 0)
13852 evexwig = evexw1;
13853 else
13854 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13855 break;
13856
13857 #if defined (TE_PE) || defined (TE_PEP)
13858 case OPTION_MBIG_OBJ:
13859 use_big_obj = 1;
13860 break;
13861 #endif
13862
13863 case OPTION_MOMIT_LOCK_PREFIX:
13864 if (strcasecmp (arg, "yes") == 0)
13865 omit_lock_prefix = 1;
13866 else if (strcasecmp (arg, "no") == 0)
13867 omit_lock_prefix = 0;
13868 else
13869 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13870 break;
13871
13872 case OPTION_MFENCE_AS_LOCK_ADD:
13873 if (strcasecmp (arg, "yes") == 0)
13874 avoid_fence = 1;
13875 else if (strcasecmp (arg, "no") == 0)
13876 avoid_fence = 0;
13877 else
13878 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13879 break;
13880
13881 case OPTION_MLFENCE_AFTER_LOAD:
13882 if (strcasecmp (arg, "yes") == 0)
13883 lfence_after_load = 1;
13884 else if (strcasecmp (arg, "no") == 0)
13885 lfence_after_load = 0;
13886 else
13887 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13888 break;
13889
13890 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13891 if (strcasecmp (arg, "all") == 0)
13892 {
13893 lfence_before_indirect_branch = lfence_branch_all;
13894 if (lfence_before_ret == lfence_before_ret_none)
13895 lfence_before_ret = lfence_before_ret_shl;
13896 }
13897 else if (strcasecmp (arg, "memory") == 0)
13898 lfence_before_indirect_branch = lfence_branch_memory;
13899 else if (strcasecmp (arg, "register") == 0)
13900 lfence_before_indirect_branch = lfence_branch_register;
13901 else if (strcasecmp (arg, "none") == 0)
13902 lfence_before_indirect_branch = lfence_branch_none;
13903 else
13904 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13905 arg);
13906 break;
13907
13908 case OPTION_MLFENCE_BEFORE_RET:
13909 if (strcasecmp (arg, "or") == 0)
13910 lfence_before_ret = lfence_before_ret_or;
13911 else if (strcasecmp (arg, "not") == 0)
13912 lfence_before_ret = lfence_before_ret_not;
13913 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13914 lfence_before_ret = lfence_before_ret_shl;
13915 else if (strcasecmp (arg, "none") == 0)
13916 lfence_before_ret = lfence_before_ret_none;
13917 else
13918 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13919 arg);
13920 break;
13921
13922 case OPTION_MRELAX_RELOCATIONS:
13923 if (strcasecmp (arg, "yes") == 0)
13924 generate_relax_relocations = 1;
13925 else if (strcasecmp (arg, "no") == 0)
13926 generate_relax_relocations = 0;
13927 else
13928 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13929 break;
13930
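/* -malign-branch-boundary=NUM: NUM must be 0 (disable) or a power of
   two no smaller than 16; it is stored as its log2.  Worked example:
   32 == 1 << 5, so -malign-branch-boundary=32 ends up with
   align_branch_power == 5.  */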
13931 case OPTION_MALIGN_BRANCH_BOUNDARY:
13932 {
13933 char *end;
13934 long int align = strtoul (arg, &end, 0);
13935 if (*end == '\0')
13936 {
13937 if (align == 0)
13938 {
13939 align_branch_power = 0;
13940 break;
13941 }
13942 else if (align >= 16)
13943 {
13944 int align_power;
13945 for (align_power = 0;
13946 (align & 1) == 0;
13947 align >>= 1, align_power++)
13948 continue;
13949 /* Limit alignment power to 31. */
13950 if (align == 1 && align_power < 32)
13951 {
13952 align_branch_power = align_power;
13953 break;
13954 }
13955 }
13956 }
13957 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13958 }
13959 break;
13960
13961 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13962 {
13963 char *end;
13964 int align = strtoul (arg, &end, 0);
13965 /* Some processors only support 5 prefixes. */
13966 if (*end == '\0' && align >= 0 && align < 6)
13967 {
13968 align_branch_prefix_size = align;
13969 break;
13970 }
13971 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13972 arg);
13973 }
13974 break;
13975
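/* -malign-branch=: rebuild the align_branch mask from the '+'
   separated TYPE list.  E.g. -malign-branch=jcc+fused+jmp (the
   documented default) sets the same three bits as
   -mbranches-within-32B-boundaries below.  */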
13976 case OPTION_MALIGN_BRANCH:
13977 align_branch = 0;
13978 saved = xstrdup (arg);
13979 type = saved;
13980 do
13981 {
13982 next = strchr (type, '+');
13983 if (next)
13984 *next++ = '\0';
13985 if (strcasecmp (type, "jcc") == 0)
13986 align_branch |= align_branch_jcc_bit;
13987 else if (strcasecmp (type, "fused") == 0)
13988 align_branch |= align_branch_fused_bit;
13989 else if (strcasecmp (type, "jmp") == 0)
13990 align_branch |= align_branch_jmp_bit;
13991 else if (strcasecmp (type, "call") == 0)
13992 align_branch |= align_branch_call_bit;
13993 else if (strcasecmp (type, "ret") == 0)
13994 align_branch |= align_branch_ret_bit;
13995 else if (strcasecmp (type, "indirect") == 0)
13996 align_branch |= align_branch_indirect_bit;
13997 else
13998 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13999 type = next;
14000 }
14001 while (next != NULL);
14002 free (saved);
14003 break;
14004
14005 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
14006 align_branch_power = 5;
14007 align_branch_prefix_size = 5;
14008 align_branch = (align_branch_jcc_bit
14009 | align_branch_fused_bit
14010 | align_branch_jmp_bit);
14011 break;
14012
14013 case OPTION_MAMD64:
14014 isa64 = amd64;
14015 break;
14016
14017 case OPTION_MINTEL64:
14018 isa64 = intel64;
14019 break;
14020
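/* -O: a bare -O selects the default optimization level, -Os
   additionally optimizes for code size (turning on every encoding
   optimization), and -O<n> passes the digit through atoi, so e.g.
   -O2 sets optimize to 2.  */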
14021 case 'O':
14022 if (arg == NULL)
14023 {
14024 optimize = 1;
14025 /* Turn off -Os. */
14026 optimize_for_space = 0;
14027 }
14028 else if (*arg == 's')
14029 {
14030 optimize_for_space = 1;
14031 /* Turn on all encoding optimizations. */
14032 optimize = INT_MAX;
14033 }
14034 else
14035 {
14036 optimize = atoi (arg);
14037 /* Turn off -Os. */
14038 optimize_for_space = 0;
14039 }
14040 break;
14041
14042 default:
14043 return 0;
14044 }
14045 return 1;
14046 }
14047
14048 #define MESSAGE_TEMPLATE \
14049 " "
14050
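/* Append NAME (LEN characters long) to the usage line being built at
   P, separating entries with ", "; when the line (whose width is
   fixed by MESSAGE_TEMPLATE) is full, flush it to STREAM and start a
   fresh one.  *LEFT_P tracks the room remaining.  */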
14051 static char *
14052 output_message (FILE *stream, char *p, char *message, char *start,
14053 int *left_p, const char *name, int len)
14054 {
14055 int size = sizeof (MESSAGE_TEMPLATE);
14056 int left = *left_p;
14057
14058 /* Reserve 2 spaces for ", " or ",\0" */
14059 left -= len + 2;
14060
14061 /* Check if there is any room. */
14062 if (left >= 0)
14063 {
14064 if (p != start)
14065 {
14066 *p++ = ',';
14067 *p++ = ' ';
14068 }
14069 p = mempcpy (p, name, len);
14070 }
14071 else
14072 {
14073 /* Output the current message now and start a new one. */
14074 *p++ = ',';
14075 *p = '\0';
14076 fprintf (stream, "%s\n", message);
14077 p = start;
14078 left = size - (start - message) - len - 2;
14079
14080 gas_assert (left >= 0);
14081
14082 p = mempcpy (p, name, len);
14083 }
14084
14085 *left_p = left;
14086 return p;
14087 }
14088
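/* Print the processor (EXT == 0) or extension (EXT != 0) names known
   to -march=/-mtune= for md_show_usage.  With CHECK set, the extra
   names "default", "push" and "pop" are listed first and entries
   unusable as i386 processors are suppressed.  */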
14089 static void
14090 show_arch (FILE *stream, int ext, int check)
14091 {
14092 static char message[] = MESSAGE_TEMPLATE;
14093 char *start = message + 27;
14094 char *p;
14095 int size = sizeof (MESSAGE_TEMPLATE);
14096 int left;
14097 const char *name;
14098 int len;
14099 unsigned int j;
14100
14101 p = start;
14102 left = size - (start - message);
14103
14104 if (!ext && check)
14105 {
14106 p = output_message (stream, p, message, start, &left,
14107 STRING_COMMA_LEN ("default"));
14108 p = output_message (stream, p, message, start, &left,
14109 STRING_COMMA_LEN ("push"));
14110 p = output_message (stream, p, message, start, &left,
14111 STRING_COMMA_LEN ("pop"));
14112 }
14113
14114 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14115 {
14116 /* Should it be skipped? */
14117 if (cpu_arch [j].skip)
14118 continue;
14119
14120 name = cpu_arch [j].name;
14121 len = cpu_arch [j].len;
14122 if (cpu_arch[j].type == PROCESSOR_NONE)
14123 {
14124 /* It is an extension. Skip if we aren't asked to show it. */
14125 if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
14126 continue;
14127 }
14128 else if (ext)
14129 {
14130 /* It is a processor.  Skip if we only show extensions. */
14131 continue;
14132 }
14133 else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
14134 {
14135 /* It is an unusable (pre-i386) processor - skip. */
14136 continue;
14137 }
14138
14139 p = output_message (stream, p, message, start, &left, name, len);
14140 }
14141
14142 /* Display disabled extensions. */
14143 if (ext)
14144 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14145 {
14146 char *str;
14147
14148 if (cpu_arch[j].type != PROCESSOR_NONE
14149 || !cpu_flags_all_zero (&cpu_arch[j].enable))
14150 continue;
14151 str = xasprintf ("no%s", cpu_arch[j].name);
14152 p = output_message (stream, p, message, start, &left, str,
14153 strlen (str));
14154 free (str);
14155 }
14156
14157 *p = '\0';
14158 fprintf (stream, "%s\n", message);
14159 }
14160
14161 void
14162 md_show_usage (FILE *stream)
14163 {
14164 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14165 fprintf (stream, _("\
14166 -Qy, -Qn ignored\n\
14167 -V print assembler version number\n\
14168 -k ignored\n"));
14169 #endif
14170 fprintf (stream, _("\
14171 -n do not optimize code alignment\n\
14172 -O{012s} attempt some code optimizations\n\
14173 -q quieten some warnings\n"));
14174 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14175 fprintf (stream, _("\
14176 -s ignored\n"));
14177 #endif
14178 #ifdef BFD64
14179 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14180 fprintf (stream, _("\
14181 --32/--64/--x32 generate 32bit/64bit/x32 object\n"));
14182 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
14183 fprintf (stream, _("\
14184 --32/--64 generate 32bit/64bit object\n"));
14185 # endif
14186 #endif
14187 #ifdef SVR4_COMMENT_CHARS
14188 fprintf (stream, _("\
14189 --divide do not treat `/' as a comment character\n"));
14190 #else
14191 fprintf (stream, _("\
14192 --divide ignored\n"));
14193 #endif
14194 fprintf (stream, _("\
14195 -march=CPU[+EXTENSION...]\n\
14196 generate code for CPU and EXTENSION; CPU is one of:\n"));
14197 show_arch (stream, 0, 1);
14198 fprintf (stream, _("\
14199 EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
14200 show_arch (stream, 1, 0);
14201 fprintf (stream, _("\
14202 -mtune=CPU optimize for CPU, CPU is one of:\n"));
14203 show_arch (stream, 0, 0);
14204 fprintf (stream, _("\
14205 -msse2avx encode SSE instructions with VEX prefix\n"));
14206 fprintf (stream, _("\
14207 -muse-unaligned-vector-move\n\
14208 encode aligned vector move as unaligned vector move\n"));
14209 fprintf (stream, _("\
14210 -msse-check=[none|error|warning] (default: warning)\n\
14211 check SSE instructions\n"));
14212 fprintf (stream, _("\
14213 -moperand-check=[none|error|warning] (default: warning)\n\
14214 check operand combinations for validity\n"));
14215 fprintf (stream, _("\
14216 -mavxscalar=[128|256] (default: 128)\n\
14217 encode scalar AVX instructions with specific vector\n\
14218 length\n"));
14219 fprintf (stream, _("\
14220 -mvexwig=[0|1] (default: 0)\n\
14221 encode VEX instructions with specific VEX.W value\n\
14222 for VEX.W bit ignored instructions\n"));
14223 fprintf (stream, _("\
14224 -mevexlig=[128|256|512] (default: 128)\n\
14225 encode scalar EVEX instructions with specific vector\n\
14226 length\n"));
14227 fprintf (stream, _("\
14228 -mevexwig=[0|1] (default: 0)\n\
14229 encode EVEX instructions with specific EVEX.W value\n\
14230 for EVEX.W bit ignored instructions\n"));
14231 fprintf (stream, _("\
14232 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
14233 encode EVEX instructions with specific EVEX.RC value\n\
14234 for SAE-only ignored instructions\n"));
14235 fprintf (stream, _("\
14236 -mmnemonic=[att|intel] "));
14237 if (SYSV386_COMPAT)
14238 fprintf (stream, _("(default: att)\n"));
14239 else
14240 fprintf (stream, _("(default: intel)\n"));
14241 fprintf (stream, _("\
14242 use AT&T/Intel mnemonic\n"));
14243 fprintf (stream, _("\
14244 -msyntax=[att|intel] (default: att)\n\
14245 use AT&T/Intel syntax\n"));
14246 fprintf (stream, _("\
14247 -mindex-reg support pseudo index registers\n"));
14248 fprintf (stream, _("\
14249 -mnaked-reg don't require `%%' prefix for registers\n"));
14250 fprintf (stream, _("\
14251 -madd-bnd-prefix add BND prefix for all valid branches\n"));
14252 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14253 fprintf (stream, _("\
14254 -mshared disable branch optimization for shared code\n"));
14255 fprintf (stream, _("\
14256 -mx86-used-note=[no|yes] "));
14257 if (DEFAULT_X86_USED_NOTE)
14258 fprintf (stream, _("(default: yes)\n"));
14259 else
14260 fprintf (stream, _("(default: no)\n"));
14261 fprintf (stream, _("\
14262 generate x86 used ISA and feature properties\n"));
14263 #endif
14264 #if defined (TE_PE) || defined (TE_PEP)
14265 fprintf (stream, _("\
14266 -mbig-obj generate big object files\n"));
14267 #endif
14268 fprintf (stream, _("\
14269 -momit-lock-prefix=[no|yes] (default: no)\n\
14270 strip all lock prefixes\n"));
14271 fprintf (stream, _("\
14272 -mfence-as-lock-add=[no|yes] (default: no)\n\
14273 encode lfence, mfence and sfence as\n\
14274 lock addl $0x0, (%%{re}sp)\n"));
14275 fprintf (stream, _("\
14276 -mrelax-relocations=[no|yes] "));
14277 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
14278 fprintf (stream, _("(default: yes)\n"));
14279 else
14280 fprintf (stream, _("(default: no)\n"));
14281 fprintf (stream, _("\
14282 generate relax relocations\n"));
14283 fprintf (stream, _("\
14284 -malign-branch-boundary=NUM (default: 0)\n\
14285 align branches within NUM byte boundary\n"));
14286 fprintf (stream, _("\
14287 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
14288 TYPE is a combination of jcc, fused, jmp, call, ret,\n\
14289 indirect\n\
14290 specify types of branches to align\n"));
14291 fprintf (stream, _("\
14292 -malign-branch-prefix-size=NUM (default: 5)\n\
14293 align branches with NUM prefixes per instruction\n"));
14294 fprintf (stream, _("\
14295 -mbranches-within-32B-boundaries\n\
14296 align branches within 32 byte boundary\n"));
14297 fprintf (stream, _("\
14298 -mlfence-after-load=[no|yes] (default: no)\n\
14299 generate lfence after load\n"));
14300 fprintf (stream, _("\
14301 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
14302 generate lfence before indirect near branch\n"));
14303 fprintf (stream, _("\
14304 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
14305 generate lfence before ret\n"));
14306 fprintf (stream, _("\
14307 -mamd64 accept only AMD64 ISA [default]\n"));
14308 fprintf (stream, _("\
14309 -mintel64 accept only Intel64 ISA\n"));
14310 }
14311
14312 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
14313 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14314 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14315
14316 /* Pick the target format to use. */
14317
14318 const char *
14319 i386_target_format (void)
14320 {
14321 if (startswith (default_arch, "x86_64"))
14322 {
14323 update_code_flag (CODE_64BIT, 1);
14324 if (default_arch[6] == '\0')
14325 x86_elf_abi = X86_64_ABI;
14326 else
14327 x86_elf_abi = X86_64_X32_ABI;
14328 }
14329 else if (!strcmp (default_arch, "i386"))
14330 update_code_flag (CODE_32BIT, 1);
14331 else if (!strcmp (default_arch, "iamcu"))
14332 {
14333 update_code_flag (CODE_32BIT, 1);
14334 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
14335 {
14336 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
14337 cpu_arch_name = "iamcu";
14338 free (cpu_sub_arch_name);
14339 cpu_sub_arch_name = NULL;
14340 cpu_arch_flags = iamcu_flags;
14341 cpu_arch_isa = PROCESSOR_IAMCU;
14342 cpu_arch_isa_flags = iamcu_flags;
14343 if (!cpu_arch_tune_set)
14344 {
14345 cpu_arch_tune = cpu_arch_isa;
14346 cpu_arch_tune_flags = cpu_arch_isa_flags;
14347 }
14348 }
14349 else if (cpu_arch_isa != PROCESSOR_IAMCU)
14350 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
14351 cpu_arch_name);
14352 }
14353 else
14354 as_fatal (_("unknown architecture"));
14355
14356 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
14357 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14358 if (cpu_flags_all_zero (&cpu_arch_tune_flags))
14359 cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14360
14361 switch (OUTPUT_FLAVOR)
14362 {
14363 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
14364 case bfd_target_aout_flavour:
14365 return AOUT_TARGET_FORMAT;
14366 #endif
14367 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
14368 # if defined (TE_PE) || defined (TE_PEP)
14369 case bfd_target_coff_flavour:
14370 if (flag_code == CODE_64BIT)
14371 {
14372 object_64bit = 1;
14373 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
14374 }
14375 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
14376 # elif defined (TE_GO32)
14377 case bfd_target_coff_flavour:
14378 return "coff-go32";
14379 # else
14380 case bfd_target_coff_flavour:
14381 return "coff-i386";
14382 # endif
14383 #endif
14384 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14385 case bfd_target_elf_flavour:
14386 {
14387 const char *format;
14388
14389 switch (x86_elf_abi)
14390 {
14391 default:
14392 format = ELF_TARGET_FORMAT;
14393 #ifndef TE_SOLARIS
14394 tls_get_addr = "___tls_get_addr";
14395 #endif
14396 break;
14397 case X86_64_ABI:
14398 use_rela_relocations = 1;
14399 object_64bit = 1;
14400 #ifndef TE_SOLARIS
14401 tls_get_addr = "__tls_get_addr";
14402 #endif
14403 format = ELF_TARGET_FORMAT64;
14404 break;
14405 case X86_64_X32_ABI:
14406 use_rela_relocations = 1;
14407 object_64bit = 1;
14408 #ifndef TE_SOLARIS
14409 tls_get_addr = "__tls_get_addr";
14410 #endif
14411 disallow_64bit_reloc = 1;
14412 format = ELF_TARGET_FORMAT32;
14413 break;
14414 }
14415 if (cpu_arch_isa == PROCESSOR_IAMCU)
14416 {
14417 if (x86_elf_abi != I386_ABI)
14418 as_fatal (_("Intel MCU is 32bit only"));
14419 return ELF_TARGET_IAMCU_FORMAT;
14420 }
14421 else
14422 return format;
14423 }
14424 #endif
14425 #if defined (OBJ_MACH_O)
14426 case bfd_target_mach_o_flavour:
14427 if (flag_code == CODE_64BIT)
14428 {
14429 use_rela_relocations = 1;
14430 object_64bit = 1;
14431 return "mach-o-x86-64";
14432 }
14433 else
14434 return "mach-o-i386";
14435 #endif
14436 default:
14437 abort ();
14438 return NULL;
14439 }
14440 }
14441
14442 #endif /* OBJ_MAYBE_ more than one */
14443 \f
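/* Called for any symbol the generic code cannot resolve.  We only
   intercept _GLOBAL_OFFSET_TABLE_, creating its symbol on first use;
   the three explicit character tests are merely a cheap prefix
   filter in front of the full strcmp.  */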
14444 symbolS *
14445 md_undefined_symbol (char *name)
14446 {
14447 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
14448 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
14449 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
14450 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
14451 {
14452 if (!GOT_symbol)
14453 {
14454 if (symbol_find (name))
14455 as_bad (_("GOT already in symbol table"));
14456 GOT_symbol = symbol_new (name, undefined_section,
14457 &zero_address_frag, 0);
14458 }
14459 return GOT_symbol;
14460 }
14461 return 0;
14462 }
14463
14464 /* Round up a section size to the appropriate boundary. */
14465
14466 valueT
14467 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
14468 {
14469 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14470 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
14471 {
14472 /* For a.out, force the section size to be aligned. If we don't do
14473 this, BFD will align it for us, but it will not write out the
14474 final bytes of the section. This may be a bug in BFD, but it is
14475 easier to fix it here since that is how the other a.out targets
14476 work. */
14477 int align;
14478
14479 align = bfd_section_alignment (segment);
14480 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
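/* Illustrative: with align == 4 (16 byte alignment) a size of
   0x1234 is rounded to (0x1234 + 0xf) & ~(valueT) 0xf == 0x1240.  */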
14481 }
14482 #endif
14483
14484 return size;
14485 }
14486
14487 /* On the i386, PC-relative offsets are relative to the start of the
14488 next instruction. That is, the address of the offset, plus its
14489 size, since the offset is always the last part of the insn. */
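/* Illustrative numbers: a 4 byte displacement at frag address 0x100,
   offset 3 within the frag, yields 0x100 + 3 + 4 == 0x107, the
   address of the following instruction.  */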
14490
14491 long
14492 md_pcrel_from (fixS *fixP)
14493 {
14494 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
14495 }
14496
14497 #ifndef I386COFF
14498
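/* Handle the .bss directive: switch to the bss section, with an
   optional absolute expression selecting the subsection, e.g.
   ".bss 1" (illustrative).  */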
14499 static void
14500 s_bss (int ignore ATTRIBUTE_UNUSED)
14501 {
14502 int temp;
14503
14504 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14505 if (IS_ELF)
14506 obj_elf_section_change_hook ();
14507 #endif
14508 temp = get_absolute_expression ();
14509 subseg_set (bss_section, (subsegT) temp);
14510 demand_empty_rest_of_line ();
14511 }
14512
14513 #endif
14514
14515 /* Remember constant directive. */
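/* Data directives in a code section can bypass the lfence insertion
   machinery, so record them in last_insn and warn when any of the
   -mlfence-* options is in effect.  */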
14516
14517 void
14518 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
14519 {
14520 if (last_insn.kind != last_insn_directive
14521 && (bfd_section_flags (now_seg) & SEC_CODE))
14522 {
14523 last_insn.seg = now_seg;
14524 last_insn.kind = last_insn_directive;
14525 last_insn.name = "constant directive";
14526 last_insn.file = as_where (&last_insn.line);
14527 if (lfence_before_ret != lfence_before_ret_none)
14528 {
14529 if (lfence_before_indirect_branch != lfence_branch_none)
14530 as_warn (_("constant directive skips -mlfence-before-ret "
14531 "and -mlfence-before-indirect-branch"));
14532 else
14533 as_warn (_("constant directive skips -mlfence-before-ret"));
14534 }
14535 else if (lfence_before_indirect_branch != lfence_branch_none)
14536 as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
14537 }
14538 }
14539
14540 int
14541 i386_validate_fix (fixS *fixp)
14542 {
14543 if (fixp->fx_addsy && S_GET_SEGMENT (fixp->fx_addsy) == reg_section)
14544 {
14545 reloc_howto_type *howto;
14546
14547 howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
14548 as_bad_where (fixp->fx_file, fixp->fx_line,
14549 _("invalid %s relocation against register"),
14550 howto ? howto->name : "<unknown>");
14551 return 0;
14552 }
14553
14554 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14555 if (fixp->fx_r_type == BFD_RELOC_SIZE32
14556 || fixp->fx_r_type == BFD_RELOC_SIZE64)
14557 return IS_ELF && fixp->fx_addsy
14558 && (!S_IS_DEFINED (fixp->fx_addsy)
14559 || S_IS_EXTERNAL (fixp->fx_addsy));
14560 #endif
14561
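/* An expression subtracting _GLOBAL_OFFSET_TABLE_, e.g.
   ".long sym - _GLOBAL_OFFSET_TABLE_" (illustrative), is converted to
   the matching GOT relative relocation: the GOTPCREL flavours on
   x86-64, GOTOFF otherwise.  */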
14562 if (fixp->fx_subsy)
14563 {
14564 if (fixp->fx_subsy == GOT_symbol)
14565 {
14566 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
14567 {
14568 if (!object_64bit)
14569 abort ();
14570 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14571 if (fixp->fx_tcbit2)
14572 fixp->fx_r_type = (fixp->fx_tcbit
14573 ? BFD_RELOC_X86_64_REX_GOTPCRELX
14574 : BFD_RELOC_X86_64_GOTPCRELX);
14575 else
14576 #endif
14577 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
14578 }
14579 else
14580 {
14581 if (!object_64bit)
14582 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
14583 else
14584 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
14585 }
14586 fixp->fx_subsy = 0;
14587 }
14588 }
14589 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14590 else
14591 {
14592 /* NB: Commit 292676c1 resolved PLT32 reloc against local symbol
14593 to section. Since PLT32 relocation must be against symbols,
14594 turn such PLT32 relocation into PC32 relocation. */
14595 if (fixp->fx_addsy
14596 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
14597 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
14598 && symbol_section_p (fixp->fx_addsy))
14599 fixp->fx_r_type = BFD_RELOC_32_PCREL;
14600 if (!object_64bit)
14601 {
14602 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
14603 && fixp->fx_tcbit2)
14604 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
14605 }
14606 }
14607 #endif
14608
14609 return 1;
14610 }
14611
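/* Translate an internal fixup into the arelent BFD writes out.  The
   bulk of the work is choosing the BFD reloc code; addend handling
   then differs between REL (i386 style) and RELA (x86-64 style)
   targets.  */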
14612 arelent *
14613 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
14614 {
14615 arelent *rel;
14616 bfd_reloc_code_real_type code;
14617
14618 switch (fixp->fx_r_type)
14619 {
14620 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14621 symbolS *sym;
14622
14623 case BFD_RELOC_SIZE32:
14624 case BFD_RELOC_SIZE64:
14625 if (fixp->fx_addsy
14626 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
14627 && (!fixp->fx_subsy
14628 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
14629 sym = fixp->fx_addsy;
14630 else if (fixp->fx_subsy
14631 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
14632 && (!fixp->fx_addsy
14633 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
14634 sym = fixp->fx_subsy;
14635 else
14636 sym = NULL;
14637 if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
14638 {
14639 /* Resolve size relocation against local symbol to size of
14640 the symbol plus addend. */
14641 valueT value = S_GET_SIZE (sym);
14642
14643 if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
14644 value = bfd_section_size (S_GET_SEGMENT (sym));
14645 if (sym == fixp->fx_subsy)
14646 {
14647 value = -value;
14648 if (fixp->fx_addsy)
14649 value += S_GET_VALUE (fixp->fx_addsy);
14650 }
14651 else if (fixp->fx_subsy)
14652 value -= S_GET_VALUE (fixp->fx_subsy);
14653 value += fixp->fx_offset;
14654 if (fixp->fx_r_type == BFD_RELOC_SIZE32
14655 && object_64bit
14656 && !fits_in_unsigned_long (value))
14657 as_bad_where (fixp->fx_file, fixp->fx_line,
14658 _("symbol size computation overflow"));
14659 fixp->fx_addsy = NULL;
14660 fixp->fx_subsy = NULL;
14661 md_apply_fix (fixp, (valueT *) &value, NULL);
14662 return NULL;
14663 }
14664 if (!fixp->fx_addsy || fixp->fx_subsy)
14665 {
14666 as_bad_where (fixp->fx_file, fixp->fx_line,
14667 "unsupported expression involving @size");
14668 return NULL;
14669 }
14670 #endif
14671 /* Fall through. */
14672
14673 case BFD_RELOC_X86_64_PLT32:
14674 case BFD_RELOC_X86_64_GOT32:
14675 case BFD_RELOC_X86_64_GOTPCREL:
14676 case BFD_RELOC_X86_64_GOTPCRELX:
14677 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14678 case BFD_RELOC_386_PLT32:
14679 case BFD_RELOC_386_GOT32:
14680 case BFD_RELOC_386_GOT32X:
14681 case BFD_RELOC_386_GOTOFF:
14682 case BFD_RELOC_386_GOTPC:
14683 case BFD_RELOC_386_TLS_GD:
14684 case BFD_RELOC_386_TLS_LDM:
14685 case BFD_RELOC_386_TLS_LDO_32:
14686 case BFD_RELOC_386_TLS_IE_32:
14687 case BFD_RELOC_386_TLS_IE:
14688 case BFD_RELOC_386_TLS_GOTIE:
14689 case BFD_RELOC_386_TLS_LE_32:
14690 case BFD_RELOC_386_TLS_LE:
14691 case BFD_RELOC_386_TLS_GOTDESC:
14692 case BFD_RELOC_386_TLS_DESC_CALL:
14693 case BFD_RELOC_X86_64_TLSGD:
14694 case BFD_RELOC_X86_64_TLSLD:
14695 case BFD_RELOC_X86_64_DTPOFF32:
14696 case BFD_RELOC_X86_64_DTPOFF64:
14697 case BFD_RELOC_X86_64_GOTTPOFF:
14698 case BFD_RELOC_X86_64_TPOFF32:
14699 case BFD_RELOC_X86_64_TPOFF64:
14700 case BFD_RELOC_X86_64_GOTOFF64:
14701 case BFD_RELOC_X86_64_GOTPC32:
14702 case BFD_RELOC_X86_64_GOT64:
14703 case BFD_RELOC_X86_64_GOTPCREL64:
14704 case BFD_RELOC_X86_64_GOTPC64:
14705 case BFD_RELOC_X86_64_GOTPLT64:
14706 case BFD_RELOC_X86_64_PLTOFF64:
14707 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14708 case BFD_RELOC_X86_64_TLSDESC_CALL:
14709 case BFD_RELOC_RVA:
14710 case BFD_RELOC_VTABLE_ENTRY:
14711 case BFD_RELOC_VTABLE_INHERIT:
14712 #ifdef TE_PE
14713 case BFD_RELOC_32_SECREL:
14714 case BFD_RELOC_16_SECIDX:
14715 #endif
14716 code = fixp->fx_r_type;
14717 break;
14718 case BFD_RELOC_X86_64_32S:
14719 if (!fixp->fx_pcrel)
14720 {
14721 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
14722 code = fixp->fx_r_type;
14723 break;
14724 }
14725 /* Fall through. */
14726 default:
14727 if (fixp->fx_pcrel)
14728 {
14729 switch (fixp->fx_size)
14730 {
14731 default:
14732 as_bad_where (fixp->fx_file, fixp->fx_line,
14733 _("can not do %d byte pc-relative relocation"),
14734 fixp->fx_size);
14735 code = BFD_RELOC_32_PCREL;
14736 break;
14737 case 1: code = BFD_RELOC_8_PCREL; break;
14738 case 2: code = BFD_RELOC_16_PCREL; break;
14739 case 4: code = BFD_RELOC_32_PCREL; break;
14740 #ifdef BFD64
14741 case 8: code = BFD_RELOC_64_PCREL; break;
14742 #endif
14743 }
14744 }
14745 else
14746 {
14747 switch (fixp->fx_size)
14748 {
14749 default:
14750 as_bad_where (fixp->fx_file, fixp->fx_line,
14751 _("can not do %d byte relocation"),
14752 fixp->fx_size);
14753 code = BFD_RELOC_32;
14754 break;
14755 case 1: code = BFD_RELOC_8; break;
14756 case 2: code = BFD_RELOC_16; break;
14757 case 4: code = BFD_RELOC_32; break;
14758 #ifdef BFD64
14759 case 8: code = BFD_RELOC_64; break;
14760 #endif
14761 }
14762 }
14763 break;
14764 }
14765
14766 if ((code == BFD_RELOC_32
14767 || code == BFD_RELOC_32_PCREL
14768 || code == BFD_RELOC_X86_64_32S)
14769 && GOT_symbol
14770 && fixp->fx_addsy == GOT_symbol)
14771 {
14772 if (!object_64bit)
14773 code = BFD_RELOC_386_GOTPC;
14774 else
14775 code = BFD_RELOC_X86_64_GOTPC32;
14776 }
14777 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14778 && GOT_symbol
14779 && fixp->fx_addsy == GOT_symbol)
14780 {
14781 code = BFD_RELOC_X86_64_GOTPC64;
14782 }
14783
14784 rel = XNEW (arelent);
14785 rel->sym_ptr_ptr = XNEW (asymbol *);
14786 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14787
14788 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14789
14790 if (!use_rela_relocations)
14791 {
14792 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14793 vtable entry to be used in the relocation's section offset. */
14794 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14795 rel->address = fixp->fx_offset;
14796 #if defined (OBJ_COFF) && defined (TE_PE)
14797 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14798 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14799 else
14800 #endif
14801 rel->addend = 0;
14802 }
14803 /* Use the rela in 64bit mode. */
14804 else
14805 {
14806 if (disallow_64bit_reloc)
14807 switch (code)
14808 {
14809 case BFD_RELOC_X86_64_DTPOFF64:
14810 case BFD_RELOC_X86_64_TPOFF64:
14811 case BFD_RELOC_64_PCREL:
14812 case BFD_RELOC_X86_64_GOTOFF64:
14813 case BFD_RELOC_X86_64_GOT64:
14814 case BFD_RELOC_X86_64_GOTPCREL64:
14815 case BFD_RELOC_X86_64_GOTPC64:
14816 case BFD_RELOC_X86_64_GOTPLT64:
14817 case BFD_RELOC_X86_64_PLTOFF64:
14818 as_bad_where (fixp->fx_file, fixp->fx_line,
14819 _("cannot represent relocation type %s in x32 mode"),
14820 bfd_get_reloc_code_name (code));
14821 break;
14822 default:
14823 break;
14824 }
14825
14826 if (!fixp->fx_pcrel)
14827 rel->addend = fixp->fx_offset;
14828 else
14829 switch (code)
14830 {
14831 case BFD_RELOC_X86_64_PLT32:
14832 case BFD_RELOC_X86_64_GOT32:
14833 case BFD_RELOC_X86_64_GOTPCREL:
14834 case BFD_RELOC_X86_64_GOTPCRELX:
14835 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14836 case BFD_RELOC_X86_64_TLSGD:
14837 case BFD_RELOC_X86_64_TLSLD:
14838 case BFD_RELOC_X86_64_GOTTPOFF:
14839 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14840 case BFD_RELOC_X86_64_TLSDESC_CALL:
14841 rel->addend = fixp->fx_offset - fixp->fx_size;
14842 break;
14843 default:
14844 rel->addend = (section->vma
14845 - fixp->fx_size
14846 + fixp->fx_addnumber
14847 + md_pcrel_from (fixp));
14848 break;
14849 }
14850 }
14851
14852 rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14853 if (rel->howto == NULL)
14854 {
14855 as_bad_where (fixp->fx_file, fixp->fx_line,
14856 _("cannot represent relocation type %s"),
14857 bfd_get_reloc_code_name (code));
14858 /* Set howto to a garbage value so that we can keep going. */
14859 rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14860 gas_assert (rel->howto != NULL);
14861 }
14862
14863 return rel;
14864 }
14865
14866 #include "tc-i386-intel.c"
14867
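/* Parse a register name from a .cfi_* directive operand and rewrite
   EXP into the register's DWARF number; the '%' prefix is optional
   here.  E.g. (illustrative) "%rbp" resolves to DWARF register 6 in
   64-bit mode.  */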
14868 void
14869 tc_x86_parse_to_dw2regnum (expressionS *exp)
14870 {
14871 int saved_naked_reg;
14872 char saved_register_dot;
14873
14874 saved_naked_reg = allow_naked_reg;
14875 allow_naked_reg = 1;
14876 saved_register_dot = register_chars['.'];
14877 register_chars['.'] = '.';
14878 allow_pseudo_reg = 1;
14879 expression_and_evaluate (exp);
14880 allow_pseudo_reg = 0;
14881 register_chars['.'] = saved_register_dot;
14882 allow_naked_reg = saved_naked_reg;
14883
14884 if (exp->X_op == O_register && exp->X_add_number >= 0)
14885 {
14886 if ((addressT) exp->X_add_number < i386_regtab_size)
14887 {
14888 exp->X_op = O_constant;
14889 exp->X_add_number = i386_regtab[exp->X_add_number]
14890 .dw2_regnum[flag_code >> 1];
14891 }
14892 else
14893 exp->X_op = O_illegal;
14894 }
14895 }
14896
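/* Emit the CFI state at function entry: the CFA is the stack pointer
   plus the address size, with the return address stored at the stack
   pointer itself.  sp_regno caches the DWARF number of %esp/%rsp per
   flag_code.  */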
14897 void
14898 tc_x86_frame_initial_instructions (void)
14899 {
14900 static unsigned int sp_regno[2];
14901
14902 if (!sp_regno[flag_code >> 1])
14903 {
14904 char *saved_input = input_line_pointer;
14905 char sp[][4] = {"esp", "rsp"};
14906 expressionS exp;
14907
14908 input_line_pointer = sp[flag_code >> 1];
14909 tc_x86_parse_to_dw2regnum (&exp);
14910 gas_assert (exp.X_op == O_constant);
14911 sp_regno[flag_code >> 1] = exp.X_add_number;
14912 input_line_pointer = saved_input;
14913 }
14914
14915 cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14916 cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14917 }
14918
14919 int
14920 x86_dwarf2_addr_size (void)
14921 {
14922 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14923 if (x86_elf_abi == X86_64_X32_ABI)
14924 return 4;
14925 #endif
14926 return bfd_arch_bits_per_address (stdoutput) / 8;
14927 }
14928
14929 int
14930 i386_elf_section_type (const char *str, size_t len)
14931 {
14932 if (flag_code == CODE_64BIT
14933 && len == sizeof ("unwind") - 1
14934 && startswith (str, "unwind"))
14935 return SHT_X86_64_UNWIND;
14936
14937 return -1;
14938 }
14939
14940 #ifdef TE_SOLARIS
14941 void
14942 i386_solaris_fix_up_eh_frame (segT sec)
14943 {
14944 if (flag_code == CODE_64BIT)
14945 elf_section_type (sec) = SHT_X86_64_UNWIND;
14946 }
14947 #endif
14948
14949 #ifdef TE_PE
14950 void
14951 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14952 {
14953 expressionS exp;
14954
14955 exp.X_op = O_secrel;
14956 exp.X_add_symbol = symbol;
14957 exp.X_add_number = 0;
14958 emit_expr (&exp, size);
14959 }
14960 #endif
14961
14962 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14963 /* For ELF on x86-64, add support for SHF_X86_64_LARGE. */
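/* A section can request the flag either with the 'l' character in
   the flag string or with the bare word "large", e.g. (illustrative)
   .section .ldata,"awl",@progbits  */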
14964
14965 bfd_vma
14966 x86_64_section_letter (int letter, const char **ptr_msg)
14967 {
14968 if (flag_code == CODE_64BIT)
14969 {
14970 if (letter == 'l')
14971 return SHF_X86_64_LARGE;
14972
14973 *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14974 }
14975 else
14976 *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14977 return -1;
14978 }
14979
14980 bfd_vma
14981 x86_64_section_word (char *str, size_t len)
14982 {
14983 if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
14984 return SHF_X86_64_LARGE;
14985
14986 return -1;
14987 }
14988
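/* Implement .largecomm: like .comm, but the symbol is allocated to
   the large common section (locally, a .lbss section) so it may live
   outside the +/-2GB range assumed by the small code model.  Outside
   64-bit mode this degrades to plain .comm with a warning.  */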
14989 static void
14990 handle_large_common (int small ATTRIBUTE_UNUSED)
14991 {
14992 if (flag_code != CODE_64BIT)
14993 {
14994 s_comm_internal (0, elf_common_parse);
14995 as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14996 }
14997 else
14998 {
14999 static segT lbss_section;
15000 asection *saved_com_section_ptr = elf_com_section_ptr;
15001 asection *saved_bss_section = bss_section;
15002
15003 if (lbss_section == NULL)
15004 {
15005 flagword applicable;
15006 segT seg = now_seg;
15007 subsegT subseg = now_subseg;
15008
15009 /* The .lbss section is for local .largecomm symbols. */
15010 lbss_section = subseg_new (".lbss", 0);
15011 applicable = bfd_applicable_section_flags (stdoutput);
15012 bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
15013 seg_info (lbss_section)->bss = 1;
15014
15015 subseg_set (seg, subseg);
15016 }
15017
15018 elf_com_section_ptr = &_bfd_elf_large_com_section;
15019 bss_section = lbss_section;
15020
15021 s_comm_internal (0, elf_common_parse);
15022
15023 elf_com_section_ptr = saved_com_section_ptr;
15024 bss_section = saved_bss_section;
15025 }
15026 }
15027 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */