x86: also use D for XCHG and TEST
1 /* tc-i386.c -- Assemble code for the Intel 80386
2 Copyright (C) 1989-2022 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Intel 80386 machine specific gas.
22 Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
23 x86_64 support by Jan Hubicka (jh@suse.cz)
24 VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
25 Bugs & suggestions are completely welcome. This is free software.
26 Please help us make it better. */
27
28 #include "as.h"
29 #include "safe-ctype.h"
30 #include "subsegs.h"
31 #include "dwarf2dbg.h"
32 #include "dw2gencfi.h"
33 #include "gen-sframe.h"
34 #include "sframe.h"
35 #include "elf/x86-64.h"
36 #include "opcodes/i386-init.h"
37 #include <limits.h>
38
39 #ifndef INFER_ADDR_PREFIX
40 #define INFER_ADDR_PREFIX 1
41 #endif
42
43 #ifndef DEFAULT_ARCH
44 #define DEFAULT_ARCH "i386"
45 #endif
46
47 #ifndef INLINE
48 #if __GNUC__ >= 2
49 #define INLINE __inline__
50 #else
51 #define INLINE
52 #endif
53 #endif
54
55 /* Prefixes will be emitted in the order defined below.
56    WAIT_PREFIX must be the first prefix since FWAIT really is an
57 instruction, and so must come before any prefixes.
58 The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
59 REP_PREFIX/HLE_PREFIX, LOCK_PREFIX. */
60 #define WAIT_PREFIX 0
61 #define SEG_PREFIX 1
62 #define ADDR_PREFIX 2
63 #define DATA_PREFIX 3
64 #define REP_PREFIX 4
65 #define HLE_PREFIX REP_PREFIX
66 #define BND_PREFIX REP_PREFIX
67 #define LOCK_PREFIX 5
68 #define REX_PREFIX 6 /* must come last. */
69 #define MAX_PREFIXES 7 /* max prefixes per opcode */
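/* A minimal sketch (not compiled; illustration only) of how the slot
   numbering above fixes emission order: collected prefixes are written
   lowest slot first, so WAIT_PREFIX comes out ahead of everything and
   REX_PREFIX last.  The out() byte sink is a hypothetical stand-in for
   the real frag output code.  */
#if 0
static void
emit_prefixes_sketch (const unsigned char prefix[MAX_PREFIXES],
                      void (*out) (unsigned char))
{
  unsigned int slot;

  for (slot = 0; slot < MAX_PREFIXES; slot++)
    if (prefix[slot])           /* Zero means the slot is unused.  */
      out (prefix[slot]);
}
#endif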
70
71 /* we define the syntax here (modulo base,index,scale syntax) */
72 #define REGISTER_PREFIX '%'
73 #define IMMEDIATE_PREFIX '$'
74 #define ABSOLUTE_PREFIX '*'
75
76 /* these are the instruction mnemonic suffixes in AT&T syntax or
77 memory operand size in Intel syntax. */
78 #define WORD_MNEM_SUFFIX 'w'
79 #define BYTE_MNEM_SUFFIX 'b'
80 #define SHORT_MNEM_SUFFIX 's'
81 #define LONG_MNEM_SUFFIX 'l'
82 #define QWORD_MNEM_SUFFIX 'q'
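/* For example, AT&T "movl $1, %eax" carries LONG_MNEM_SUFFIX, while the
   Intel-syntax equivalent "mov eax, 1" derives the same operand size
   from the register rather than from a suffix.  */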
83
84 #define END_OF_INSN '\0'
85
86 /* This matches the C -> StaticRounding alias in the opcode table. */
87 #define commutative staticrounding
88
89 /*
90 'templates' is for grouping together 'template' structures for opcodes
91 of the same name. This is only used for storing the insns in the grand
92 ole hash table of insns.
93 The templates themselves start at START and range up to (but not including)
94 END.
95 */
96 typedef struct
97 {
98 const insn_template *start;
99 const insn_template *end;
100 }
101 templates;
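/* Illustrative sketch (not compiled) of how a 'templates' pair is used,
   assuming the op_hash table defined further down in this file: a
   mnemonic lookup yields the pair, and candidate insn_templates are
   scanned from start up to (but not including) end.  */
#if 0
static const insn_template *
first_template_sketch (const char *mnemonic)
{
  const templates *t = (const templates *) str_hash_find (op_hash, mnemonic);

  return t != NULL ? t->start : NULL;   /* t->end bounds any scan.  */
}
#endif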
102
103 /* 386 operand encoding bytes: see 386 book for details of this. */
104 typedef struct
105 {
106 unsigned int regmem; /* codes register or memory operand */
107 unsigned int reg; /* codes register operand (or extended opcode) */
108 unsigned int mode; /* how to interpret regmem & reg */
109 }
110 modrm_byte;
111
112 /* x86-64 extension prefix. */
113 typedef int rex_byte;
114
115 /* 386 opcode byte to code indirect addressing. */
116 typedef struct
117 {
118 unsigned base;
119 unsigned index;
120 unsigned scale;
121 }
122 sib_byte;
123
124 /* x86 arch names, types and features */
125 typedef struct
126 {
127 const char *name; /* arch name */
128 unsigned int len:8; /* arch string length */
129 bool skip:1; /* show_arch should skip this. */
130 enum processor_type type; /* arch type */
131 i386_cpu_flags enable; /* cpu feature enable flags */
132 i386_cpu_flags disable; /* cpu feature disable flags */
133 }
134 arch_entry;
135
136 static void update_code_flag (int, int);
137 static void set_code_flag (int);
138 static void set_16bit_gcc_code_flag (int);
139 static void set_intel_syntax (int);
140 static void set_intel_mnemonic (int);
141 static void set_allow_index_reg (int);
142 static void set_check (int);
143 static void set_cpu_arch (int);
144 #ifdef TE_PE
145 static void pe_directive_secrel (int);
146 static void pe_directive_secidx (int);
147 #endif
148 static void signed_cons (int);
149 static char *output_invalid (int c);
150 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
151 const char *);
152 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
153 const char *);
154 static int i386_att_operand (char *);
155 static int i386_intel_operand (char *, int);
156 static int i386_intel_simplify (expressionS *);
157 static int i386_intel_parse_name (const char *, expressionS *);
158 static const reg_entry *parse_register (char *, char **);
159 static char *parse_insn (char *, char *);
160 static char *parse_operands (char *, const char *);
161 static void swap_operands (void);
162 static void swap_2_operands (unsigned int, unsigned int);
163 static enum flag_code i386_addressing_mode (void);
164 static void optimize_imm (void);
165 static void optimize_disp (void);
166 static const insn_template *match_template (char);
167 static int check_string (void);
168 static int process_suffix (void);
169 static int check_byte_reg (void);
170 static int check_long_reg (void);
171 static int check_qword_reg (void);
172 static int check_word_reg (void);
173 static int finalize_imm (void);
174 static int process_operands (void);
175 static const reg_entry *build_modrm_byte (void);
176 static void output_insn (void);
177 static void output_imm (fragS *, offsetT);
178 static void output_disp (fragS *, offsetT);
179 #ifndef I386COFF
180 static void s_bss (int);
181 #endif
182 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
183 static void handle_large_common (int small ATTRIBUTE_UNUSED);
184
185 /* GNU_PROPERTY_X86_ISA_1_USED. */
186 static unsigned int x86_isa_1_used;
187 /* GNU_PROPERTY_X86_FEATURE_2_USED. */
188 static unsigned int x86_feature_2_used;
189 /* Generate x86 used ISA and feature properties. */
190 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
191 #endif
192
193 static const char *default_arch = DEFAULT_ARCH;
194
195 /* parse_register() returns this when a register alias cannot be used. */
196 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
197 { Dw2Inval, Dw2Inval } };
198
199 static const reg_entry *reg_eax;
200 static const reg_entry *reg_ds;
201 static const reg_entry *reg_es;
202 static const reg_entry *reg_ss;
203 static const reg_entry *reg_st0;
204 static const reg_entry *reg_k0;
205
206 /* VEX prefix. */
207 typedef struct
208 {
209 /* VEX prefix is either 2 byte or 3 byte. EVEX is 4 byte. */
210 unsigned char bytes[4];
211 unsigned int length;
212 /* Destination or source register specifier. */
213 const reg_entry *register_specifier;
214 } vex_prefix;
215
216 /* 'md_assemble ()' gathers together information and puts it into a
217 i386_insn. */
218
219 union i386_op
220 {
221 expressionS *disps;
222 expressionS *imms;
223 const reg_entry *regs;
224 };
225
226 enum i386_error
227 {
228 no_error, /* Must be first. */
229 operand_size_mismatch,
230 operand_type_mismatch,
231 register_type_mismatch,
232 number_of_operands_mismatch,
233 invalid_instruction_suffix,
234 bad_imm4,
235 unsupported_with_intel_mnemonic,
236 unsupported_syntax,
237 unsupported,
238 invalid_sib_address,
239 invalid_vsib_address,
240 invalid_vector_register_set,
241 invalid_tmm_register_set,
242 invalid_dest_and_src_register_set,
243 unsupported_vector_index_register,
244 unsupported_broadcast,
245 broadcast_needed,
246 unsupported_masking,
247 mask_not_on_destination,
248 no_default_mask,
249 unsupported_rc_sae,
250 invalid_register_operand,
251 };
252
253 struct _i386_insn
254 {
255   /* TM holds the template for the insn we're currently assembling.  */
256 insn_template tm;
257
258 /* SUFFIX holds the instruction size suffix for byte, word, dword
259 or qword, if given. */
260 char suffix;
261
262 /* OPCODE_LENGTH holds the number of base opcode bytes. */
263 unsigned char opcode_length;
264
265 /* OPERANDS gives the number of given operands. */
266 unsigned int operands;
267
268 /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
269 of given register, displacement, memory operands and immediate
270 operands. */
271 unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
272
273 /* TYPES [i] is the type (see above #defines) which tells us how to
274 use OP[i] for the corresponding operand. */
275 i386_operand_type types[MAX_OPERANDS];
276
277 /* Displacement expression, immediate expression, or register for each
278 operand. */
279 union i386_op op[MAX_OPERANDS];
280
281 /* Flags for operands. */
282 unsigned int flags[MAX_OPERANDS];
283 #define Operand_PCrel 1
284 #define Operand_Mem 2
285
286 /* Relocation type for operand */
287 enum bfd_reloc_code_real reloc[MAX_OPERANDS];
288
289 /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
290 the base index byte below. */
291 const reg_entry *base_reg;
292 const reg_entry *index_reg;
293 unsigned int log2_scale_factor;
294
295 /* SEG gives the seg_entries of this insn. They are zero unless
296 explicit segment overrides are given. */
297 const reg_entry *seg[2];
298
299 /* Copied first memory operand string, for re-checking. */
300 char *memop1_string;
301
302 /* PREFIX holds all the given prefix opcodes (usually null).
303 PREFIXES is the number of prefix opcodes. */
304 unsigned int prefixes;
305 unsigned char prefix[MAX_PREFIXES];
306
307 /* Register is in low 3 bits of opcode. */
308 bool short_form;
309
310 /* The operand to a branch insn indicates an absolute branch. */
311 bool jumpabsolute;
312
313 /* The operand to a branch insn indicates a far branch. */
314 bool far_branch;
315
316   /* There is a (%dx) memory operand, which should only be used
317 with input/output instructions. */
318 bool input_output_operand;
319
320 /* Extended states. */
321 enum
322 {
323 /* Use MMX state. */
324 xstate_mmx = 1 << 0,
325 /* Use XMM state. */
326 xstate_xmm = 1 << 1,
327 /* Use YMM state. */
328 xstate_ymm = 1 << 2 | xstate_xmm,
329 /* Use ZMM state. */
330 xstate_zmm = 1 << 3 | xstate_ymm,
331 /* Use TMM state. */
332 xstate_tmm = 1 << 4,
333 /* Use MASK state. */
334 xstate_mask = 1 << 5
335 } xstate;
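  /* Since each wider vector state ORs in the narrower ones, a single
     test such as (xstate & xstate_xmm) covers XMM, YMM and ZMM uses
     alike.  */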
336
337 /* Has GOTPC or TLS relocation. */
338 bool has_gotpc_tls_reloc;
339
340 /* RM and SIB are the modrm byte and the sib byte where the
341 addressing modes of this insn are encoded. */
342 modrm_byte rm;
343 rex_byte rex;
344 rex_byte vrex;
345 sib_byte sib;
346 vex_prefix vex;
347
348 /* Masking attributes.
349
350 The struct describes masking, applied to OPERAND in the instruction.
351 REG is a pointer to the corresponding mask register. ZEROING tells
352 whether merging or zeroing mask is used. */
353 struct Mask_Operation
354 {
355 const reg_entry *reg;
356 unsigned int zeroing;
357     /* The operand this operation is associated with.  */
358 unsigned int operand;
359 } mask;
360
361 /* Rounding control and SAE attributes. */
362 struct RC_Operation
363 {
364 enum rc_type
365 {
366 rc_none = -1,
367 rne,
368 rd,
369 ru,
370 rz,
371 saeonly
372 } type;
373 /* In Intel syntax the operand modifier form is supposed to be used, but
374 we continue to accept the immediate forms as well. */
375 bool modifier;
376 } rounding;
377
378 /* Broadcasting attributes.
379
380      The struct describes broadcasting, applied to OPERAND.  TYPE
381 expresses the broadcast factor. */
382 struct Broadcast_Operation
383 {
384 /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}. */
385 unsigned int type;
386
387 /* Index of broadcasted operand. */
388 unsigned int operand;
389
390 /* Number of bytes to broadcast. */
391 unsigned int bytes;
392 } broadcast;
393
394 /* Compressed disp8*N attribute. */
395 unsigned int memshift;
396
397 /* Prefer load or store in encoding. */
398 enum
399 {
400 dir_encoding_default = 0,
401 dir_encoding_load,
402 dir_encoding_store,
403 dir_encoding_swap
404 } dir_encoding;
405
406 /* Prefer 8bit, 16bit, 32bit displacement in encoding. */
407 enum
408 {
409 disp_encoding_default = 0,
410 disp_encoding_8bit,
411 disp_encoding_16bit,
412 disp_encoding_32bit
413 } disp_encoding;
414
415 /* Prefer the REX byte in encoding. */
416 bool rex_encoding;
417
418 /* Disable instruction size optimization. */
419 bool no_optimize;
420
421 /* How to encode vector instructions. */
422 enum
423 {
424 vex_encoding_default = 0,
425 vex_encoding_vex,
426 vex_encoding_vex3,
427 vex_encoding_evex,
428 vex_encoding_error
429 } vec_encoding;
430
431 /* REP prefix. */
432 const char *rep_prefix;
433
434 /* HLE prefix. */
435 const char *hle_prefix;
436
437 /* Have BND prefix. */
438 const char *bnd_prefix;
439
440 /* Have NOTRACK prefix. */
441 const char *notrack_prefix;
442
443 /* Error message. */
444 enum i386_error error;
445 };
446
447 typedef struct _i386_insn i386_insn;
448
449 /* Link each RC type with its corresponding string, to be looked for
450    in the asm source.  */
451 struct RC_name
452 {
453 enum rc_type type;
454 const char *name;
455 unsigned int len;
456 };
457
458 static const struct RC_name RC_NamesTable[] =
459 {
460 { rne, STRING_COMMA_LEN ("rn-sae") },
461 { rd, STRING_COMMA_LEN ("rd-sae") },
462 { ru, STRING_COMMA_LEN ("ru-sae") },
463 { rz, STRING_COMMA_LEN ("rz-sae") },
464 { saeonly, STRING_COMMA_LEN ("sae") },
465 };
466
467 /* To be indexed by segment register number. */
468 static const unsigned char i386_seg_prefixes[] = {
469 ES_PREFIX_OPCODE,
470 CS_PREFIX_OPCODE,
471 SS_PREFIX_OPCODE,
472 DS_PREFIX_OPCODE,
473 FS_PREFIX_OPCODE,
474 GS_PREFIX_OPCODE
475 };
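/* E.g. index 3 (the hardware Sreg encoding of %ds) yields
   DS_PREFIX_OPCODE; the array order matches the ES..GS encoding.  */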
476
477 /* List of chars besides those in app.c:symbol_chars that can start an
478 operand. Used to prevent the scrubber eating vital white-space. */
479 const char extra_symbol_chars[] = "*%-([{}"
480 #ifdef LEX_AT
481 "@"
482 #endif
483 #ifdef LEX_QM
484 "?"
485 #endif
486 ;
487
488 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
489 && !defined (TE_GNU) \
490 && !defined (TE_LINUX) \
491 && !defined (TE_Haiku) \
492 && !defined (TE_FreeBSD) \
493 && !defined (TE_DragonFly) \
494 && !defined (TE_NetBSD))
495 /* This array holds the chars that always start a comment. If the
496 pre-processor is disabled, these aren't very useful. The option
497 --divide will remove '/' from this list. */
498 const char *i386_comment_chars = "#/";
499 #define SVR4_COMMENT_CHARS 1
500 #define PREFIX_SEPARATOR '\\'
501
502 #else
503 const char *i386_comment_chars = "#";
504 #define PREFIX_SEPARATOR '/'
505 #endif
506
507 /* This array holds the chars that only start a comment at the beginning of
508 a line. If the line seems to have the form '# 123 filename'
509 .line and .file directives will appear in the pre-processed output.
510 Note that input_file.c hand checks for '#' at the beginning of the
511 first line of the input file. This is because the compiler outputs
512 #NO_APP at the beginning of its output.
513 Also note that comments started like this one will always work if
514 '/' isn't otherwise defined. */
515 const char line_comment_chars[] = "#/";
516
517 const char line_separator_chars[] = ";";
518
519 /* Chars that can be used to separate mant from exp in floating point
520 nums. */
521 const char EXP_CHARS[] = "eE";
522
523 /* Chars that mean this number is a floating point constant
524 As in 0f12.456
525 or 0d1.2345e12. */
526 const char FLT_CHARS[] = "fFdDxXhHbB";
527
528 /* Tables for lexical analysis. */
529 static char mnemonic_chars[256];
530 static char register_chars[256];
531 static char operand_chars[256];
532 static char identifier_chars[256];
533
534 /* Lexical macros. */
535 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
536 #define is_operand_char(x) (operand_chars[(unsigned char) x])
537 #define is_register_char(x) (register_chars[(unsigned char) x])
538 #define is_space_char(x) ((x) == ' ')
539 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
540
541 /* All non-digit non-letter characters that may occur in an operand. */
542 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
543
544 /* md_assemble() always leaves the strings it's passed unaltered. To
545 effect this we maintain a stack of saved characters that we've smashed
546 with '\0's (indicating end of strings for various sub-fields of the
547 assembler instruction). */
548 static char save_stack[32];
549 static char *save_stack_p;
550 #define END_STRING_AND_SAVE(s) \
551 do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
552 #define RESTORE_END_STRING(s) \
553 do { *(s) = *--save_stack_p; } while (0)
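/* Typical usage sketch (not compiled): temporarily NUL-terminate a
   sub-string of the instruction line, parse it, then put the smashed
   character back.  `end_op' here is a hypothetical pointer to the first
   character past the token of interest.  */
#if 0
  END_STRING_AND_SAVE (end_op);   /* *end_op pushed, then set to '\0'.  */
  /* ... parse the now-terminated token ... */
  RESTORE_END_STRING (end_op);    /* Original character popped back.  */
#endif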
554
555 /* The instruction we're assembling. */
556 static i386_insn i;
557
558 /* Possible templates for current insn. */
559 static const templates *current_templates;
560
561 /* Per instruction expressionS buffers: max displacements & immediates. */
562 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
563 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
564
565 /* Current operand we are working on. */
566 static int this_operand = -1;
567
568 /* We support four different modes: 16-, 32- and 64-bit code, plus .code16gcc
569    (16-bit code with 32-bit stack ops); FLAG_CODE distinguishes the first three.  */
570
571 enum flag_code {
572 CODE_32BIT,
573 CODE_16BIT,
574 CODE_64BIT };
575
576 static enum flag_code flag_code;
577 static unsigned int object_64bit;
578 static unsigned int disallow_64bit_reloc;
579 static int use_rela_relocations = 0;
580 /* __tls_get_addr/___tls_get_addr symbol for TLS. */
581 static const char *tls_get_addr;
582
583 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
584 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
585 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
586
587 /* The ELF ABI to use. */
588 enum x86_elf_abi
589 {
590 I386_ABI,
591 X86_64_ABI,
592 X86_64_X32_ABI
593 };
594
595 static enum x86_elf_abi x86_elf_abi = I386_ABI;
596 #endif
597
598 #if defined (TE_PE) || defined (TE_PEP)
599 /* Use big object file format. */
600 static int use_big_obj = 0;
601 #endif
602
603 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
604 /* 1 if generating code for a shared library. */
605 static int shared = 0;
606
607 unsigned int x86_sframe_cfa_sp_reg;
608 /* The other CFA base register for SFrame unwind info. */
609 unsigned int x86_sframe_cfa_fp_reg;
610 unsigned int x86_sframe_cfa_ra_reg;
611
612 #endif
613
614 /* 1 for intel syntax,
615 0 if att syntax. */
616 static int intel_syntax = 0;
617
618 static enum x86_64_isa
619 {
620 amd64 = 1, /* AMD64 ISA. */
621 intel64 /* Intel64 ISA. */
622 } isa64;
623
624 /* 1 for intel mnemonic,
625 0 if att mnemonic. */
626 static int intel_mnemonic = !SYSV386_COMPAT;
627
628 /* 1 if pseudo registers are permitted. */
629 static int allow_pseudo_reg = 0;
630
631 /* 1 if register prefix % not required. */
632 static int allow_naked_reg = 0;
633
634 /* 1 if the assembler should add BND prefix for all control-transferring
635 instructions supporting it, even if this prefix wasn't specified
636 explicitly. */
637 static int add_bnd_prefix = 0;
638
639 /* 1 if the pseudo index register, eiz/riz, is allowed.  */
640 static int allow_index_reg = 0;
641
642 /* 1 if the assembler should ignore LOCK prefix, even if it was
643 specified explicitly. */
644 static int omit_lock_prefix = 0;
645
646 /* 1 if the assembler should encode lfence, mfence, and sfence as
647 "lock addl $0, (%{re}sp)". */
648 static int avoid_fence = 0;
649
650 /* 1 if lfence should be inserted after every load. */
651 static int lfence_after_load = 0;
652
653 /* Non-zero if lfence should be inserted before indirect branch. */
654 static enum lfence_before_indirect_branch_kind
655 {
656 lfence_branch_none = 0,
657 lfence_branch_register,
658 lfence_branch_memory,
659 lfence_branch_all
660 }
661 lfence_before_indirect_branch;
662
663 /* Non-zero if lfence should be inserted before ret. */
664 static enum lfence_before_ret_kind
665 {
666 lfence_before_ret_none = 0,
667 lfence_before_ret_not,
668 lfence_before_ret_or,
669 lfence_before_ret_shl
670 }
671 lfence_before_ret;
672
673 /* Tracks the previous instruction, in particular whether it was a
    .byte-style directive or a prefix.  */
674 static struct
675 {
676 segT seg;
677 const char *file;
678 const char *name;
679 unsigned int line;
680 enum last_insn_kind
681 {
682 last_insn_other = 0,
683 last_insn_directive,
684 last_insn_prefix
685 } kind;
686 } last_insn;
687
688 /* 1 if the assembler should generate relax relocations. */
689
690 static int generate_relax_relocations
691 = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
692
693 static enum check_kind
694 {
695 check_none = 0,
696 check_warning,
697 check_error
698 }
699 sse_check, operand_check = check_warning;
700
701 /* Non-zero if branches should be aligned within power of 2 boundary. */
702 static int align_branch_power = 0;
703
704 /* Types of branches to align. */
705 enum align_branch_kind
706 {
707 align_branch_none = 0,
708 align_branch_jcc = 1,
709 align_branch_fused = 2,
710 align_branch_jmp = 3,
711 align_branch_call = 4,
712 align_branch_indirect = 5,
713 align_branch_ret = 6
714 };
715
716 /* Type bits of branches to align. */
717 enum align_branch_bit
718 {
719 align_branch_jcc_bit = 1 << align_branch_jcc,
720 align_branch_fused_bit = 1 << align_branch_fused,
721 align_branch_jmp_bit = 1 << align_branch_jmp,
722 align_branch_call_bit = 1 << align_branch_call,
723 align_branch_indirect_bit = 1 << align_branch_indirect,
724 align_branch_ret_bit = 1 << align_branch_ret
725 };
726
727 static unsigned int align_branch = (align_branch_jcc_bit
728 | align_branch_fused_bit
729 | align_branch_jmp_bit);
730
731 /* Types of condition jump used by macro-fusion. */
732 enum mf_jcc_kind
733 {
734 mf_jcc_jo = 0, /* base opcode 0x70 */
735 mf_jcc_jc, /* base opcode 0x72 */
736 mf_jcc_je, /* base opcode 0x74 */
737 mf_jcc_jna, /* base opcode 0x76 */
738 mf_jcc_js, /* base opcode 0x78 */
739 mf_jcc_jp, /* base opcode 0x7a */
740 mf_jcc_jl, /* base opcode 0x7c */
741 mf_jcc_jle, /* base opcode 0x7e */
742 };
743
744 /* Types of compare flag-modifying instructions used by macro-fusion.  */
745 enum mf_cmp_kind
746 {
747 mf_cmp_test_and, /* test/cmp */
748 mf_cmp_alu_cmp, /* add/sub/cmp */
749 mf_cmp_incdec /* inc/dec */
750 };
751
752 /* The maximum padding size for fused jcc. CMP like instruction can
753 be 9 bytes and jcc can be 6 bytes. Leave room just in case for
754 prefixes. */
755 #define MAX_FUSED_JCC_PADDING_SIZE 20
756
757 /* The maximum number of prefixes added for an instruction. */
758 static unsigned int align_branch_prefix_size = 5;
759
760 /* Optimization:
761 1. Clear the REX_W bit with register operand if possible.
762 2. Above plus use 128bit vector instruction to clear the full vector
763 register.
764 */
765 static int optimize = 0;
766
767 /* Optimization:
768 1. Clear the REX_W bit with register operand if possible.
769 2. Above plus use 128bit vector instruction to clear the full vector
770 register.
771 3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
772 "testb $imm7,%r8".
773 */
774 static int optimize_for_space = 0;
775
776 /* Register prefix used for error message. */
777 static const char *register_prefix = "%";
778
779 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
780 leave, push, and pop instructions so that gcc has the same stack
781 frame as in 32 bit mode. */
782 static char stackop_size = '\0';
783
784 /* Non-zero to optimize code alignment. */
785 int optimize_align_code = 1;
786
787 /* Non-zero to quieten some warnings. */
788 static int quiet_warnings = 0;
789
790 /* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs. */
791 static bool pre_386_16bit_warned;
792
793 /* CPU name. */
794 static const char *cpu_arch_name = NULL;
795 static char *cpu_sub_arch_name = NULL;
796
797 /* CPU feature flags. */
798 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
799
800 /* If we have selected a cpu we are generating instructions for. */
801 static int cpu_arch_tune_set = 0;
802
803 /* Cpu we are generating instructions for. */
804 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
805
806 /* CPU feature flags of cpu we are generating instructions for. */
807 static i386_cpu_flags cpu_arch_tune_flags;
808
809 /* CPU instruction set architecture used. */
810 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
811
812 /* CPU feature flags of instruction set architecture used. */
813 i386_cpu_flags cpu_arch_isa_flags;
814
815 /* If set, conditional jumps are not automatically promoted to handle
816 larger than a byte offset. */
817 static bool no_cond_jump_promotion = false;
818
819 /* Encode SSE instructions with VEX prefix. */
820 static unsigned int sse2avx;
821
822 /* Encode aligned vector move as unaligned vector move. */
823 static unsigned int use_unaligned_vector_move;
824
825 /* Encode scalar AVX instructions with specific vector length. */
826 static enum
827 {
828 vex128 = 0,
829 vex256
830 } avxscalar;
831
832 /* Encode VEX WIG instructions with specific vex.w. */
833 static enum
834 {
835 vexw0 = 0,
836 vexw1
837 } vexwig;
838
839 /* Encode scalar EVEX LIG instructions with specific vector length. */
840 static enum
841 {
842 evexl128 = 0,
843 evexl256,
844 evexl512
845 } evexlig;
846
847 /* Encode EVEX WIG instructions with specific evex.w. */
848 static enum
849 {
850 evexw0 = 0,
851 evexw1
852 } evexwig;
853
854 /* Value to encode in EVEX RC bits, for SAE-only instructions. */
855 static enum rc_type evexrcig = rne;
856
857 /* Pre-defined "_GLOBAL_OFFSET_TABLE_". */
858 static symbolS *GOT_symbol;
859
860 /* The dwarf2 return column, adjusted for 32 or 64 bit. */
861 unsigned int x86_dwarf2_return_column;
862
863 /* The dwarf2 data alignment, adjusted for 32 or 64 bit. */
864 int x86_cie_data_alignment;
865
866 /* Interface to relax_segment.
867 There are 3 major relax states for 386 jump insns because the
868 different types of jumps add different sizes to frags when we're
869 figuring out what sort of jump to choose to reach a given label.
870
871 BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
872 branches which are handled by md_estimate_size_before_relax() and
873 i386_generic_table_relax_frag(). */
874
875 /* Types. */
876 #define UNCOND_JUMP 0
877 #define COND_JUMP 1
878 #define COND_JUMP86 2
879 #define BRANCH_PADDING 3
880 #define BRANCH_PREFIX 4
881 #define FUSED_JCC_PADDING 5
882
883 /* Sizes. */
884 #define CODE16 1
885 #define SMALL 0
886 #define SMALL16 (SMALL | CODE16)
887 #define BIG 2
888 #define BIG16 (BIG | CODE16)
889
890 #ifndef INLINE
891 #ifdef __GNUC__
892 #define INLINE __inline__
893 #else
894 #define INLINE
895 #endif
896 #endif
897
898 #define ENCODE_RELAX_STATE(type, size) \
899 ((relax_substateT) (((type) << 2) | (size)))
900 #define TYPE_FROM_RELAX_STATE(s) \
901 ((s) >> 2)
902 #define DISP_SIZE_FROM_RELAX_STATE(s) \
903 ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
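/* Worked example of the packing above: ENCODE_RELAX_STATE (COND_JUMP, BIG)
   is (1 << 2) | 2 == 6; TYPE_FROM_RELAX_STATE (6) recovers COND_JUMP (1),
   and DISP_SIZE_FROM_RELAX_STATE (6) yields 4, i.e. a 32-bit
   displacement.  */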
904
905 /* This table is used by relax_frag to promote short jumps to long
906 ones where necessary. SMALL (short) jumps may be promoted to BIG
907 (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long). We
908 don't allow a short jump in a 32 bit code segment to be promoted to
909 a 16 bit offset jump because it's slower (requires data size
910 prefix), and doesn't work, unless the destination is in the bottom
911 64k of the code segment (The top 16 bits of eip are zeroed). */
912
913 const relax_typeS md_relax_table[] =
914 {
915 /* The fields are:
916 1) most positive reach of this state,
917 2) most negative reach of this state,
918 3) how many bytes this mode will have in the variable part of the frag
919 4) which index into the table to try if we can't fit into this one. */
920
921 /* UNCOND_JUMP states. */
922 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
923 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
924 /* dword jmp adds 4 bytes to frag:
925 0 extra opcode bytes, 4 displacement bytes. */
926 {0, 0, 4, 0},
927   /* word jmp adds 2 bytes to frag:
928 0 extra opcode bytes, 2 displacement bytes. */
929 {0, 0, 2, 0},
930
931 /* COND_JUMP states. */
932 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
933 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
934   /* dword conditionals add 5 bytes to frag:
935 1 extra opcode byte, 4 displacement bytes. */
936 {0, 0, 5, 0},
937 /* word conditionals add 3 bytes to frag:
938 1 extra opcode byte, 2 displacement bytes. */
939 {0, 0, 3, 0},
940
941 /* COND_JUMP86 states. */
942 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
943 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
944   /* dword conditionals add 5 bytes to frag:
945 1 extra opcode byte, 4 displacement bytes. */
946 {0, 0, 5, 0},
947 /* word conditionals add 4 bytes to frag:
948 1 displacement byte and a 3 byte long branch insn. */
949 {0, 0, 4, 0}
950 };
951
952 #define ARCH(n, t, f, s) \
953 { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
954 CPU_NONE_FLAGS }
955 #define SUBARCH(n, e, d, s) \
956 { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
957 CPU_ ## d ## _FLAGS }
958
959 static const arch_entry cpu_arch[] =
960 {
961 /* Do not replace the first two entries - i386_target_format() and
962 set_cpu_arch() rely on them being there in this order. */
963 ARCH (generic32, GENERIC32, GENERIC32, false),
964 ARCH (generic64, GENERIC64, GENERIC64, false),
965 ARCH (i8086, UNKNOWN, NONE, false),
966 ARCH (i186, UNKNOWN, I186, false),
967 ARCH (i286, UNKNOWN, I286, false),
968 ARCH (i386, I386, I386, false),
969 ARCH (i486, I486, I486, false),
970 ARCH (i586, PENTIUM, I586, false),
971 ARCH (i686, PENTIUMPRO, I686, false),
972 ARCH (pentium, PENTIUM, I586, false),
973 ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
974 ARCH (pentiumii, PENTIUMPRO, P2, false),
975 ARCH (pentiumiii, PENTIUMPRO, P3, false),
976 ARCH (pentium4, PENTIUM4, P4, false),
977 ARCH (prescott, NOCONA, CORE, false),
978 ARCH (nocona, NOCONA, NOCONA, false),
979 ARCH (yonah, CORE, CORE, true),
980 ARCH (core, CORE, CORE, false),
981 ARCH (merom, CORE2, CORE2, true),
982 ARCH (core2, CORE2, CORE2, false),
983 ARCH (corei7, COREI7, COREI7, false),
984 ARCH (iamcu, IAMCU, IAMCU, false),
985 ARCH (k6, K6, K6, false),
986 ARCH (k6_2, K6, K6_2, false),
987 ARCH (athlon, ATHLON, ATHLON, false),
988 ARCH (sledgehammer, K8, K8, true),
989 ARCH (opteron, K8, K8, false),
990 ARCH (k8, K8, K8, false),
991 ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
992 ARCH (bdver1, BD, BDVER1, false),
993 ARCH (bdver2, BD, BDVER2, false),
994 ARCH (bdver3, BD, BDVER3, false),
995 ARCH (bdver4, BD, BDVER4, false),
996 ARCH (znver1, ZNVER, ZNVER1, false),
997 ARCH (znver2, ZNVER, ZNVER2, false),
998 ARCH (znver3, ZNVER, ZNVER3, false),
999 ARCH (znver4, ZNVER, ZNVER4, false),
1000 ARCH (btver1, BT, BTVER1, false),
1001 ARCH (btver2, BT, BTVER2, false),
1002
1003 SUBARCH (8087, 8087, ANY_X87, false),
1004 SUBARCH (87, NONE, ANY_X87, false), /* Disable only! */
1005 SUBARCH (287, 287, ANY_287, false),
1006 SUBARCH (387, 387, ANY_387, false),
1007 SUBARCH (687, 687, ANY_687, false),
1008 SUBARCH (cmov, CMOV, ANY_CMOV, false),
1009 SUBARCH (fxsr, FXSR, ANY_FXSR, false),
1010 SUBARCH (mmx, MMX, ANY_MMX, false),
1011 SUBARCH (sse, SSE, ANY_SSE, false),
1012 SUBARCH (sse2, SSE2, ANY_SSE2, false),
1013 SUBARCH (sse3, SSE3, ANY_SSE3, false),
1014 SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
1015 SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
1016 SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
1017 SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
1018 SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
1019 SUBARCH (avx, AVX, ANY_AVX, false),
1020 SUBARCH (avx2, AVX2, ANY_AVX2, false),
1021 SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
1022 SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
1023 SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
1024 SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
1025 SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
1026 SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
1027 SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
1028 SUBARCH (vmx, VMX, VMX, false),
1029 SUBARCH (vmfunc, VMFUNC, VMFUNC, false),
1030 SUBARCH (smx, SMX, SMX, false),
1031 SUBARCH (xsave, XSAVE, XSAVE, false),
1032 SUBARCH (xsaveopt, XSAVEOPT, XSAVEOPT, false),
1033 SUBARCH (xsavec, XSAVEC, XSAVEC, false),
1034 SUBARCH (xsaves, XSAVES, XSAVES, false),
1035 SUBARCH (aes, AES, AES, false),
1036 SUBARCH (pclmul, PCLMUL, PCLMUL, false),
1037 SUBARCH (clmul, PCLMUL, PCLMUL, true),
1038 SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
1039 SUBARCH (rdrnd, RDRND, RDRND, false),
1040 SUBARCH (f16c, F16C, F16C, false),
1041 SUBARCH (bmi2, BMI2, BMI2, false),
1042 SUBARCH (fma, FMA, FMA, false),
1043 SUBARCH (fma4, FMA4, FMA4, false),
1044 SUBARCH (xop, XOP, XOP, false),
1045 SUBARCH (lwp, LWP, LWP, false),
1046 SUBARCH (movbe, MOVBE, MOVBE, false),
1047 SUBARCH (cx16, CX16, CX16, false),
1048 SUBARCH (ept, EPT, EPT, false),
1049 SUBARCH (lzcnt, LZCNT, LZCNT, false),
1050 SUBARCH (popcnt, POPCNT, POPCNT, false),
1051 SUBARCH (hle, HLE, HLE, false),
1052 SUBARCH (rtm, RTM, RTM, false),
1053 SUBARCH (invpcid, INVPCID, INVPCID, false),
1054 SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
1055 SUBARCH (nop, NOP, NOP, false),
1056 SUBARCH (syscall, SYSCALL, SYSCALL, false),
1057 SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
1058 SUBARCH (3dnow, 3DNOW, 3DNOW, false),
1059 SUBARCH (3dnowa, 3DNOWA, 3DNOWA, false),
1060 SUBARCH (padlock, PADLOCK, PADLOCK, false),
1061 SUBARCH (pacifica, SVME, SVME, true),
1062 SUBARCH (svme, SVME, SVME, false),
1063 SUBARCH (abm, ABM, ABM, false),
1064 SUBARCH (bmi, BMI, BMI, false),
1065 SUBARCH (tbm, TBM, TBM, false),
1066 SUBARCH (adx, ADX, ADX, false),
1067 SUBARCH (rdseed, RDSEED, RDSEED, false),
1068 SUBARCH (prfchw, PRFCHW, PRFCHW, false),
1069 SUBARCH (smap, SMAP, SMAP, false),
1070 SUBARCH (mpx, MPX, MPX, false),
1071 SUBARCH (sha, SHA, SHA, false),
1072 SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
1073 SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
1074 SUBARCH (se1, SE1, SE1, false),
1075 SUBARCH (clwb, CLWB, CLWB, false),
1076 SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
1077 SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
1078 SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
1079 SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
1080 SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
1081 SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
1082 SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
1083 SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
1084 SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
1085 SUBARCH (clzero, CLZERO, CLZERO, false),
1086 SUBARCH (mwaitx, MWAITX, MWAITX, false),
1087 SUBARCH (ospke, OSPKE, OSPKE, false),
1088 SUBARCH (rdpid, RDPID, RDPID, false),
1089 SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
1090 SUBARCH (ibt, IBT, ANY_IBT, false),
1091 SUBARCH (shstk, SHSTK, ANY_SHSTK, false),
1092 SUBARCH (gfni, GFNI, GFNI, false),
1093 SUBARCH (vaes, VAES, VAES, false),
1094 SUBARCH (vpclmulqdq, VPCLMULQDQ, VPCLMULQDQ, false),
1095 SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
1096 SUBARCH (pconfig, PCONFIG, PCONFIG, false),
1097 SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
1098 SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
1099 SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
1100 SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
1101 SUBARCH (amx_fp16, AMX_FP16, AMX_FP16, false),
1102 SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
1103 SUBARCH (movdiri, MOVDIRI, ANY_MOVDIRI, false),
1104 SUBARCH (movdir64b, MOVDIR64B, ANY_MOVDIR64B, false),
1105 SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
1106 SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
1107 ANY_AVX512_VP2INTERSECT, false),
1108 SUBARCH (tdx, TDX, ANY_TDX, false),
1109 SUBARCH (enqcmd, ENQCMD, ANY_ENQCMD, false),
1110 SUBARCH (serialize, SERIALIZE, ANY_SERIALIZE, false),
1111 SUBARCH (rdpru, RDPRU, RDPRU, false),
1112 SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
1113 SUBARCH (sev_es, SEV_ES, SEV_ES, false),
1114 SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
1115 SUBARCH (kl, KL, ANY_KL, false),
1116 SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
1117 SUBARCH (uintr, UINTR, ANY_UINTR, false),
1118 SUBARCH (hreset, HRESET, ANY_HRESET, false),
1119 SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
1120 SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
1121 SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
1122 SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
1123 SUBARCH (cmpccxadd, CMPCCXADD, ANY_CMPCCXADD, false),
1124 SUBARCH (wrmsrns, WRMSRNS, ANY_WRMSRNS, false),
1125 SUBARCH (msrlist, MSRLIST, ANY_MSRLIST, false),
1126 SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
1127 SUBARCH (rao_int, RAO_INT, ANY_RAO_INT, false),
1128 SUBARCH (rmpquery, RMPQUERY, RMPQUERY, false),
1129 };
1130
1131 #undef SUBARCH
1132 #undef ARCH
1133
1134 #ifdef I386COFF
1135 /* Like s_lcomm_internal in gas/read.c but the alignment string
1136 is allowed to be optional. */
1137
1138 static symbolS *
1139 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1140 {
1141 addressT align = 0;
1142
1143 SKIP_WHITESPACE ();
1144
1145 if (needs_align
1146 && *input_line_pointer == ',')
1147 {
1148 align = parse_align (needs_align - 1);
1149
1150 if (align == (addressT) -1)
1151 return NULL;
1152 }
1153 else
1154 {
1155 if (size >= 8)
1156 align = 3;
1157 else if (size >= 4)
1158 align = 2;
1159 else if (size >= 2)
1160 align = 1;
1161 else
1162 align = 0;
1163 }
1164
1165 bss_alloc (symbolP, size, align);
1166 return symbolP;
1167 }
1168
1169 static void
1170 pe_lcomm (int needs_align)
1171 {
1172 s_comm_internal (needs_align * 2, pe_lcomm_internal);
1173 }
1174 #endif
1175
1176 const pseudo_typeS md_pseudo_table[] =
1177 {
1178 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1179 {"align", s_align_bytes, 0},
1180 #else
1181 {"align", s_align_ptwo, 0},
1182 #endif
1183 {"arch", set_cpu_arch, 0},
1184 #ifndef I386COFF
1185 {"bss", s_bss, 0},
1186 #else
1187 {"lcomm", pe_lcomm, 1},
1188 #endif
1189 {"ffloat", float_cons, 'f'},
1190 {"dfloat", float_cons, 'd'},
1191 {"tfloat", float_cons, 'x'},
1192 {"hfloat", float_cons, 'h'},
1193 {"bfloat16", float_cons, 'b'},
1194 {"value", cons, 2},
1195 {"slong", signed_cons, 4},
1196 {"noopt", s_ignore, 0},
1197 {"optim", s_ignore, 0},
1198 {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1199 {"code16", set_code_flag, CODE_16BIT},
1200 {"code32", set_code_flag, CODE_32BIT},
1201 #ifdef BFD64
1202 {"code64", set_code_flag, CODE_64BIT},
1203 #endif
1204 {"intel_syntax", set_intel_syntax, 1},
1205 {"att_syntax", set_intel_syntax, 0},
1206 {"intel_mnemonic", set_intel_mnemonic, 1},
1207 {"att_mnemonic", set_intel_mnemonic, 0},
1208 {"allow_index_reg", set_allow_index_reg, 1},
1209 {"disallow_index_reg", set_allow_index_reg, 0},
1210 {"sse_check", set_check, 0},
1211 {"operand_check", set_check, 1},
1212 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1213 {"largecomm", handle_large_common, 0},
1214 #else
1215 {"file", dwarf2_directive_file, 0},
1216 {"loc", dwarf2_directive_loc, 0},
1217 {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1218 #endif
1219 #ifdef TE_PE
1220 {"secrel32", pe_directive_secrel, 0},
1221 {"secidx", pe_directive_secidx, 0},
1222 #endif
1223 {0, 0, 0}
1224 };
1225
1226 /* For interface with expression (). */
1227 extern char *input_line_pointer;
1228
1229 /* Hash table for instruction mnemonic lookup. */
1230 static htab_t op_hash;
1231
1232 /* Hash table for register lookup. */
1233 static htab_t reg_hash;
1234 \f
1235 /* Various efficient no-op patterns for aligning code labels.
1236 Note: Don't try to assemble the instructions in the comments.
1237 0L and 0w are not legal. */
1238 static const unsigned char f32_1[] =
1239 {0x90}; /* nop */
1240 static const unsigned char f32_2[] =
1241 {0x66,0x90}; /* xchg %ax,%ax */
1242 static const unsigned char f32_3[] =
1243 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
1244 static const unsigned char f32_4[] =
1245 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
1246 static const unsigned char f32_6[] =
1247 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
1248 static const unsigned char f32_7[] =
1249 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1250 static const unsigned char f16_3[] =
1251 {0x8d,0x74,0x00}; /* lea 0(%si),%si */
1252 static const unsigned char f16_4[] =
1253 {0x8d,0xb4,0x00,0x00}; /* lea 0W(%si),%si */
1254 static const unsigned char jump_disp8[] =
1255 {0xeb}; /* jmp disp8 */
1256 static const unsigned char jump32_disp32[] =
1257 {0xe9}; /* jmp disp32 */
1258 static const unsigned char jump16_disp32[] =
1259 {0x66,0xe9}; /* jmp disp32 */
1260 /* 32-bit NOPs patterns. */
1261 static const unsigned char *const f32_patt[] = {
1262 f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1263 };
1264 /* 16-bit NOPs patterns. */
1265 static const unsigned char *const f16_patt[] = {
1266 f32_1, f32_2, f16_3, f16_4
1267 };
1268 /* nopl (%[re]ax) */
1269 static const unsigned char alt_3[] =
1270 {0x0f,0x1f,0x00};
1271 /* nopl 0(%[re]ax) */
1272 static const unsigned char alt_4[] =
1273 {0x0f,0x1f,0x40,0x00};
1274 /* nopl 0(%[re]ax,%[re]ax,1) */
1275 static const unsigned char alt_5[] =
1276 {0x0f,0x1f,0x44,0x00,0x00};
1277 /* nopw 0(%[re]ax,%[re]ax,1) */
1278 static const unsigned char alt_6[] =
1279 {0x66,0x0f,0x1f,0x44,0x00,0x00};
1280 /* nopl 0L(%[re]ax) */
1281 static const unsigned char alt_7[] =
1282 {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1283 /* nopl 0L(%[re]ax,%[re]ax,1) */
1284 static const unsigned char alt_8[] =
1285 {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1286 /* nopw 0L(%[re]ax,%[re]ax,1) */
1287 static const unsigned char alt_9[] =
1288 {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1289 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1290 static const unsigned char alt_10[] =
1291 {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1292 /* data16 nopw %cs:0L(%eax,%eax,1) */
1293 static const unsigned char alt_11[] =
1294 {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1295 /* 32-bit and 64-bit NOPs patterns. */
1296 static const unsigned char *const alt_patt[] = {
1297 f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1298 alt_9, alt_10, alt_11
1299 };
1300
1301 /* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
1302 size of a single NOP instruction MAX_SINGLE_NOP_SIZE. */
1303
1304 static void
1305 i386_output_nops (char *where, const unsigned char *const *patt,
1306 int count, int max_single_nop_size)
1307
1308 {
1309 /* Place the longer NOP first. */
1310 int last;
1311 int offset;
1312 const unsigned char *nops;
1313
1314 if (max_single_nop_size < 1)
1315 {
1316 as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
1317 max_single_nop_size);
1318 return;
1319 }
1320
1321 nops = patt[max_single_nop_size - 1];
1322
1323   /* Use the smaller one if the requested one isn't available.  */
1324 if (nops == NULL)
1325 {
1326 max_single_nop_size--;
1327 nops = patt[max_single_nop_size - 1];
1328 }
1329
1330 last = count % max_single_nop_size;
1331
1332 count -= last;
1333 for (offset = 0; offset < count; offset += max_single_nop_size)
1334 memcpy (where + offset, nops, max_single_nop_size);
1335
1336 if (last)
1337 {
1338 nops = patt[last - 1];
1339 if (nops == NULL)
1340 {
1341 /* Use the smaller one plus one-byte NOP if the needed one
1342 isn't available. */
1343 last--;
1344 nops = patt[last - 1];
1345 memcpy (where + offset, nops, last);
1346 where[offset + last] = *patt[0];
1347 }
1348 else
1349 memcpy (where + offset, nops, last);
1350 }
1351 }
1352
1353 static INLINE int
1354 fits_in_imm7 (offsetT num)
1355 {
1356 return (num & 0x7f) == num;
1357 }
1358
1359 static INLINE int
1360 fits_in_imm31 (offsetT num)
1361 {
1362 return (num & 0x7fffffff) == num;
1363 }
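/* E.g. fits_in_imm7 (0x7f) is true while fits_in_imm7 (-1) is false:
   the 0x7f mask strips the sign bits, so the equality check fails for
   any negative offsetT value.  */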
1364
1365 /* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
1366 single NOP instruction LIMIT. */
1367
1368 void
1369 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1370 {
1371 const unsigned char *const *patt = NULL;
1372 int max_single_nop_size;
1373 /* Maximum number of NOPs before switching to jump over NOPs. */
1374 int max_number_of_nops;
1375
1376 switch (fragP->fr_type)
1377 {
1378 case rs_fill_nop:
1379 case rs_align_code:
1380 break;
1381 case rs_machine_dependent:
1382 /* Allow NOP padding for jumps and calls. */
1383 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1384 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1385 break;
1386 /* Fall through. */
1387 default:
1388 return;
1389 }
1390
1391 /* We need to decide which NOP sequence to use for 32bit and
1392 64bit. When -mtune= is used:
1393
1394 1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1395 PROCESSOR_GENERIC32, f32_patt will be used.
1396 2. For the rest, alt_patt will be used.
1397
1398 When -mtune= isn't used, alt_patt will be used if
1399 cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will
1400 be used.
1401
1402 When -march= or .arch is used, we can't use anything beyond
1403 cpu_arch_isa_flags. */
1404
1405 if (flag_code == CODE_16BIT)
1406 {
1407 patt = f16_patt;
1408 max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1409 /* Limit number of NOPs to 2 in 16-bit mode. */
1410 max_number_of_nops = 2;
1411 }
1412 else
1413 {
1414 if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1415 {
1416 /* PROCESSOR_UNKNOWN means that all ISAs may be used. */
1417 switch (cpu_arch_tune)
1418 {
1419 case PROCESSOR_UNKNOWN:
1420 /* We use cpu_arch_isa_flags to check if we SHOULD
1421 optimize with nops. */
1422 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1423 patt = alt_patt;
1424 else
1425 patt = f32_patt;
1426 break;
1427 case PROCESSOR_PENTIUM4:
1428 case PROCESSOR_NOCONA:
1429 case PROCESSOR_CORE:
1430 case PROCESSOR_CORE2:
1431 case PROCESSOR_COREI7:
1432 case PROCESSOR_GENERIC64:
1433 case PROCESSOR_K6:
1434 case PROCESSOR_ATHLON:
1435 case PROCESSOR_K8:
1436 case PROCESSOR_AMDFAM10:
1437 case PROCESSOR_BD:
1438 case PROCESSOR_ZNVER:
1439 case PROCESSOR_BT:
1440 patt = alt_patt;
1441 break;
1442 case PROCESSOR_I386:
1443 case PROCESSOR_I486:
1444 case PROCESSOR_PENTIUM:
1445 case PROCESSOR_PENTIUMPRO:
1446 case PROCESSOR_IAMCU:
1447 case PROCESSOR_GENERIC32:
1448 patt = f32_patt;
1449 break;
1450 case PROCESSOR_NONE:
1451 abort ();
1452 }
1453 }
1454 else
1455 {
1456 switch (fragP->tc_frag_data.tune)
1457 {
1458 case PROCESSOR_UNKNOWN:
1459 /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1460 PROCESSOR_UNKNOWN. */
1461 abort ();
1462 break;
1463
1464 case PROCESSOR_I386:
1465 case PROCESSOR_I486:
1466 case PROCESSOR_PENTIUM:
1467 case PROCESSOR_IAMCU:
1468 case PROCESSOR_K6:
1469 case PROCESSOR_ATHLON:
1470 case PROCESSOR_K8:
1471 case PROCESSOR_AMDFAM10:
1472 case PROCESSOR_BD:
1473 case PROCESSOR_ZNVER:
1474 case PROCESSOR_BT:
1475 case PROCESSOR_GENERIC32:
1476 /* We use cpu_arch_isa_flags to check if we CAN optimize
1477 with nops. */
1478 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1479 patt = alt_patt;
1480 else
1481 patt = f32_patt;
1482 break;
1483 case PROCESSOR_PENTIUMPRO:
1484 case PROCESSOR_PENTIUM4:
1485 case PROCESSOR_NOCONA:
1486 case PROCESSOR_CORE:
1487 case PROCESSOR_CORE2:
1488 case PROCESSOR_COREI7:
1489 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1490 patt = alt_patt;
1491 else
1492 patt = f32_patt;
1493 break;
1494 case PROCESSOR_GENERIC64:
1495 patt = alt_patt;
1496 break;
1497 case PROCESSOR_NONE:
1498 abort ();
1499 }
1500 }
1501
1502 if (patt == f32_patt)
1503 {
1504 max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1505 /* Limit number of NOPs to 2 for older processors. */
1506 max_number_of_nops = 2;
1507 }
1508 else
1509 {
1510 max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1511 /* Limit number of NOPs to 7 for newer processors. */
1512 max_number_of_nops = 7;
1513 }
1514 }
1515
1516 if (limit == 0)
1517 limit = max_single_nop_size;
1518
1519 if (fragP->fr_type == rs_fill_nop)
1520 {
1521 /* Output NOPs for .nop directive. */
1522 if (limit > max_single_nop_size)
1523 {
1524 as_bad_where (fragP->fr_file, fragP->fr_line,
1525 _("invalid single nop size: %d "
1526 "(expect within [0, %d])"),
1527 limit, max_single_nop_size);
1528 return;
1529 }
1530 }
1531 else if (fragP->fr_type != rs_machine_dependent)
1532 fragP->fr_var = count;
1533
1534 if ((count / max_single_nop_size) > max_number_of_nops)
1535 {
1536 /* Generate jump over NOPs. */
1537 offsetT disp = count - 2;
1538 if (fits_in_imm7 (disp))
1539 {
1540 /* Use "jmp disp8" if possible. */
1541 count = disp;
1542 where[0] = jump_disp8[0];
1543 where[1] = count;
1544 where += 2;
1545 }
1546 else
1547 {
1548 unsigned int size_of_jump;
1549
1550 if (flag_code == CODE_16BIT)
1551 {
1552 where[0] = jump16_disp32[0];
1553 where[1] = jump16_disp32[1];
1554 size_of_jump = 2;
1555 }
1556 else
1557 {
1558 where[0] = jump32_disp32[0];
1559 size_of_jump = 1;
1560 }
1561
1562 count -= size_of_jump + 4;
1563 if (!fits_in_imm31 (count))
1564 {
1565 as_bad_where (fragP->fr_file, fragP->fr_line,
1566 _("jump over nop padding out of range"));
1567 return;
1568 }
1569
1570 md_number_to_chars (where + size_of_jump, count, 4);
1571 where += size_of_jump + 4;
1572 }
1573 }
1574
1575 /* Generate multiple NOPs. */
1576 i386_output_nops (where, patt, count, limit);
1577 }
1578
1579 static INLINE int
1580 operand_type_all_zero (const union i386_operand_type *x)
1581 {
1582 switch (ARRAY_SIZE(x->array))
1583 {
1584 case 3:
1585 if (x->array[2])
1586 return 0;
1587 /* Fall through. */
1588 case 2:
1589 if (x->array[1])
1590 return 0;
1591 /* Fall through. */
1592 case 1:
1593 return !x->array[0];
1594 default:
1595 abort ();
1596 }
1597 }
1598
1599 static INLINE void
1600 operand_type_set (union i386_operand_type *x, unsigned int v)
1601 {
1602 switch (ARRAY_SIZE(x->array))
1603 {
1604 case 3:
1605 x->array[2] = v;
1606 /* Fall through. */
1607 case 2:
1608 x->array[1] = v;
1609 /* Fall through. */
1610 case 1:
1611 x->array[0] = v;
1613 break;
1614 default:
1615 abort ();
1616 }
1617
1618 x->bitfield.class = ClassNone;
1619 x->bitfield.instance = InstanceNone;
1620 }
1621
1622 static INLINE int
1623 operand_type_equal (const union i386_operand_type *x,
1624 const union i386_operand_type *y)
1625 {
1626 switch (ARRAY_SIZE(x->array))
1627 {
1628 case 3:
1629 if (x->array[2] != y->array[2])
1630 return 0;
1631 /* Fall through. */
1632 case 2:
1633 if (x->array[1] != y->array[1])
1634 return 0;
1635 /* Fall through. */
1636 case 1:
1637 return x->array[0] == y->array[0];
1638 break;
1639 default:
1640 abort ();
1641 }
1642 }
1643
1644 static INLINE int
1645 cpu_flags_all_zero (const union i386_cpu_flags *x)
1646 {
1647 switch (ARRAY_SIZE(x->array))
1648 {
1649 case 5:
1650 if (x->array[4])
1651 return 0;
1652 /* Fall through. */
1653 case 4:
1654 if (x->array[3])
1655 return 0;
1656 /* Fall through. */
1657 case 3:
1658 if (x->array[2])
1659 return 0;
1660 /* Fall through. */
1661 case 2:
1662 if (x->array[1])
1663 return 0;
1664 /* Fall through. */
1665 case 1:
1666 return !x->array[0];
1667 default:
1668 abort ();
1669 }
1670 }
1671
1672 static INLINE int
1673 cpu_flags_equal (const union i386_cpu_flags *x,
1674 const union i386_cpu_flags *y)
1675 {
1676 switch (ARRAY_SIZE(x->array))
1677 {
1678 case 5:
1679 if (x->array[4] != y->array[4])
1680 return 0;
1681 /* Fall through. */
1682 case 4:
1683 if (x->array[3] != y->array[3])
1684 return 0;
1685 /* Fall through. */
1686 case 3:
1687 if (x->array[2] != y->array[2])
1688 return 0;
1689 /* Fall through. */
1690 case 2:
1691 if (x->array[1] != y->array[1])
1692 return 0;
1693 /* Fall through. */
1694 case 1:
1695 return x->array[0] == y->array[0];
1696 break;
1697 default:
1698 abort ();
1699 }
1700 }
1701
1702 static INLINE int
1703 cpu_flags_check_cpu64 (i386_cpu_flags f)
1704 {
1705 return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1706 || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1707 }
1708
1709 static INLINE i386_cpu_flags
1710 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1711 {
1712 switch (ARRAY_SIZE (x.array))
1713 {
1714 case 5:
1715 x.array [4] &= y.array [4];
1716 /* Fall through. */
1717 case 4:
1718 x.array [3] &= y.array [3];
1719 /* Fall through. */
1720 case 3:
1721 x.array [2] &= y.array [2];
1722 /* Fall through. */
1723 case 2:
1724 x.array [1] &= y.array [1];
1725 /* Fall through. */
1726 case 1:
1727 x.array [0] &= y.array [0];
1728 break;
1729 default:
1730 abort ();
1731 }
1732 return x;
1733 }
1734
1735 static INLINE i386_cpu_flags
1736 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1737 {
1738 switch (ARRAY_SIZE (x.array))
1739 {
1740 case 5:
1741 x.array [4] |= y.array [4];
1742 /* Fall through. */
1743 case 4:
1744 x.array [3] |= y.array [3];
1745 /* Fall through. */
1746 case 3:
1747 x.array [2] |= y.array [2];
1748 /* Fall through. */
1749 case 2:
1750 x.array [1] |= y.array [1];
1751 /* Fall through. */
1752 case 1:
1753 x.array [0] |= y.array [0];
1754 break;
1755 default:
1756 abort ();
1757 }
1758 return x;
1759 }
1760
1761 static INLINE i386_cpu_flags
1762 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1763 {
1764 switch (ARRAY_SIZE (x.array))
1765 {
1766 case 5:
1767 x.array [4] &= ~y.array [4];
1768 /* Fall through. */
1769 case 4:
1770 x.array [3] &= ~y.array [3];
1771 /* Fall through. */
1772 case 3:
1773 x.array [2] &= ~y.array [2];
1774 /* Fall through. */
1775 case 2:
1776 x.array [1] &= ~y.array [1];
1777 /* Fall through. */
1778 case 1:
1779 x.array [0] &= ~y.array [0];
1780 break;
1781 default:
1782 abort ();
1783 }
1784 return x;
1785 }
1786
1787 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1788
1789 #define CPU_FLAGS_ARCH_MATCH 0x1
1790 #define CPU_FLAGS_64BIT_MATCH 0x2
1791
1792 #define CPU_FLAGS_PERFECT_MATCH \
1793 (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1794
1795 /* Return CPU flags match bits. */
1796
1797 static int
1798 cpu_flags_match (const insn_template *t)
1799 {
1800 i386_cpu_flags x = t->cpu_flags;
1801 int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1802
1803 x.bitfield.cpu64 = 0;
1804 x.bitfield.cpuno64 = 0;
1805
1806 if (cpu_flags_all_zero (&x))
1807 {
1808 /* This instruction is available on all archs. */
1809 match |= CPU_FLAGS_ARCH_MATCH;
1810 }
1811 else
1812 {
1813 /* This instruction is available only on some archs. */
1814 i386_cpu_flags cpu = cpu_arch_flags;
1815
1816       /* AVX512VL is not a standalone feature - match it and then strip it.  */
1817 if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1818 return match;
1819 x.bitfield.cpuavx512vl = 0;
1820
1821 /* AVX and AVX2 present at the same time express an operand size
1822 dependency - strip AVX2 for the purposes here. The operand size
1823 dependent check occurs in check_vecOperands(). */
1824 if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1825 x.bitfield.cpuavx2 = 0;
1826
1827 cpu = cpu_flags_and (x, cpu);
1828 if (!cpu_flags_all_zero (&cpu))
1829 {
1830 if (x.bitfield.cpuavx)
1831 {
1832 /* We need to check a few extra flags with AVX. */
1833 if (cpu.bitfield.cpuavx
1834 && (!t->opcode_modifier.sse2avx
1835 || (sse2avx && !i.prefix[DATA_PREFIX]))
1836 && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1837 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1838 && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1839 match |= CPU_FLAGS_ARCH_MATCH;
1840 }
1841 else if (x.bitfield.cpuavx512f)
1842 {
1843 /* We need to check a few extra flags with AVX512F. */
1844 if (cpu.bitfield.cpuavx512f
1845 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1846 && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1847 && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1848 match |= CPU_FLAGS_ARCH_MATCH;
1849 }
1850 else
1851 match |= CPU_FLAGS_ARCH_MATCH;
1852 }
1853 }
1854 return match;
1855 }
1856
1857 static INLINE i386_operand_type
1858 operand_type_and (i386_operand_type x, i386_operand_type y)
1859 {
1860 if (x.bitfield.class != y.bitfield.class)
1861 x.bitfield.class = ClassNone;
1862 if (x.bitfield.instance != y.bitfield.instance)
1863 x.bitfield.instance = InstanceNone;
1864
1865 switch (ARRAY_SIZE (x.array))
1866 {
1867 case 3:
1868 x.array [2] &= y.array [2];
1869 /* Fall through. */
1870 case 2:
1871 x.array [1] &= y.array [1];
1872 /* Fall through. */
1873 case 1:
1874 x.array [0] &= y.array [0];
1875 break;
1876 default:
1877 abort ();
1878 }
1879 return x;
1880 }
1881
1882 static INLINE i386_operand_type
1883 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1884 {
1885 gas_assert (y.bitfield.class == ClassNone);
1886 gas_assert (y.bitfield.instance == InstanceNone);
1887
1888 switch (ARRAY_SIZE (x.array))
1889 {
1890 case 3:
1891 x.array [2] &= ~y.array [2];
1892 /* Fall through. */
1893 case 2:
1894 x.array [1] &= ~y.array [1];
1895 /* Fall through. */
1896 case 1:
1897 x.array [0] &= ~y.array [0];
1898 break;
1899 default:
1900 abort ();
1901 }
1902 return x;
1903 }
1904
1905 static INLINE i386_operand_type
1906 operand_type_or (i386_operand_type x, i386_operand_type y)
1907 {
1908 gas_assert (x.bitfield.class == ClassNone ||
1909 y.bitfield.class == ClassNone ||
1910 x.bitfield.class == y.bitfield.class);
1911 gas_assert (x.bitfield.instance == InstanceNone ||
1912 y.bitfield.instance == InstanceNone ||
1913 x.bitfield.instance == y.bitfield.instance);
1914
1915 switch (ARRAY_SIZE (x.array))
1916 {
1917 case 3:
1918 x.array [2] |= y.array [2];
1919 /* Fall through. */
1920 case 2:
1921 x.array [1] |= y.array [1];
1922 /* Fall through. */
1923 case 1:
1924 x.array [0] |= y.array [0];
1925 break;
1926 default:
1927 abort ();
1928 }
1929 return x;
1930 }
1931
1932 static INLINE i386_operand_type
1933 operand_type_xor (i386_operand_type x, i386_operand_type y)
1934 {
1935 gas_assert (y.bitfield.class == ClassNone);
1936 gas_assert (y.bitfield.instance == InstanceNone);
1937
1938 switch (ARRAY_SIZE (x.array))
1939 {
1940 case 3:
1941 x.array [2] ^= y.array [2];
1942 /* Fall through. */
1943 case 2:
1944 x.array [1] ^= y.array [1];
1945 /* Fall through. */
1946 case 1:
1947 x.array [0] ^= y.array [0];
1948 break;
1949 default:
1950 abort ();
1951 }
1952 return x;
1953 }
1954
1955 static const i386_operand_type disp16_32 = OPERAND_TYPE_DISP16_32;
1956 static const i386_operand_type anydisp = OPERAND_TYPE_ANYDISP;
1957 static const i386_operand_type regxmm = OPERAND_TYPE_REGXMM;
1958 static const i386_operand_type imm16 = OPERAND_TYPE_IMM16;
1959 static const i386_operand_type imm32 = OPERAND_TYPE_IMM32;
1960 static const i386_operand_type imm32s = OPERAND_TYPE_IMM32S;
1961 static const i386_operand_type imm16_32 = OPERAND_TYPE_IMM16_32;
1962 static const i386_operand_type imm16_32s = OPERAND_TYPE_IMM16_32S;
1963 static const i386_operand_type imm16_32_32s = OPERAND_TYPE_IMM16_32_32S;
1964
1965 enum operand_type
1966 {
1967 reg,
1968 imm,
1969 disp,
1970 anymem
1971 };
1972
1973 static INLINE int
1974 operand_type_check (i386_operand_type t, enum operand_type c)
1975 {
1976 switch (c)
1977 {
1978 case reg:
1979 return t.bitfield.class == Reg;
1980
1981 case imm:
1982 return (t.bitfield.imm8
1983 || t.bitfield.imm8s
1984 || t.bitfield.imm16
1985 || t.bitfield.imm32
1986 || t.bitfield.imm32s
1987 || t.bitfield.imm64);
1988
1989 case disp:
1990 return (t.bitfield.disp8
1991 || t.bitfield.disp16
1992 || t.bitfield.disp32
1993 || t.bitfield.disp64);
1994
1995 case anymem:
1996 return (t.bitfield.disp8
1997 || t.bitfield.disp16
1998 || t.bitfield.disp32
1999 || t.bitfield.disp64
2000 || t.bitfield.baseindex);
2001
2002 default:
2003 abort ();
2004 }
2005
2006 return 0;
2007 }
2008
2009 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2010    between operand GIVEN and operand WANTED for instruction template T.  */
2011
2012 static INLINE int
2013 match_operand_size (const insn_template *t, unsigned int wanted,
2014 unsigned int given)
2015 {
2016 return !((i.types[given].bitfield.byte
2017 && !t->operand_types[wanted].bitfield.byte)
2018 || (i.types[given].bitfield.word
2019 && !t->operand_types[wanted].bitfield.word)
2020 || (i.types[given].bitfield.dword
2021 && !t->operand_types[wanted].bitfield.dword)
2022 || (i.types[given].bitfield.qword
2023 && !t->operand_types[wanted].bitfield.qword)
2024 || (i.types[given].bitfield.tbyte
2025 && !t->operand_types[wanted].bitfield.tbyte));
2026 }
2027
2028 /* Return 1 if there is no conflict in SIMD register between operand
2029    GIVEN and operand WANTED for instruction template T.  */
2030
2031 static INLINE int
2032 match_simd_size (const insn_template *t, unsigned int wanted,
2033 unsigned int given)
2034 {
2035 return !((i.types[given].bitfield.xmmword
2036 && !t->operand_types[wanted].bitfield.xmmword)
2037 || (i.types[given].bitfield.ymmword
2038 && !t->operand_types[wanted].bitfield.ymmword)
2039 || (i.types[given].bitfield.zmmword
2040 && !t->operand_types[wanted].bitfield.zmmword)
2041 || (i.types[given].bitfield.tmmword
2042 && !t->operand_types[wanted].bitfield.tmmword));
2043 }
2044
2045 /* Return 1 if there is no conflict in any size between operand GIVEN
2046    and operand WANTED for instruction template T.  */
2047
2048 static INLINE int
2049 match_mem_size (const insn_template *t, unsigned int wanted,
2050 unsigned int given)
2051 {
2052 return (match_operand_size (t, wanted, given)
2053 && !((i.types[given].bitfield.unspecified
2054 && !i.broadcast.type
2055 && !i.broadcast.bytes
2056 && !t->operand_types[wanted].bitfield.unspecified)
2057 || (i.types[given].bitfield.fword
2058 && !t->operand_types[wanted].bitfield.fword)
2059 /* For scalar opcode templates to allow register and memory
2060 operands at the same time, some special casing is needed
2061 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2062 down-conversion vpmov*. */
2063 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2064 && t->operand_types[wanted].bitfield.byte
2065 + t->operand_types[wanted].bitfield.word
2066 + t->operand_types[wanted].bitfield.dword
2067 + t->operand_types[wanted].bitfield.qword
2068 > !!t->opcode_modifier.broadcast)
2069 ? (i.types[given].bitfield.xmmword
2070 || i.types[given].bitfield.ymmword
2071 || i.types[given].bitfield.zmmword)
2072                     : !match_simd_size (t, wanted, given))));
2073 }
2074
2075 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2076 operands for instruction template T, and it has MATCH_REVERSE set if there
2077 is no size conflict on any operands for the template with operands reversed
2078 (and the template allows for reversing in the first place). */
2079
2080 #define MATCH_STRAIGHT 1
2081 #define MATCH_REVERSE 2
2082
2083 static INLINE unsigned int
2084 operand_size_match (const insn_template *t)
2085 {
2086 unsigned int j, match = MATCH_STRAIGHT;
2087
2088 /* Don't check non-absolute jump instructions. */
2089 if (t->opcode_modifier.jump
2090 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2091 return match;
2092
2093 /* Check memory and accumulator operand size. */
2094 for (j = 0; j < i.operands; j++)
2095 {
2096 if (i.types[j].bitfield.class != Reg
2097 && i.types[j].bitfield.class != RegSIMD
2098 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2099 continue;
2100
2101 if (t->operand_types[j].bitfield.class == Reg
2102 && !match_operand_size (t, j, j))
2103 {
2104 match = 0;
2105 break;
2106 }
2107
2108 if (t->operand_types[j].bitfield.class == RegSIMD
2109 && !match_simd_size (t, j, j))
2110 {
2111 match = 0;
2112 break;
2113 }
2114
2115 if (t->operand_types[j].bitfield.instance == Accum
2116 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2117 {
2118 match = 0;
2119 break;
2120 }
2121
2122 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2123 {
2124 match = 0;
2125 break;
2126 }
2127 }
2128
2129 if (!t->opcode_modifier.d)
2130 return match;
2131
2132 /* Check reverse. */
2133 gas_assert ((i.operands >= 2 && i.operands <= 3)
2134 || t->opcode_modifier.vexsources);
2135
2136 for (j = 0; j < i.operands; j++)
2137 {
2138 unsigned int given = i.operands - j - 1;
2139
2140 /* For 4- and 5-operand insns VEX.W controls just the first two
2141 register operands. */
2142 if (t->opcode_modifier.vexsources)
2143 given = j < 2 ? 1 - j : j;
2144
2145 if (t->operand_types[j].bitfield.class == Reg
2146 && !match_operand_size (t, j, given))
2147 return match;
2148
2149 if (t->operand_types[j].bitfield.class == RegSIMD
2150 && !match_simd_size (t, j, given))
2151 return match;
2152
2153 if (t->operand_types[j].bitfield.instance == Accum
2154 && (!match_operand_size (t, j, given)
2155 || !match_simd_size (t, j, given)))
2156 return match;
2157
2158 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2159 return match;
2160 }
2161
2162 return match | MATCH_REVERSE;
2163 }
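
/* Illustrative sketch (hypothetical operands): for a two-operand template
   with the D modifier set, a straight fit of both operands combined with
   a fit of the swapped order yields MATCH_STRAIGHT | MATCH_REVERSE; if
   only the swapped order fits, the first loop clears MATCH_STRAIGHT and
   just MATCH_REVERSE is returned.  */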
2164
2165 static INLINE int
2166 operand_type_match (i386_operand_type overlap,
2167 i386_operand_type given)
2168 {
2169 i386_operand_type temp = overlap;
2170
2171 temp.bitfield.unspecified = 0;
2172 temp.bitfield.byte = 0;
2173 temp.bitfield.word = 0;
2174 temp.bitfield.dword = 0;
2175 temp.bitfield.fword = 0;
2176 temp.bitfield.qword = 0;
2177 temp.bitfield.tbyte = 0;
2178 temp.bitfield.xmmword = 0;
2179 temp.bitfield.ymmword = 0;
2180 temp.bitfield.zmmword = 0;
2181 temp.bitfield.tmmword = 0;
2182 if (operand_type_all_zero (&temp))
2183 goto mismatch;
2184
2185 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2186 return 1;
2187
2188 mismatch:
2189 i.error = operand_type_mismatch;
2190 return 0;
2191 }
2192
2193 /* If given types g0 and g1 are registers, they must be of the same type
2194 unless the expected operand type register overlap is null.
2195 Intel syntax sized memory operands are also checked here. */
2196
2197 static INLINE int
2198 operand_type_register_match (i386_operand_type g0,
2199 i386_operand_type t0,
2200 i386_operand_type g1,
2201 i386_operand_type t1)
2202 {
2203 if (g0.bitfield.class != Reg
2204 && g0.bitfield.class != RegSIMD
2205 && (g0.bitfield.unspecified
2206 || !operand_type_check (g0, anymem)))
2207 return 1;
2208
2209 if (g1.bitfield.class != Reg
2210 && g1.bitfield.class != RegSIMD
2211 && (g1.bitfield.unspecified
2212 || !operand_type_check (g1, anymem)))
2213 return 1;
2214
2215 if (g0.bitfield.byte == g1.bitfield.byte
2216 && g0.bitfield.word == g1.bitfield.word
2217 && g0.bitfield.dword == g1.bitfield.dword
2218 && g0.bitfield.qword == g1.bitfield.qword
2219 && g0.bitfield.xmmword == g1.bitfield.xmmword
2220 && g0.bitfield.ymmword == g1.bitfield.ymmword
2221 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2222 return 1;
2223
2224 /* If expectations overlap in no more than a single size, all is fine. */
2225 g0 = operand_type_and (t0, t1);
2226 if (g0.bitfield.byte
2227 + g0.bitfield.word
2228 + g0.bitfield.dword
2229 + g0.bitfield.qword
2230 + g0.bitfield.xmmword
2231 + g0.bitfield.ymmword
2232 + g0.bitfield.zmmword <= 1)
2233 return 1;
2234
2235 i.error = register_type_mismatch;
2236
2237 return 0;
2238 }
2239
2240 static INLINE unsigned int
2241 register_number (const reg_entry *r)
2242 {
2243 unsigned int nr = r->reg_num;
2244
2245 if (r->reg_flags & RegRex)
2246 nr += 8;
2247
2248 if (r->reg_flags & RegVRex)
2249 nr += 16;
2250
2251 return nr;
2252 }
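
/* Worked examples (register table values; illustrative only):
   %eax has reg_num 0 and no flags -> 0;
   %r10 has reg_num 2 and RegRex -> 2 + 8 == 10;
   %xmm21 has reg_num 5 and RegVRex -> 5 + 16 == 21.  */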
2253
2254 static INLINE unsigned int
2255 mode_from_disp_size (i386_operand_type t)
2256 {
2257 if (t.bitfield.disp8)
2258 return 1;
2259 else if (t.bitfield.disp16
2260 || t.bitfield.disp32)
2261 return 2;
2262 else
2263 return 0;
2264 }
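
/* Sketch: the value above corresponds to the ModRM.mod field implied by
   the displacement type, e.g. disp8 -> 1 and disp16/disp32 -> 2, with 0
   returned when no sized displacement is present.  */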
2265
2266 static INLINE int
2267 fits_in_signed_byte (addressT num)
2268 {
2269 return num + 0x80 <= 0xff;
2270 }
2271
2272 static INLINE int
2273 fits_in_unsigned_byte (addressT num)
2274 {
2275 return num <= 0xff;
2276 }
2277
2278 static INLINE int
2279 fits_in_unsigned_word (addressT num)
2280 {
2281 return num <= 0xffff;
2282 }
2283
2284 static INLINE int
2285 fits_in_signed_word (addressT num)
2286 {
2287 return num + 0x8000 <= 0xffff;
2288 }
2289
2290 static INLINE int
2291 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2292 {
2293 #ifndef BFD64
2294 return 1;
2295 #else
2296 return num + 0x80000000 <= 0xffffffff;
2297 #endif
2298 } /* fits_in_signed_long() */
2299
2300 static INLINE int
2301 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2302 {
2303 #ifndef BFD64
2304 return 1;
2305 #else
2306 return num <= 0xffffffff;
2307 #endif
2308 } /* fits_in_unsigned_long() */
2309
2310 static INLINE valueT extend_to_32bit_address (addressT num)
2311 {
2312 #ifdef BFD64
2313   if (fits_in_unsigned_long (num))
2314 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2315
2316 if (!fits_in_signed_long (num))
2317 return num & 0xffffffff;
2318 #endif
2319
2320 return num;
2321 }
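
/* Worked examples on a BFD64 build (illustrative only):
   extend_to_32bit_address (0x80000000) == 0xffffffff80000000
     (fits in 32 unsigned bits, hence sign-extended from bit 31);
   extend_to_32bit_address (0x123456789) == 0x23456789
     (fits in neither signed nor unsigned 32 bits, hence truncated).  */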
2322
2323 static INLINE int
2324 fits_in_disp8 (offsetT num)
2325 {
2326 int shift = i.memshift;
2327 unsigned int mask;
2328
2329 if (shift == -1)
2330 abort ();
2331
2332 mask = (1 << shift) - 1;
2333
2334 /* Return 0 if NUM isn't properly aligned. */
2335 if ((num & mask))
2336 return 0;
2337
2338 /* Check if NUM will fit in 8bit after shift. */
2339 return fits_in_signed_byte (num >> shift);
2340 }
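
/* Illustrative sketch of the EVEX compressed displacement (disp8 * N),
   using hypothetical values:

     i.memshift == 6 (64-byte, i.e. zmm-sized, memory access):
       fits_in_disp8 (0x80) -> 1   (0x80 >> 6 == 2 fits in a signed byte)
       fits_in_disp8 (0x44) -> 0   (not 64-byte aligned; disp32 needed)  */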
2341
2342 static INLINE int
2343 fits_in_imm4 (offsetT num)
2344 {
2345 return (num & 0xf) == num;
2346 }
2347
2348 static i386_operand_type
2349 smallest_imm_type (offsetT num)
2350 {
2351 i386_operand_type t;
2352
2353 operand_type_set (&t, 0);
2354 t.bitfield.imm64 = 1;
2355
2356 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2357 {
2358 /* This code is disabled on the 486 because all the Imm1 forms
2359 in the opcode table are slower on the i486. They're the
2360 versions with the implicitly specified single-position
2361 displacement, which has another syntax if you really want to
2362 use that form. */
2363 t.bitfield.imm1 = 1;
2364 t.bitfield.imm8 = 1;
2365 t.bitfield.imm8s = 1;
2366 t.bitfield.imm16 = 1;
2367 t.bitfield.imm32 = 1;
2368 t.bitfield.imm32s = 1;
2369 }
2370 else if (fits_in_signed_byte (num))
2371 {
2372 t.bitfield.imm8 = 1;
2373 t.bitfield.imm8s = 1;
2374 t.bitfield.imm16 = 1;
2375 t.bitfield.imm32 = 1;
2376 t.bitfield.imm32s = 1;
2377 }
2378 else if (fits_in_unsigned_byte (num))
2379 {
2380 t.bitfield.imm8 = 1;
2381 t.bitfield.imm16 = 1;
2382 t.bitfield.imm32 = 1;
2383 t.bitfield.imm32s = 1;
2384 }
2385 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2386 {
2387 t.bitfield.imm16 = 1;
2388 t.bitfield.imm32 = 1;
2389 t.bitfield.imm32s = 1;
2390 }
2391 else if (fits_in_signed_long (num))
2392 {
2393 t.bitfield.imm32 = 1;
2394 t.bitfield.imm32s = 1;
2395 }
2396 else if (fits_in_unsigned_long (num))
2397 t.bitfield.imm32 = 1;
2398
2399 return t;
2400 }
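
/* Worked examples (illustrative): imm64 is always set, and narrower types
   accumulate on top of it.  smallest_imm_type (-1) permits
   imm8/imm8s/imm16/imm32/imm32s since -1 fits in a signed byte, while
   smallest_imm_type (0x90) permits the same set minus imm8s, 0x90 fitting
   only in an unsigned byte.  */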
2401
2402 static offsetT
2403 offset_in_range (offsetT val, int size)
2404 {
2405 addressT mask;
2406
2407 switch (size)
2408 {
2409 case 1: mask = ((addressT) 1 << 8) - 1; break;
2410 case 2: mask = ((addressT) 1 << 16) - 1; break;
2411 #ifdef BFD64
2412 case 4: mask = ((addressT) 1 << 32) - 1; break;
2413 #endif
2414 case sizeof (val): return val;
2415 default: abort ();
2416 }
2417
2418 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2419 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2420 (uint64_t) val, (uint64_t) (val & mask));
2421
2422 return val & mask;
2423 }
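
/* Sketch (hypothetical calls): offset_in_range (0x1234, 1) warns
   "0x1234 shortened to 0x34" and returns 0x34, whereas -1 passes through
   as 0xff without a warning because every discarded bit is a sign bit.  */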
2424
2425 enum PREFIX_GROUP
2426 {
2427 PREFIX_EXIST = 0,
2428 PREFIX_LOCK,
2429 PREFIX_REP,
2430 PREFIX_DS,
2431 PREFIX_OTHER
2432 };
2433
2434 /* Returns
2435 a. PREFIX_EXIST if attempting to add a prefix where one from the
2436 same class already exists.
2437 b. PREFIX_LOCK if lock prefix is added.
2438 c. PREFIX_REP if rep/repne prefix is added.
2439 d. PREFIX_DS if ds prefix is added.
2440    e. PREFIX_OTHER if any other prefix is added.
2441 */
2442
2443 static enum PREFIX_GROUP
2444 add_prefix (unsigned int prefix)
2445 {
2446 enum PREFIX_GROUP ret = PREFIX_OTHER;
2447 unsigned int q;
2448
2449 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2450 && flag_code == CODE_64BIT)
2451 {
2452 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2453 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2454 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2455 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2456 ret = PREFIX_EXIST;
2457 q = REX_PREFIX;
2458 }
2459 else
2460 {
2461 switch (prefix)
2462 {
2463 default:
2464 abort ();
2465
2466 case DS_PREFIX_OPCODE:
2467 ret = PREFIX_DS;
2468 /* Fall through. */
2469 case CS_PREFIX_OPCODE:
2470 case ES_PREFIX_OPCODE:
2471 case FS_PREFIX_OPCODE:
2472 case GS_PREFIX_OPCODE:
2473 case SS_PREFIX_OPCODE:
2474 q = SEG_PREFIX;
2475 break;
2476
2477 case REPNE_PREFIX_OPCODE:
2478 case REPE_PREFIX_OPCODE:
2479 q = REP_PREFIX;
2480 ret = PREFIX_REP;
2481 break;
2482
2483 case LOCK_PREFIX_OPCODE:
2484 q = LOCK_PREFIX;
2485 ret = PREFIX_LOCK;
2486 break;
2487
2488 case FWAIT_OPCODE:
2489 q = WAIT_PREFIX;
2490 break;
2491
2492 case ADDR_PREFIX_OPCODE:
2493 q = ADDR_PREFIX;
2494 break;
2495
2496 case DATA_PREFIX_OPCODE:
2497 q = DATA_PREFIX;
2498 break;
2499 }
2500 if (i.prefix[q] != 0)
2501 ret = PREFIX_EXIST;
2502 }
2503
2504 if (ret)
2505 {
2506 if (!i.prefix[q])
2507 ++i.prefixes;
2508 i.prefix[q] |= prefix;
2509 }
2510 else
2511 as_bad (_("same type of prefix used twice"));
2512
2513 return ret;
2514 }
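
/* Illustrative calls (mirroring what the parser does; hypothetical):
   a first add_prefix (LOCK_PREFIX_OPCODE) records the byte in
   i.prefix[LOCK_PREFIX] and returns PREFIX_LOCK; a second such call finds
   the slot occupied, diagnoses "same type of prefix used twice", and
   returns PREFIX_EXIST.  */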
2515
2516 static void
2517 update_code_flag (int value, int check)
2518 {
2519 PRINTF_LIKE ((*as_error));
2520
2521 flag_code = (enum flag_code) value;
2522 if (flag_code == CODE_64BIT)
2523 {
2524 cpu_arch_flags.bitfield.cpu64 = 1;
2525 cpu_arch_flags.bitfield.cpuno64 = 0;
2526 }
2527 else
2528 {
2529 cpu_arch_flags.bitfield.cpu64 = 0;
2530 cpu_arch_flags.bitfield.cpuno64 = 1;
2531 }
2532   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm)
2533 {
2534 if (check)
2535 as_error = as_fatal;
2536 else
2537 as_error = as_bad;
2538 (*as_error) (_("64bit mode not supported on `%s'."),
2539 cpu_arch_name ? cpu_arch_name : default_arch);
2540 }
2541 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2542 {
2543 if (check)
2544 as_error = as_fatal;
2545 else
2546 as_error = as_bad;
2547 (*as_error) (_("32bit mode not supported on `%s'."),
2548 cpu_arch_name ? cpu_arch_name : default_arch);
2549 }
2550 stackop_size = '\0';
2551 }
2552
2553 static void
2554 set_code_flag (int value)
2555 {
2556 update_code_flag (value, 0);
2557 }
2558
2559 static void
2560 set_16bit_gcc_code_flag (int new_code_flag)
2561 {
2562 flag_code = (enum flag_code) new_code_flag;
2563 if (flag_code != CODE_16BIT)
2564 abort ();
2565 cpu_arch_flags.bitfield.cpu64 = 0;
2566 cpu_arch_flags.bitfield.cpuno64 = 1;
2567 stackop_size = LONG_MNEM_SUFFIX;
2568 }
2569
2570 static void
2571 set_intel_syntax (int syntax_flag)
2572 {
2573 /* Find out if register prefixing is specified. */
2574 int ask_naked_reg = 0;
2575
2576 SKIP_WHITESPACE ();
2577 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2578 {
2579 char *string;
2580 int e = get_symbol_name (&string);
2581
2582 if (strcmp (string, "prefix") == 0)
2583 ask_naked_reg = 1;
2584 else if (strcmp (string, "noprefix") == 0)
2585 ask_naked_reg = -1;
2586 else
2587 as_bad (_("bad argument to syntax directive."));
2588 (void) restore_line_pointer (e);
2589 }
2590 demand_empty_rest_of_line ();
2591
2592 intel_syntax = syntax_flag;
2593
2594 if (ask_naked_reg == 0)
2595 allow_naked_reg = (intel_syntax
2596 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2597 else
2598 allow_naked_reg = (ask_naked_reg < 0);
2599
2600 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2601
2602 identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2603 identifier_chars['$'] = intel_syntax ? '$' : 0;
2604 register_prefix = allow_naked_reg ? "" : "%";
2605 }
2606
2607 static void
2608 set_intel_mnemonic (int mnemonic_flag)
2609 {
2610 intel_mnemonic = mnemonic_flag;
2611 }
2612
2613 static void
2614 set_allow_index_reg (int flag)
2615 {
2616 allow_index_reg = flag;
2617 }
2618
2619 static void
2620 set_check (int what)
2621 {
2622 enum check_kind *kind;
2623 const char *str;
2624
2625 if (what)
2626 {
2627 kind = &operand_check;
2628 str = "operand";
2629 }
2630 else
2631 {
2632 kind = &sse_check;
2633 str = "sse";
2634 }
2635
2636 SKIP_WHITESPACE ();
2637
2638 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2639 {
2640 char *string;
2641 int e = get_symbol_name (&string);
2642
2643 if (strcmp (string, "none") == 0)
2644 *kind = check_none;
2645 else if (strcmp (string, "warning") == 0)
2646 *kind = check_warning;
2647 else if (strcmp (string, "error") == 0)
2648 *kind = check_error;
2649 else
2650 as_bad (_("bad argument to %s_check directive."), str);
2651 (void) restore_line_pointer (e);
2652 }
2653 else
2654 as_bad (_("missing argument for %s_check directive"), str);
2655
2656 demand_empty_rest_of_line ();
2657 }
2658
2659 static void
2660 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2661 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2662 {
2663 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2664 static const char *arch;
2665
2666 /* Intel MCU is only supported on ELF. */
2667 if (!IS_ELF)
2668 return;
2669
2670 if (!arch)
2671 {
2672 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2673 use default_arch. */
2674 arch = cpu_arch_name;
2675 if (!arch)
2676 arch = default_arch;
2677 }
2678
2679 /* If we are targeting Intel MCU, we must enable it. */
2680 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2681 == new_flag.bitfield.cpuiamcu)
2682 return;
2683
2684 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2685 #endif
2686 }
2687
2688 static void
2689 extend_cpu_sub_arch_name (const char *name)
2690 {
2691 if (cpu_sub_arch_name)
2692 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2693 ".", name, (const char *) NULL);
2694 else
2695 cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2696 }
2697
2698 static void
2699 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2700 {
2701 typedef struct arch_stack_entry
2702 {
2703 const struct arch_stack_entry *prev;
2704 const char *name;
2705 char *sub_name;
2706 i386_cpu_flags flags;
2707 i386_cpu_flags isa_flags;
2708 enum processor_type isa;
2709 enum flag_code flag_code;
2710 char stackop_size;
2711 bool no_cond_jump_promotion;
2712 } arch_stack_entry;
2713 static const arch_stack_entry *arch_stack_top;
2714
2715 SKIP_WHITESPACE ();
2716
2717 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2718 {
2719 char *s;
2720 int e = get_symbol_name (&s);
2721 const char *string = s;
2722 unsigned int j = 0;
2723 i386_cpu_flags flags;
2724
2725 if (strcmp (string, "default") == 0)
2726 {
2727 if (strcmp (default_arch, "iamcu") == 0)
2728 string = default_arch;
2729 else
2730 {
2731 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2732
2733 cpu_arch_name = NULL;
2734 free (cpu_sub_arch_name);
2735 cpu_sub_arch_name = NULL;
2736 cpu_arch_flags = cpu_unknown_flags;
2737 if (flag_code == CODE_64BIT)
2738 {
2739 cpu_arch_flags.bitfield.cpu64 = 1;
2740 cpu_arch_flags.bitfield.cpuno64 = 0;
2741 }
2742 else
2743 {
2744 cpu_arch_flags.bitfield.cpu64 = 0;
2745 cpu_arch_flags.bitfield.cpuno64 = 1;
2746 }
2747 cpu_arch_isa = PROCESSOR_UNKNOWN;
2748 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2749 if (!cpu_arch_tune_set)
2750 {
2751 cpu_arch_tune = cpu_arch_isa;
2752 cpu_arch_tune_flags = cpu_arch_isa_flags;
2753 }
2754
2755 j = ARRAY_SIZE (cpu_arch) + 1;
2756 }
2757 }
2758 else if (strcmp (string, "push") == 0)
2759 {
2760 arch_stack_entry *top = XNEW (arch_stack_entry);
2761
2762 top->name = cpu_arch_name;
2763 if (cpu_sub_arch_name)
2764 top->sub_name = xstrdup (cpu_sub_arch_name);
2765 else
2766 top->sub_name = NULL;
2767 top->flags = cpu_arch_flags;
2768 top->isa = cpu_arch_isa;
2769 top->isa_flags = cpu_arch_isa_flags;
2770 top->flag_code = flag_code;
2771 top->stackop_size = stackop_size;
2772 top->no_cond_jump_promotion = no_cond_jump_promotion;
2773
2774 top->prev = arch_stack_top;
2775 arch_stack_top = top;
2776
2777 (void) restore_line_pointer (e);
2778 demand_empty_rest_of_line ();
2779 return;
2780 }
2781 else if (strcmp (string, "pop") == 0)
2782 {
2783 const arch_stack_entry *top = arch_stack_top;
2784
2785 if (!top)
2786 as_bad (_(".arch stack is empty"));
2787 else if (top->flag_code != flag_code
2788 || top->stackop_size != stackop_size)
2789 {
2790 static const unsigned int bits[] = {
2791 [CODE_16BIT] = 16,
2792 [CODE_32BIT] = 32,
2793 [CODE_64BIT] = 64,
2794 };
2795
2796 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2797 bits[top->flag_code],
2798 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2799 }
2800 else
2801 {
2802 arch_stack_top = top->prev;
2803
2804 cpu_arch_name = top->name;
2805 free (cpu_sub_arch_name);
2806 cpu_sub_arch_name = top->sub_name;
2807 cpu_arch_flags = top->flags;
2808 cpu_arch_isa = top->isa;
2809 cpu_arch_isa_flags = top->isa_flags;
2810 no_cond_jump_promotion = top->no_cond_jump_promotion;
2811
2812 XDELETE (top);
2813 }
2814
2815 (void) restore_line_pointer (e);
2816 demand_empty_rest_of_line ();
2817 return;
2818 }
2819
2820 for (; j < ARRAY_SIZE (cpu_arch); j++)
2821 {
2822 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2823 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2824 {
2825 if (*string != '.')
2826 {
2827 check_cpu_arch_compatible (string, cpu_arch[j].enable);
2828
2829 cpu_arch_name = cpu_arch[j].name;
2830 free (cpu_sub_arch_name);
2831 cpu_sub_arch_name = NULL;
2832 cpu_arch_flags = cpu_arch[j].enable;
2833 if (flag_code == CODE_64BIT)
2834 {
2835 cpu_arch_flags.bitfield.cpu64 = 1;
2836 cpu_arch_flags.bitfield.cpuno64 = 0;
2837 }
2838 else
2839 {
2840 cpu_arch_flags.bitfield.cpu64 = 0;
2841 cpu_arch_flags.bitfield.cpuno64 = 1;
2842 }
2843 cpu_arch_isa = cpu_arch[j].type;
2844 cpu_arch_isa_flags = cpu_arch[j].enable;
2845 if (!cpu_arch_tune_set)
2846 {
2847 cpu_arch_tune = cpu_arch_isa;
2848 cpu_arch_tune_flags = cpu_arch_isa_flags;
2849 }
2850 pre_386_16bit_warned = false;
2851 break;
2852 }
2853
2854 if (cpu_flags_all_zero (&cpu_arch[j].enable))
2855 continue;
2856
2857 flags = cpu_flags_or (cpu_arch_flags,
2858 cpu_arch[j].enable);
2859
2860 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2861 {
2862 extend_cpu_sub_arch_name (string + 1);
2863 cpu_arch_flags = flags;
2864 cpu_arch_isa_flags = flags;
2865 }
2866 else
2867 cpu_arch_isa_flags
2868 = cpu_flags_or (cpu_arch_isa_flags,
2869 cpu_arch[j].enable);
2870 (void) restore_line_pointer (e);
2871 demand_empty_rest_of_line ();
2872 return;
2873 }
2874 }
2875
2876 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
2877 {
2878 /* Disable an ISA extension. */
2879 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2880 if (cpu_arch[j].type == PROCESSOR_NONE
2881 && strcmp (string + 3, cpu_arch[j].name) == 0)
2882 {
2883 flags = cpu_flags_and_not (cpu_arch_flags,
2884 cpu_arch[j].disable);
2885 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2886 {
2887 extend_cpu_sub_arch_name (string + 1);
2888 cpu_arch_flags = flags;
2889 cpu_arch_isa_flags = flags;
2890 }
2891 (void) restore_line_pointer (e);
2892 demand_empty_rest_of_line ();
2893 return;
2894 }
2895 }
2896
2897 if (j == ARRAY_SIZE (cpu_arch))
2898 as_bad (_("no such architecture: `%s'"), string);
2899
2900 *input_line_pointer = e;
2901 }
2902 else
2903 as_bad (_("missing cpu architecture"));
2904
2905 no_cond_jump_promotion = 0;
2906 if (*input_line_pointer == ','
2907 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2908 {
2909 char *string;
2910 char e;
2911
2912 ++input_line_pointer;
2913 e = get_symbol_name (&string);
2914
2915 if (strcmp (string, "nojumps") == 0)
2916 no_cond_jump_promotion = 1;
2917 else if (strcmp (string, "jumps") == 0)
2918 ;
2919 else
2920 as_bad (_("no such architecture modifier: `%s'"), string);
2921
2922 (void) restore_line_pointer (e);
2923 }
2924
2925 demand_empty_rest_of_line ();
2926 }
2927
2928 enum bfd_architecture
2929 i386_arch (void)
2930 {
2931 if (cpu_arch_isa == PROCESSOR_IAMCU)
2932 {
2933 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2934 || flag_code == CODE_64BIT)
2935 as_fatal (_("Intel MCU is 32bit ELF only"));
2936 return bfd_arch_iamcu;
2937 }
2938 else
2939 return bfd_arch_i386;
2940 }
2941
2942 unsigned long
2943 i386_mach (void)
2944 {
2945 if (startswith (default_arch, "x86_64"))
2946 {
2947 if (default_arch[6] == '\0')
2948 return bfd_mach_x86_64;
2949 else
2950 return bfd_mach_x64_32;
2951 }
2952 else if (!strcmp (default_arch, "i386")
2953 || !strcmp (default_arch, "iamcu"))
2954 {
2955 if (cpu_arch_isa == PROCESSOR_IAMCU)
2956 {
2957 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2958 as_fatal (_("Intel MCU is 32bit ELF only"));
2959 return bfd_mach_i386_iamcu;
2960 }
2961 else
2962 return bfd_mach_i386_i386;
2963 }
2964 else
2965 as_fatal (_("unknown architecture"));
2966 }
2967 \f
2968 void
2969 md_begin (void)
2970 {
2971 /* Support pseudo prefixes like {disp32}. */
2972 lex_type ['{'] = LEX_BEGIN_NAME;
2973
2974 /* Initialize op_hash hash table. */
2975 op_hash = str_htab_create ();
2976
2977 {
2978 const insn_template *optab;
2979 templates *core_optab;
2980
2981 /* Setup for loop. */
2982 optab = i386_optab;
2983 core_optab = notes_alloc (sizeof (*core_optab));
2984 core_optab->start = optab;
2985
2986 while (1)
2987 {
2988 ++optab;
2989 if (optab->name == NULL
2990 || strcmp (optab->name, (optab - 1)->name) != 0)
2991 {
2992 /* different name --> ship out current template list;
2993 add to hash table; & begin anew. */
2994 core_optab->end = optab;
2995 if (str_hash_insert (op_hash, (optab - 1)->name, core_optab, 0))
2996 as_fatal (_("duplicate %s"), (optab - 1)->name);
2997
2998 if (optab->name == NULL)
2999 break;
3000 core_optab = notes_alloc (sizeof (*core_optab));
3001 core_optab->start = optab;
3002 }
3003 }
3004 }
3005
3006 /* Initialize reg_hash hash table. */
3007 reg_hash = str_htab_create ();
3008 {
3009 const reg_entry *regtab;
3010 unsigned int regtab_size = i386_regtab_size;
3011
3012 for (regtab = i386_regtab; regtab_size--; regtab++)
3013 {
3014 switch (regtab->reg_type.bitfield.class)
3015 {
3016 case Reg:
3017 if (regtab->reg_type.bitfield.dword)
3018 {
3019 if (regtab->reg_type.bitfield.instance == Accum)
3020 reg_eax = regtab;
3021 }
3022 else if (regtab->reg_type.bitfield.tbyte)
3023 {
3024 /* There's no point inserting st(<N>) in the hash table, as
3025 parentheses aren't included in register_chars[] anyway. */
3026 if (regtab->reg_type.bitfield.instance != Accum)
3027 continue;
3028 reg_st0 = regtab;
3029 }
3030 break;
3031
3032 case SReg:
3033 switch (regtab->reg_num)
3034 {
3035 case 0: reg_es = regtab; break;
3036 case 2: reg_ss = regtab; break;
3037 case 3: reg_ds = regtab; break;
3038 }
3039 break;
3040
3041 case RegMask:
3042 if (!regtab->reg_num)
3043 reg_k0 = regtab;
3044 break;
3045 }
3046
3047 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3048 as_fatal (_("duplicate %s"), regtab->reg_name);
3049 }
3050 }
3051
3052 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3053 {
3054 int c;
3055 char *p;
3056
3057 for (c = 0; c < 256; c++)
3058 {
3059 if (ISDIGIT (c) || ISLOWER (c))
3060 {
3061 mnemonic_chars[c] = c;
3062 register_chars[c] = c;
3063 operand_chars[c] = c;
3064 }
3065 else if (ISUPPER (c))
3066 {
3067 mnemonic_chars[c] = TOLOWER (c);
3068 register_chars[c] = mnemonic_chars[c];
3069 operand_chars[c] = c;
3070 }
3071 else if (c == '{' || c == '}')
3072 {
3073 mnemonic_chars[c] = c;
3074 operand_chars[c] = c;
3075 }
3076 #ifdef SVR4_COMMENT_CHARS
3077 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3078 operand_chars[c] = c;
3079 #endif
3080
3081 if (ISALPHA (c) || ISDIGIT (c))
3082 identifier_chars[c] = c;
3083 else if (c >= 128)
3084 {
3085 identifier_chars[c] = c;
3086 operand_chars[c] = c;
3087 }
3088 }
3089
3090 #ifdef LEX_AT
3091 identifier_chars['@'] = '@';
3092 #endif
3093 #ifdef LEX_QM
3094 identifier_chars['?'] = '?';
3095 operand_chars['?'] = '?';
3096 #endif
3097 mnemonic_chars['_'] = '_';
3098 mnemonic_chars['-'] = '-';
3099 mnemonic_chars['.'] = '.';
3100 identifier_chars['_'] = '_';
3101 identifier_chars['.'] = '.';
3102
3103 for (p = operand_special_chars; *p != '\0'; p++)
3104 operand_chars[(unsigned char) *p] = *p;
3105 }
3106
3107 if (flag_code == CODE_64BIT)
3108 {
3109 #if defined (OBJ_COFF) && defined (TE_PE)
3110 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3111 ? 32 : 16);
3112 #else
3113 x86_dwarf2_return_column = 16;
3114 #endif
3115 x86_cie_data_alignment = -8;
3116 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3117 x86_sframe_cfa_sp_reg = 7;
3118 x86_sframe_cfa_fp_reg = 6;
3119 #endif
3120 }
3121 else
3122 {
3123 x86_dwarf2_return_column = 8;
3124 x86_cie_data_alignment = -4;
3125 }
3126
3127 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3128 can be turned into BRANCH_PREFIX frag. */
3129 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3130 abort ();
3131 }
3132
3133 void
3134 i386_print_statistics (FILE *file)
3135 {
3136 htab_print_statistics (file, "i386 opcode", op_hash);
3137 htab_print_statistics (file, "i386 register", reg_hash);
3138 }
3139
3140 void
3141 i386_md_end (void)
3142 {
3143 htab_delete (op_hash);
3144 htab_delete (reg_hash);
3145 }
3146 \f
3147 #ifdef DEBUG386
3148
3149 /* Debugging routines for md_assemble. */
3150 static void pte (insn_template *);
3151 static void pt (i386_operand_type);
3152 static void pe (expressionS *);
3153 static void ps (symbolS *);
3154
3155 static void
3156 pi (const char *line, i386_insn *x)
3157 {
3158 unsigned int j;
3159
3160 fprintf (stdout, "%s: template ", line);
3161 pte (&x->tm);
3162 fprintf (stdout, " address: base %s index %s scale %x\n",
3163 x->base_reg ? x->base_reg->reg_name : "none",
3164 x->index_reg ? x->index_reg->reg_name : "none",
3165 x->log2_scale_factor);
3166 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3167 x->rm.mode, x->rm.reg, x->rm.regmem);
3168 fprintf (stdout, " sib: base %x index %x scale %x\n",
3169 x->sib.base, x->sib.index, x->sib.scale);
3170 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3171 (x->rex & REX_W) != 0,
3172 (x->rex & REX_R) != 0,
3173 (x->rex & REX_X) != 0,
3174 (x->rex & REX_B) != 0);
3175 for (j = 0; j < x->operands; j++)
3176 {
3177 fprintf (stdout, " #%d: ", j + 1);
3178 pt (x->types[j]);
3179 fprintf (stdout, "\n");
3180 if (x->types[j].bitfield.class == Reg
3181 || x->types[j].bitfield.class == RegMMX
3182 || x->types[j].bitfield.class == RegSIMD
3183 || x->types[j].bitfield.class == RegMask
3184 || x->types[j].bitfield.class == SReg
3185 || x->types[j].bitfield.class == RegCR
3186 || x->types[j].bitfield.class == RegDR
3187 || x->types[j].bitfield.class == RegTR
3188 || x->types[j].bitfield.class == RegBND)
3189 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3190 if (operand_type_check (x->types[j], imm))
3191 pe (x->op[j].imms);
3192 if (operand_type_check (x->types[j], disp))
3193 pe (x->op[j].disps);
3194 }
3195 }
3196
3197 static void
3198 pte (insn_template *t)
3199 {
3200 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3201 static const char *const opc_spc[] = {
3202 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3203 "XOP08", "XOP09", "XOP0A",
3204 };
3205 unsigned int j;
3206
3207 fprintf (stdout, " %d operands ", t->operands);
3208 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3209 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3210 if (opc_spc[t->opcode_modifier.opcodespace])
3211 fprintf (stdout, "space %s ", opc_spc[t->opcode_modifier.opcodespace]);
3212 fprintf (stdout, "opcode %x ", t->base_opcode);
3213 if (t->extension_opcode != None)
3214 fprintf (stdout, "ext %x ", t->extension_opcode);
3215 if (t->opcode_modifier.d)
3216 fprintf (stdout, "D");
3217 if (t->opcode_modifier.w)
3218 fprintf (stdout, "W");
3219 fprintf (stdout, "\n");
3220 for (j = 0; j < t->operands; j++)
3221 {
3222 fprintf (stdout, " #%d type ", j + 1);
3223 pt (t->operand_types[j]);
3224 fprintf (stdout, "\n");
3225 }
3226 }
3227
3228 static void
3229 pe (expressionS *e)
3230 {
3231 fprintf (stdout, " operation %d\n", e->X_op);
3232 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3233 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3234 if (e->X_add_symbol)
3235 {
3236 fprintf (stdout, " add_symbol ");
3237 ps (e->X_add_symbol);
3238 fprintf (stdout, "\n");
3239 }
3240 if (e->X_op_symbol)
3241 {
3242 fprintf (stdout, " op_symbol ");
3243 ps (e->X_op_symbol);
3244 fprintf (stdout, "\n");
3245 }
3246 }
3247
3248 static void
3249 ps (symbolS *s)
3250 {
3251 fprintf (stdout, "%s type %s%s",
3252 S_GET_NAME (s),
3253 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3254 segment_name (S_GET_SEGMENT (s)));
3255 }
3256
3257 static struct type_name
3258 {
3259 i386_operand_type mask;
3260 const char *name;
3261 }
3262 const type_names[] =
3263 {
3264 { OPERAND_TYPE_REG8, "r8" },
3265 { OPERAND_TYPE_REG16, "r16" },
3266 { OPERAND_TYPE_REG32, "r32" },
3267 { OPERAND_TYPE_REG64, "r64" },
3268 { OPERAND_TYPE_ACC8, "acc8" },
3269 { OPERAND_TYPE_ACC16, "acc16" },
3270 { OPERAND_TYPE_ACC32, "acc32" },
3271 { OPERAND_TYPE_ACC64, "acc64" },
3272 { OPERAND_TYPE_IMM8, "i8" },
3273 { OPERAND_TYPE_IMM8, "i8s" },
3274 { OPERAND_TYPE_IMM16, "i16" },
3275 { OPERAND_TYPE_IMM32, "i32" },
3276 { OPERAND_TYPE_IMM32S, "i32s" },
3277 { OPERAND_TYPE_IMM64, "i64" },
3278 { OPERAND_TYPE_IMM1, "i1" },
3279 { OPERAND_TYPE_BASEINDEX, "BaseIndex" },
3280 { OPERAND_TYPE_DISP8, "d8" },
3281 { OPERAND_TYPE_DISP16, "d16" },
3282 { OPERAND_TYPE_DISP32, "d32" },
3283 { OPERAND_TYPE_DISP64, "d64" },
3284 { OPERAND_TYPE_INOUTPORTREG, "InOutPortReg" },
3285 { OPERAND_TYPE_SHIFTCOUNT, "ShiftCount" },
3286 { OPERAND_TYPE_CONTROL, "control reg" },
3287 { OPERAND_TYPE_TEST, "test reg" },
3288 { OPERAND_TYPE_DEBUG, "debug reg" },
3289 { OPERAND_TYPE_FLOATREG, "FReg" },
3290 { OPERAND_TYPE_FLOATACC, "FAcc" },
3291 { OPERAND_TYPE_SREG, "SReg" },
3292 { OPERAND_TYPE_REGMMX, "rMMX" },
3293 { OPERAND_TYPE_REGXMM, "rXMM" },
3294 { OPERAND_TYPE_REGYMM, "rYMM" },
3295 { OPERAND_TYPE_REGZMM, "rZMM" },
3296 { OPERAND_TYPE_REGTMM, "rTMM" },
3297 { OPERAND_TYPE_REGMASK, "Mask reg" },
3298 };
3299
3300 static void
3301 pt (i386_operand_type t)
3302 {
3303 unsigned int j;
3304 i386_operand_type a;
3305
3306 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3307 {
3308 a = operand_type_and (t, type_names[j].mask);
3309 if (operand_type_equal (&a, &type_names[j].mask))
3310 fprintf (stdout, "%s, ", type_names[j].name);
3311 }
3312 fflush (stdout);
3313 }
3314
3315 #endif /* DEBUG386 */
3316 \f
3317 static bfd_reloc_code_real_type
3318 reloc (unsigned int size,
3319 int pcrel,
3320 int sign,
3321 bfd_reloc_code_real_type other)
3322 {
3323 if (other != NO_RELOC)
3324 {
3325 reloc_howto_type *rel;
3326
3327 if (size == 8)
3328 switch (other)
3329 {
3330 	  case BFD_RELOC_X86_64_GOT32:
3331 	    return BFD_RELOC_X86_64_GOT64;
3333 	  case BFD_RELOC_X86_64_GOTPLT64:
3334 	    return BFD_RELOC_X86_64_GOTPLT64;
3336 	  case BFD_RELOC_X86_64_PLTOFF64:
3337 	    return BFD_RELOC_X86_64_PLTOFF64;
3339 case BFD_RELOC_X86_64_GOTPC32:
3340 other = BFD_RELOC_X86_64_GOTPC64;
3341 break;
3342 case BFD_RELOC_X86_64_GOTPCREL:
3343 other = BFD_RELOC_X86_64_GOTPCREL64;
3344 break;
3345 case BFD_RELOC_X86_64_TPOFF32:
3346 other = BFD_RELOC_X86_64_TPOFF64;
3347 break;
3348 case BFD_RELOC_X86_64_DTPOFF32:
3349 other = BFD_RELOC_X86_64_DTPOFF64;
3350 break;
3351 default:
3352 break;
3353 }
3354
3355 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3356 if (other == BFD_RELOC_SIZE32)
3357 {
3358 if (size == 8)
3359 other = BFD_RELOC_SIZE64;
3360 if (pcrel)
3361 {
3362 as_bad (_("there are no pc-relative size relocations"));
3363 return NO_RELOC;
3364 }
3365 }
3366 #endif
3367
3368 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3369 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3370 sign = -1;
3371
3372 rel = bfd_reloc_type_lookup (stdoutput, other);
3373 if (!rel)
3374 as_bad (_("unknown relocation (%u)"), other);
3375 else if (size != bfd_get_reloc_size (rel))
3376 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3377 bfd_get_reloc_size (rel),
3378 size);
3379 else if (pcrel && !rel->pc_relative)
3380 as_bad (_("non-pc-relative relocation for pc-relative field"));
3381 else if ((rel->complain_on_overflow == complain_overflow_signed
3382 && !sign)
3383 || (rel->complain_on_overflow == complain_overflow_unsigned
3384 && sign > 0))
3385 as_bad (_("relocated field and relocation type differ in signedness"));
3386 else
3387 return other;
3388 return NO_RELOC;
3389 }
3390
3391 if (pcrel)
3392 {
3393 if (!sign)
3394 as_bad (_("there are no unsigned pc-relative relocations"));
3395 switch (size)
3396 {
3397 case 1: return BFD_RELOC_8_PCREL;
3398 case 2: return BFD_RELOC_16_PCREL;
3399 case 4: return BFD_RELOC_32_PCREL;
3400 case 8: return BFD_RELOC_64_PCREL;
3401 }
3402 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3403 }
3404 else
3405 {
3406 if (sign > 0)
3407 switch (size)
3408 {
3409 case 4: return BFD_RELOC_X86_64_32S;
3410 }
3411 else
3412 switch (size)
3413 {
3414 case 1: return BFD_RELOC_8;
3415 case 2: return BFD_RELOC_16;
3416 case 4: return BFD_RELOC_32;
3417 case 8: return BFD_RELOC_64;
3418 }
3419 as_bad (_("cannot do %s %u byte relocation"),
3420 sign > 0 ? "signed" : "unsigned", size);
3421 }
3422
3423 return NO_RELOC;
3424 }
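
/* Illustrative mappings for other == NO_RELOC (hypothetical calls):
   reloc (4, 1, 1, NO_RELOC) -> BFD_RELOC_32_PCREL;
   reloc (4, 0, 1, NO_RELOC) -> BFD_RELOC_X86_64_32S;
   reloc (4, 0, 0, NO_RELOC) -> BFD_RELOC_32.  */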
3425
3426 /* Here we decide which fixups can be adjusted to make them relative to
3427 the beginning of the section instead of the symbol. Basically we need
3428 to make sure that the dynamic relocations are done correctly, so in
3429 some cases we force the original symbol to be used. */
3430
3431 int
3432 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3433 {
3434 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3435 if (!IS_ELF)
3436 return 1;
3437
3438 /* Don't adjust pc-relative references to merge sections in 64-bit
3439 mode. */
3440 if (use_rela_relocations
3441 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3442 && fixP->fx_pcrel)
3443 return 0;
3444
3445   /* The x86_64 GOTPCREL relocations are represented as 32bit PC-relative
3446      ones and changed later by validate_fix.  */
3447 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3448 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3449 return 0;
3450
3451 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3452 for size relocations. */
3453 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3454 || fixP->fx_r_type == BFD_RELOC_SIZE64
3455 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3456 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3457 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3458 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3459 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3460 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3461 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3462 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3463 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3464 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3465 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3466 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3467 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3468 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3469 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3470 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3471 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3472 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3473 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3474 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3475 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3476 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3477 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3478 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3479 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3480 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3481 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3482 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3483 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3484 return 0;
3485 #endif
3486 return 1;
3487 }
3488
3489 static INLINE bool
3490 want_disp32 (const insn_template *t)
3491 {
3492 return flag_code != CODE_64BIT
3493 || i.prefix[ADDR_PREFIX]
3494 || (t->base_opcode == 0x8d
3495 && t->opcode_modifier.opcodespace == SPACE_BASE
3496 && (!i.types[1].bitfield.qword
3497 || t->opcode_modifier.size == SIZE32));
3498 }
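
/* Sketch (AT&T syntax, 64-bit mode): "lea sym, %eax" takes the 0x8d
   special case above since the destination is not a qword register, so a
   32-bit displacement is wanted; "lea sym, %rax" does not, leaving the
   displacement handling to the 64-bit paths.  */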
3499
3500 static int
3501 intel_float_operand (const char *mnemonic)
3502 {
3503 /* Note that the value returned is meaningful only for opcodes with (memory)
3504 operands, hence the code here is free to improperly handle opcodes that
3505 have no operands (for better performance and smaller code). */
3506
3507 if (mnemonic[0] != 'f')
3508 return 0; /* non-math */
3509
3510 switch (mnemonic[1])
3511 {
3512 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3513 the fs segment override prefix not currently handled because no
3514 call path can make opcodes without operands get here */
3515 case 'i':
3516 return 2 /* integer op */;
3517 case 'l':
3518 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3519 return 3; /* fldcw/fldenv */
3520 break;
3521 case 'n':
3522 if (mnemonic[2] != 'o' /* fnop */)
3523 return 3; /* non-waiting control op */
3524 break;
3525 case 'r':
3526 if (mnemonic[2] == 's')
3527 return 3; /* frstor/frstpm */
3528 break;
3529 case 's':
3530 if (mnemonic[2] == 'a')
3531 return 3; /* fsave */
3532 if (mnemonic[2] == 't')
3533 {
3534 switch (mnemonic[3])
3535 {
3536 case 'c': /* fstcw */
3537 case 'd': /* fstdw */
3538 case 'e': /* fstenv */
3539 case 's': /* fsts[gw] */
3540 return 3;
3541 }
3542 }
3543 break;
3544 case 'x':
3545 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3546 return 0; /* fxsave/fxrstor are not really math ops */
3547 break;
3548 }
3549
3550 return 1;
3551 }
3552
3553 static INLINE void
3554 install_template (const insn_template *t)
3555 {
3556 unsigned int l;
3557
3558 i.tm = *t;
3559
3560 /* Note that for pseudo prefixes this produces a length of 1. But for them
3561 the length isn't interesting at all. */
3562 for (l = 1; l < 4; ++l)
3563 if (!(t->base_opcode >> (8 * l)))
3564 break;
3565
3566 i.opcode_length = l;
3567 }
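
/* Sketch: a one-byte base_opcode such as 0x90 yields i.opcode_length 1,
   while a two-byte FPU encoding such as 0xd9ee (fldz) yields 2; the
   opcode space (0f, 0f38, ...) is carried separately in
   opcode_modifier.opcodespace and is never counted here.  */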
3568
3569 /* Build the VEX prefix. */
3570
3571 static void
3572 build_vex_prefix (const insn_template *t)
3573 {
3574 unsigned int register_specifier;
3575 unsigned int vector_length;
3576 unsigned int w;
3577
3578 /* Check register specifier. */
3579 if (i.vex.register_specifier)
3580 {
3581 register_specifier =
3582 ~register_number (i.vex.register_specifier) & 0xf;
3583 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3584 }
3585 else
3586 register_specifier = 0xf;
3587
3588 /* Use 2-byte VEX prefix by swapping destination and source operand
3589      if there is more than one register operand.  */
3590 if (i.reg_operands > 1
3591 && i.vec_encoding != vex_encoding_vex3
3592 && i.dir_encoding == dir_encoding_default
3593 && i.operands == i.reg_operands
3594 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3595 && i.tm.opcode_modifier.opcodespace == SPACE_0F
3596 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3597 && i.rex == REX_B)
3598 {
3599 unsigned int xchg = i.operands - 1;
3600 union i386_op temp_op;
3601 i386_operand_type temp_type;
3602
3603 temp_type = i.types[xchg];
3604 i.types[xchg] = i.types[0];
3605 i.types[0] = temp_type;
3606 temp_op = i.op[xchg];
3607 i.op[xchg] = i.op[0];
3608 i.op[0] = temp_op;
3609
3610 gas_assert (i.rm.mode == 3);
3611
3612 i.rex = REX_R;
3613 xchg = i.rm.regmem;
3614 i.rm.regmem = i.rm.reg;
3615 i.rm.reg = xchg;
3616
3617 if (i.tm.opcode_modifier.d)
3618 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3619 ? Opcode_ExtD : Opcode_SIMD_IntD;
3620 else /* Use the next insn. */
3621 install_template (&t[1]);
3622 }
3623
3624 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3625 are no memory operands and at least 3 register ones. */
3626 if (i.reg_operands >= 3
3627 && i.vec_encoding != vex_encoding_vex3
3628 && i.reg_operands == i.operands - i.imm_operands
3629 && i.tm.opcode_modifier.vex
3630 && i.tm.opcode_modifier.commutative
3631 && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3632 && i.rex == REX_B
3633 && i.vex.register_specifier
3634 && !(i.vex.register_specifier->reg_flags & RegRex))
3635 {
3636 unsigned int xchg = i.operands - i.reg_operands;
3637 union i386_op temp_op;
3638 i386_operand_type temp_type;
3639
3640 gas_assert (i.tm.opcode_modifier.opcodespace == SPACE_0F);
3641 gas_assert (!i.tm.opcode_modifier.sae);
3642 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3643 &i.types[i.operands - 3]));
3644 gas_assert (i.rm.mode == 3);
3645
3646 temp_type = i.types[xchg];
3647 i.types[xchg] = i.types[xchg + 1];
3648 i.types[xchg + 1] = temp_type;
3649 temp_op = i.op[xchg];
3650 i.op[xchg] = i.op[xchg + 1];
3651 i.op[xchg + 1] = temp_op;
3652
3653 i.rex = 0;
3654 xchg = i.rm.regmem | 8;
3655 i.rm.regmem = ~register_specifier & 0xf;
3656 gas_assert (!(i.rm.regmem & 8));
3657 i.vex.register_specifier += xchg - i.rm.regmem;
3658 register_specifier = ~xchg & 0xf;
3659 }
3660
3661 if (i.tm.opcode_modifier.vex == VEXScalar)
3662 vector_length = avxscalar;
3663 else if (i.tm.opcode_modifier.vex == VEX256)
3664 vector_length = 1;
3665 else
3666 {
3667 unsigned int op;
3668
3669 /* Determine vector length from the last multi-length vector
3670 operand. */
3671 vector_length = 0;
3672 for (op = t->operands; op--;)
3673 if (t->operand_types[op].bitfield.xmmword
3674 && t->operand_types[op].bitfield.ymmword
3675 && i.types[op].bitfield.ymmword)
3676 {
3677 vector_length = 1;
3678 break;
3679 }
3680 }
3681
3682 /* Check the REX.W bit and VEXW. */
3683 if (i.tm.opcode_modifier.vexw == VEXWIG)
3684 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3685 else if (i.tm.opcode_modifier.vexw)
3686 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3687 else
3688 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3689
3690 /* Use 2-byte VEX prefix if possible. */
3691 if (w == 0
3692 && i.vec_encoding != vex_encoding_vex3
3693 && i.tm.opcode_modifier.opcodespace == SPACE_0F
3694 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3695 {
3696 /* 2-byte VEX prefix. */
3697 unsigned int r;
3698
3699 i.vex.length = 2;
3700 i.vex.bytes[0] = 0xc5;
3701
3702 /* Check the REX.R bit. */
3703 r = (i.rex & REX_R) ? 0 : 1;
3704 i.vex.bytes[1] = (r << 7
3705 | register_specifier << 3
3706 | vector_length << 2
3707 | i.tm.opcode_modifier.opcodeprefix);
3708 }
3709 else
3710 {
3711 /* 3-byte VEX prefix. */
3712 i.vex.length = 3;
3713
3714 switch (i.tm.opcode_modifier.opcodespace)
3715 {
3716 case SPACE_0F:
3717 case SPACE_0F38:
3718 case SPACE_0F3A:
3719 i.vex.bytes[0] = 0xc4;
3720 break;
3721 case SPACE_XOP08:
3722 case SPACE_XOP09:
3723 case SPACE_XOP0A:
3724 i.vex.bytes[0] = 0x8f;
3725 break;
3726 default:
3727 abort ();
3728 }
3729
3730       /* The high 3 bits of the second VEX byte are 1's complement
3731 of RXB bits from REX. */
3732 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3733
3734 i.vex.bytes[2] = (w << 7
3735 | register_specifier << 3
3736 | vector_length << 2
3737 | i.tm.opcode_modifier.opcodeprefix);
3738 }
3739 }
3740
3741 static INLINE bool
3742 is_evex_encoding (const insn_template *t)
3743 {
3744 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3745 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3746 || t->opcode_modifier.sae;
3747 }
3748
3749 static INLINE bool
3750 is_any_vex_encoding (const insn_template *t)
3751 {
3752 return t->opcode_modifier.vex || is_evex_encoding (t);
3753 }
3754
3755 static unsigned int
3756 get_broadcast_bytes (const insn_template *t, bool diag)
3757 {
3758 unsigned int op, bytes;
3759 const i386_operand_type *types;
3760
3761 if (i.broadcast.type)
3762 return i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
3763 * i.broadcast.type);
3764
3765 gas_assert (intel_syntax);
3766
3767 for (op = 0; op < t->operands; ++op)
3768 if (t->operand_types[op].bitfield.baseindex)
3769 break;
3770
3771 gas_assert (op < t->operands);
3772
3773 if (t->opcode_modifier.evex
3774 && t->opcode_modifier.evex != EVEXDYN)
3775 switch (i.broadcast.bytes)
3776 {
3777 case 1:
3778 if (t->operand_types[op].bitfield.word)
3779 return 2;
3780 /* Fall through. */
3781 case 2:
3782 if (t->operand_types[op].bitfield.dword)
3783 return 4;
3784 /* Fall through. */
3785 case 4:
3786 if (t->operand_types[op].bitfield.qword)
3787 return 8;
3788 /* Fall through. */
3789 case 8:
3790 if (t->operand_types[op].bitfield.xmmword)
3791 return 16;
3792 if (t->operand_types[op].bitfield.ymmword)
3793 return 32;
3794 if (t->operand_types[op].bitfield.zmmword)
3795 return 64;
3796 /* Fall through. */
3797 default:
3798 abort ();
3799 }
3800
3801 gas_assert (op + 1 < t->operands);
3802
3803 if (t->operand_types[op + 1].bitfield.xmmword
3804 + t->operand_types[op + 1].bitfield.ymmword
3805 + t->operand_types[op + 1].bitfield.zmmword > 1)
3806 {
3807 types = &i.types[op + 1];
3808 diag = false;
3809 }
3810 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
3811 types = &t->operand_types[op];
3812
3813 if (types->bitfield.zmmword)
3814 bytes = 64;
3815 else if (types->bitfield.ymmword)
3816 bytes = 32;
3817 else
3818 bytes = 16;
3819
3820 if (diag)
3821 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3822 t->name, bytes * 8);
3823
3824 return bytes;
3825 }
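
/* Worked example (AT&T syntax; illustrative, assuming dword elements
   encode as broadcast modifier value 3 in the opcode table): for
   "vaddps (%rax){1to16}, %zmm1, %zmm2" with i.broadcast.type == 16, the
   early return above computes (1 << (3 - 1)) * 16 == 64 bytes, i.e. a
   full zmm-width broadcast.  */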
3826
3827 /* Build the EVEX prefix. */
3828
3829 static void
3830 build_evex_prefix (void)
3831 {
3832 unsigned int register_specifier, w;
3833 rex_byte vrex_used = 0;
3834
3835 /* Check register specifier. */
3836 if (i.vex.register_specifier)
3837 {
3838 gas_assert ((i.vrex & REX_X) == 0);
3839
3840 register_specifier = i.vex.register_specifier->reg_num;
3841 if ((i.vex.register_specifier->reg_flags & RegRex))
3842 register_specifier += 8;
3843 /* The upper 16 registers are encoded in the fourth byte of the
3844 EVEX prefix. */
3845 if (!(i.vex.register_specifier->reg_flags & RegVRex))
3846 i.vex.bytes[3] = 0x8;
3847 register_specifier = ~register_specifier & 0xf;
3848 }
3849 else
3850 {
3851 register_specifier = 0xf;
3852
3853 /* Encode upper 16 vector index register in the fourth byte of
3854 the EVEX prefix. */
3855 if (!(i.vrex & REX_X))
3856 i.vex.bytes[3] = 0x8;
3857 else
3858 vrex_used |= REX_X;
3859 }
3860
3861 /* 4 byte EVEX prefix. */
3862 i.vex.length = 4;
3863 i.vex.bytes[0] = 0x62;
3864
3865   /* The high 3 bits of the second EVEX byte are 1's complement of RXB
3866 bits from REX. */
3867 gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
3868 gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_EVEXMAP6);
3869 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3870
3871   /* The fifth bit of the second EVEX byte is 1's complement of the
3872 REX_R bit in VREX. */
3873 if (!(i.vrex & REX_R))
3874 i.vex.bytes[1] |= 0x10;
3875 else
3876 vrex_used |= REX_R;
3877
3878 if ((i.reg_operands + i.imm_operands) == i.operands)
3879 {
3880 /* When all operands are registers, the REX_X bit in REX is not
3881 used. We reuse it to encode the upper 16 registers, which is
3882 indicated by the REX_B bit in VREX. The REX_X bit is encoded
3883 	 as 1's complement.  */
3884 if ((i.vrex & REX_B))
3885 {
3886 vrex_used |= REX_B;
3887 i.vex.bytes[1] &= ~0x40;
3888 }
3889 }
3890
3891 /* EVEX instructions shouldn't need the REX prefix. */
3892 i.vrex &= ~vrex_used;
3893 gas_assert (i.vrex == 0);
3894
3895 /* Check the REX.W bit and VEXW. */
3896 if (i.tm.opcode_modifier.vexw == VEXWIG)
3897 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3898 else if (i.tm.opcode_modifier.vexw)
3899 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3900 else
3901 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3902
3903 /* The third byte of the EVEX prefix. */
3904 i.vex.bytes[2] = ((w << 7)
3905 | (register_specifier << 3)
3906 | 4 /* Encode the U bit. */
3907 | i.tm.opcode_modifier.opcodeprefix);
3908
3909 /* The fourth byte of the EVEX prefix. */
3910 /* The zeroing-masking bit. */
3911 if (i.mask.reg && i.mask.zeroing)
3912 i.vex.bytes[3] |= 0x80;
3913
3914 /* Don't always set the broadcast bit if there is no RC. */
3915 if (i.rounding.type == rc_none)
3916 {
3917 /* Encode the vector length. */
3918 unsigned int vec_length;
3919
3920 if (!i.tm.opcode_modifier.evex
3921 || i.tm.opcode_modifier.evex == EVEXDYN)
3922 {
3923 unsigned int op;
3924
3925 /* Determine vector length from the last multi-length vector
3926 operand. */
3927 for (op = i.operands; op--;)
3928 if (i.tm.operand_types[op].bitfield.xmmword
3929 + i.tm.operand_types[op].bitfield.ymmword
3930 + i.tm.operand_types[op].bitfield.zmmword > 1)
3931 {
3932 if (i.types[op].bitfield.zmmword)
3933 {
3934 i.tm.opcode_modifier.evex = EVEX512;
3935 break;
3936 }
3937 else if (i.types[op].bitfield.ymmword)
3938 {
3939 i.tm.opcode_modifier.evex = EVEX256;
3940 break;
3941 }
3942 else if (i.types[op].bitfield.xmmword)
3943 {
3944 i.tm.opcode_modifier.evex = EVEX128;
3945 break;
3946 }
3947 else if (i.broadcast.bytes && op == i.broadcast.operand)
3948 {
3949 switch (get_broadcast_bytes (&i.tm, true))
3950 {
3951 case 64:
3952 i.tm.opcode_modifier.evex = EVEX512;
3953 break;
3954 case 32:
3955 i.tm.opcode_modifier.evex = EVEX256;
3956 break;
3957 case 16:
3958 i.tm.opcode_modifier.evex = EVEX128;
3959 break;
3960 default:
3961 abort ();
3962 }
3963 break;
3964 }
3965 }
3966
3967 if (op >= MAX_OPERANDS)
3968 abort ();
3969 }
3970
3971 switch (i.tm.opcode_modifier.evex)
3972 {
3973 case EVEXLIG: /* LL' is ignored */
3974 vec_length = evexlig << 5;
3975 break;
3976 case EVEX128:
3977 vec_length = 0 << 5;
3978 break;
3979 case EVEX256:
3980 vec_length = 1 << 5;
3981 break;
3982 case EVEX512:
3983 vec_length = 2 << 5;
3984 break;
3985 default:
3986 abort ();
3987 break;
3988 }
3989 i.vex.bytes[3] |= vec_length;
3990 /* Encode the broadcast bit. */
3991 if (i.broadcast.bytes)
3992 i.vex.bytes[3] |= 0x10;
3993 }
3994 else if (i.rounding.type != saeonly)
3995 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3996 else
3997 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3998
3999 if (i.mask.reg)
4000 i.vex.bytes[3] |= i.mask.reg->reg_num;
4001 }
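
/* Illustration only (compiled out): a minimal, self-contained sketch of how
   the four EVEX prefix bytes built above fit together.  Field names follow
   the comments in build_evex_prefix(); this hypothetical helper is not part
   of the assembler.  The complemented R' bit (0x10 of byte 1) and the
   zeroing (0x80) and broadcast/RC (0x10) bits of byte 3 are OR'ed in
   separately, as done above.  */
#if 0
static void
evex_pack_demo (unsigned int rxb,	/* REX.R/X/B bits, uninverted */
		unsigned int mmm,	/* opcode space (map) */
		unsigned int w,		/* EVEX.W */
		unsigned int vvvv,	/* register specifier, uninverted */
		unsigned int pp,	/* 66/F3/F2 opcode prefix encoding */
		unsigned int ll,	/* vector length (0/1/2) */
		unsigned int aaa,	/* mask register number */
		unsigned char bytes[4])
{
  bytes[0] = 0x62;					/* fixed escape byte */
  bytes[1] = ((~rxb & 0x7) << 5) | (mmm & 0x7);		/* complemented RXB */
  bytes[2] = (w << 7) | ((~vvvv & 0xf) << 3) | 0x4 | (pp & 0x3);
  bytes[3] = ((ll & 0x3) << 5) | 0x8 | (aaa & 0x7);	/* 0x8: V' (regs < 16) */
}
#endif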
4002
4003 static void
4004 process_immext (void)
4005 {
4006 expressionS *exp;
4007
4008 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4009 which is coded in the same place as an 8-bit immediate field
4010 would be. Here we fake an 8-bit immediate operand from the
4011 opcode suffix stored in tm.extension_opcode.
4012
4013 AVX instructions also use this encoding for some of the
4014 3-argument instructions. */
4015
4016 gas_assert (i.imm_operands <= 1
4017 && (i.operands <= 2
4018 || (is_any_vex_encoding (&i.tm)
4019 && i.operands <= 4)));
4020
4021 exp = &im_expressions[i.imm_operands++];
4022 i.op[i.operands].imms = exp;
4023 i.types[i.operands].bitfield.imm8 = 1;
4024 i.operands++;
4025 exp->X_op = O_constant;
4026 exp->X_add_number = i.tm.extension_opcode;
4027 i.tm.extension_opcode = None;
4028 }
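
/* For example (illustrative; 0x9e is PFADD's 3DNow! opcode suffix):
   "pfadd %mm1, %mm0" is encoded as 0f 0f c1 9e -- the trailing 0x9e is
   tm.extension_opcode emitted as if it were an imm8 operand.  */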
4029
4030
4031 static int
4032 check_hle (void)
4033 {
4034 switch (i.tm.opcode_modifier.prefixok)
4035 {
4036 default:
4037 abort ();
4038 case PrefixLock:
4039 case PrefixNone:
4040 case PrefixNoTrack:
4041 case PrefixRep:
4042 as_bad (_("invalid instruction `%s' after `%s'"),
4043 i.tm.name, i.hle_prefix);
4044 return 0;
4045 case PrefixHLELock:
4046 if (i.prefix[LOCK_PREFIX])
4047 return 1;
4048 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4049 return 0;
4050 case PrefixHLEAny:
4051 return 1;
4052 case PrefixHLERelease:
4053 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4054 {
4055 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4056 i.tm.name);
4057 return 0;
4058 }
4059 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4060 {
4061 as_bad (_("memory destination needed for instruction `%s'"
4062 " after `xrelease'"), i.tm.name);
4063 return 0;
4064 }
4065 return 1;
4066 }
4067 }
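
/* Illustrative examples of the checks above (AT&T syntax; diagnostics
   paraphrase the as_bad messages):
     xacquire lock incl (%rax)  -> accepted (HLE-capable insn + lock)
     xacquire incl (%rax)       -> rejected, missing `lock'
     xrelease movl %eax, (%rbx) -> accepted (memory destination)
     xrelease movl (%rbx), %eax -> rejected, memory destination needed  */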
4068
4069 /* Encode aligned vector move as unaligned vector move. */
4070
4071 static void
4072 encode_with_unaligned_vector_move (void)
4073 {
4074 switch (i.tm.base_opcode)
4075 {
4076 case 0x28: /* Load instructions. */
4077 case 0x29: /* Store instructions. */
4078 /* movaps/movapd/vmovaps/vmovapd. */
4079 if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4080 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4081 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4082 break;
4083 case 0x6f: /* Load instructions. */
4084 case 0x7f: /* Store instructions. */
4085 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4086 if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4087 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4088 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4089 break;
4090 default:
4091 break;
4092 }
4093 }
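
/* Worked example of the rewrite above (byte sequences assumed from the
   SDM encodings):
     movaps %xmm1, (%rax)   0f 29 08     ->  movups %xmm1, (%rax)  0f 11 08
     movdqa %xmm1, (%rax)   66 0f 7f 08  ->  movdqu                f3 0f 7f 08
   The opcode (or its mandatory prefix) changes; the ModRM byte stays.  */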
4094
4095 /* Try the shortest encoding by shortening operand size. */
4096
4097 static void
4098 optimize_encoding (void)
4099 {
4100 unsigned int j;
4101
4102 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4103 && i.tm.base_opcode == 0x8d)
4104 {
4105 /* Optimize: -O:
4106 lea symbol, %rN -> mov $symbol, %rN
4107 lea (%rM), %rN -> mov %rM, %rN
4108 lea (,%rM,1), %rN -> mov %rM, %rN
4109
4110 and in 32-bit mode for 16-bit addressing
4111
4112 lea (%rM), %rN -> movzx %rM, %rN
4113
4114 and in 64-bit mode zap 32-bit addressing in favor of using a
4115 32-bit (or less) destination.
4116 */
4117 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4118 {
4119 if (!i.op[1].regs->reg_type.bitfield.word)
4120 i.tm.opcode_modifier.size = SIZE32;
4121 i.prefix[ADDR_PREFIX] = 0;
4122 }
4123
4124 if (!i.index_reg && !i.base_reg)
4125 {
4126 /* Handle:
4127 lea symbol, %rN -> mov $symbol, %rN
4128 */
4129 if (flag_code == CODE_64BIT)
4130 {
4131 /* Don't transform a relocation to a 16-bit one. */
4132 if (i.op[0].disps
4133 && i.op[0].disps->X_op != O_constant
4134 && i.op[1].regs->reg_type.bitfield.word)
4135 return;
4136
4137 if (!i.op[1].regs->reg_type.bitfield.qword
4138 || i.tm.opcode_modifier.size == SIZE32)
4139 {
4140 i.tm.base_opcode = 0xb8;
4141 i.tm.opcode_modifier.modrm = 0;
4142 if (!i.op[1].regs->reg_type.bitfield.word)
4143 i.types[0].bitfield.imm32 = 1;
4144 else
4145 {
4146 i.tm.opcode_modifier.size = SIZE16;
4147 i.types[0].bitfield.imm16 = 1;
4148 }
4149 }
4150 else
4151 {
4152 /* Subject to further optimization below. */
4153 i.tm.base_opcode = 0xc7;
4154 i.tm.extension_opcode = 0;
4155 i.types[0].bitfield.imm32s = 1;
4156 i.types[0].bitfield.baseindex = 0;
4157 }
4158 }
4159 /* Outside of 64-bit mode, address and operand sizes have to match if
4160 a relocation is involved, as otherwise we currently wouldn't (and
4161 in some cases couldn't) express the relocation correctly. */
4162 else if (i.op[0].disps
4163 && i.op[0].disps->X_op != O_constant
4164 && ((!i.prefix[ADDR_PREFIX])
4165 != (flag_code == CODE_32BIT
4166 ? i.op[1].regs->reg_type.bitfield.dword
4167 : i.op[1].regs->reg_type.bitfield.word)))
4168 return;
4169 /* In 16-bit mode, converting LEA with 16-bit addressing and a 32-bit
4170 destination is going to grow encoding size. */
4171 else if (flag_code == CODE_16BIT
4172 && (optimize <= 1 || optimize_for_space)
4173 && !i.prefix[ADDR_PREFIX]
4174 && i.op[1].regs->reg_type.bitfield.dword)
4175 return;
4176 else
4177 {
4178 i.tm.base_opcode = 0xb8;
4179 i.tm.opcode_modifier.modrm = 0;
4180 if (i.op[1].regs->reg_type.bitfield.dword)
4181 i.types[0].bitfield.imm32 = 1;
4182 else
4183 i.types[0].bitfield.imm16 = 1;
4184
4185 if (i.op[0].disps
4186 && i.op[0].disps->X_op == O_constant
4187 && i.op[1].regs->reg_type.bitfield.dword
4188 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4189 GCC 5. */
4190 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4191 i.op[0].disps->X_add_number &= 0xffff;
4192 }
4193
4194 i.tm.operand_types[0] = i.types[0];
4195 i.imm_operands = 1;
4196 if (!i.op[0].imms)
4197 {
4198 i.op[0].imms = &im_expressions[0];
4199 i.op[0].imms->X_op = O_absent;
4200 }
4201 }
4202 else if (i.op[0].disps
4203 && (i.op[0].disps->X_op != O_constant
4204 || i.op[0].disps->X_add_number))
4205 return;
4206 else
4207 {
4208 /* Handle:
4209 lea (%rM), %rN -> mov %rM, %rN
4210 lea (,%rM,1), %rN -> mov %rM, %rN
4211 lea (%rM), %rN -> movzx %rM, %rN
4212 */
4213 const reg_entry *addr_reg;
4214
4215 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4216 addr_reg = i.base_reg;
4217 else if (!i.base_reg
4218 && i.index_reg->reg_num != RegIZ
4219 && !i.log2_scale_factor)
4220 addr_reg = i.index_reg;
4221 else
4222 return;
4223
4224 if (addr_reg->reg_type.bitfield.word
4225 && i.op[1].regs->reg_type.bitfield.dword)
4226 {
4227 if (flag_code != CODE_32BIT)
4228 return;
4229 i.tm.opcode_modifier.opcodespace = SPACE_0F;
4230 i.tm.base_opcode = 0xb7;
4231 }
4232 else
4233 i.tm.base_opcode = 0x8b;
4234
4235 if (addr_reg->reg_type.bitfield.dword
4236 && i.op[1].regs->reg_type.bitfield.qword)
4237 i.tm.opcode_modifier.size = SIZE32;
4238
4239 i.op[0].regs = addr_reg;
4240 i.reg_operands = 2;
4241 }
4242
4243 i.mem_operands = 0;
4244 i.disp_operands = 0;
4245 i.prefix[ADDR_PREFIX] = 0;
4246 i.prefix[SEG_PREFIX] = 0;
4247 i.seg[0] = NULL;
4248 }
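
/* A few concrete instances of the lea transformations above (64-bit
   mode; byte sequences are illustrative):
     lea symbol, %eax    -> movl $symbol, %eax  (b8 imm32, no ModRM)
     lea symbol, %rax    -> movq $symbol, %rax  (48 c7 c0 imm32s)
     lea (%rsi), %rdi    -> movq %rsi, %rdi     (48 8b fe)
     lea (,%rsi,1), %edi -> movl %esi, %edi     (8b fe)  */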
4249
4250 if (optimize_for_space
4251 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4252 && i.reg_operands == 1
4253 && i.imm_operands == 1
4254 && !i.types[1].bitfield.byte
4255 && i.op[0].imms->X_op == O_constant
4256 && fits_in_imm7 (i.op[0].imms->X_add_number)
4257 && (i.tm.base_opcode == 0xa8
4258 || (i.tm.base_opcode == 0xf6
4259 && i.tm.extension_opcode == 0x0)))
4260 {
4261 /* Optimize: -Os:
4262 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4263 */
4264 unsigned int base_regnum = i.op[1].regs->reg_num;
4265 if (flag_code == CODE_64BIT || base_regnum < 4)
4266 {
4267 i.types[1].bitfield.byte = 1;
4268 /* Ignore the suffix. */
4269 i.suffix = 0;
4270 /* Convert to byte registers. */
4271 if (i.types[1].bitfield.word)
4272 j = 16;
4273 else if (i.types[1].bitfield.dword)
4274 j = 32;
4275 else
4276 j = 48;
4277 if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4278 j += 8;
4279 i.op[1].regs -= j;
4280 }
4281 }
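
/* e.g. with -Os: "testl $0x40, %eax" (a9 40 00 00 00, 5 bytes) becomes
   "testb $0x40, %al" (a8 40, 2 bytes).  The register index arithmetic
   above relies on the layout of the register table: the word, dword and
   qword entries sit at fixed distances from their byte counterparts.  */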
4282 else if (flag_code == CODE_64BIT
4283 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4284 && ((i.types[1].bitfield.qword
4285 && i.reg_operands == 1
4286 && i.imm_operands == 1
4287 && i.op[0].imms->X_op == O_constant
4288 && ((i.tm.base_opcode == 0xb8
4289 && i.tm.extension_opcode == None
4290 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4291 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4292 && ((i.tm.base_opcode == 0x24
4293 || i.tm.base_opcode == 0xa8)
4294 || (i.tm.base_opcode == 0x80
4295 && i.tm.extension_opcode == 0x4)
4296 || ((i.tm.base_opcode == 0xf6
4297 || (i.tm.base_opcode | 1) == 0xc7)
4298 && i.tm.extension_opcode == 0x0)))
4299 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4300 && i.tm.base_opcode == 0x83
4301 && i.tm.extension_opcode == 0x4)))
4302 || (i.types[0].bitfield.qword
4303 && ((i.reg_operands == 2
4304 && i.op[0].regs == i.op[1].regs
4305 && (i.tm.base_opcode == 0x30
4306 || i.tm.base_opcode == 0x28))
4307 || (i.reg_operands == 1
4308 && i.operands == 1
4309 && i.tm.base_opcode == 0x30)))))
4310 {
4311 /* Optimize: -O:
4312 andq $imm31, %r64 -> andl $imm31, %r32
4313 andq $imm7, %r64 -> andl $imm7, %r32
4314 testq $imm31, %r64 -> testl $imm31, %r32
4315 xorq %r64, %r64 -> xorl %r32, %r32
4316 subq %r64, %r64 -> subl %r32, %r32
4317 movq $imm31, %r64 -> movl $imm31, %r32
4318 movq $imm32, %r64 -> movl $imm32, %r32
4319 */
4320 i.tm.opcode_modifier.norex64 = 1;
4321 if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4322 {
4323 /* Handle
4324 movq $imm31, %r64 -> movl $imm31, %r32
4325 movq $imm32, %r64 -> movl $imm32, %r32
4326 */
4327 i.tm.operand_types[0].bitfield.imm32 = 1;
4328 i.tm.operand_types[0].bitfield.imm32s = 0;
4329 i.tm.operand_types[0].bitfield.imm64 = 0;
4330 i.types[0].bitfield.imm32 = 1;
4331 i.types[0].bitfield.imm32s = 0;
4332 i.types[0].bitfield.imm64 = 0;
4333 i.types[1].bitfield.dword = 1;
4334 i.types[1].bitfield.qword = 0;
4335 if ((i.tm.base_opcode | 1) == 0xc7)
4336 {
4337 /* Handle
4338 movq $imm31, %r64 -> movl $imm31, %r32
4339 */
4340 i.tm.base_opcode = 0xb8;
4341 i.tm.extension_opcode = None;
4342 i.tm.opcode_modifier.w = 0;
4343 i.tm.opcode_modifier.modrm = 0;
4344 }
4345 }
4346 }
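
/* Concrete effect of the transformations above:
     andq $0x7f, %rax  48 83 e0 7f  ->  andl $0x7f, %eax  83 e0 7f
     xorq %rbx, %rbx   48 31 db     ->  xorl %ebx, %ebx   31 db
   Dropping REX.W is safe because a 32-bit destination is zero-extended
   to 64 bits anyway.  */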
4347 else if (optimize > 1
4348 && !optimize_for_space
4349 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4350 && i.reg_operands == 2
4351 && i.op[0].regs == i.op[1].regs
4352 && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4353 || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4354 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4355 {
4356 /* Optimize: -O2:
4357 andb %rN, %rN -> testb %rN, %rN
4358 andw %rN, %rN -> testw %rN, %rN
4359 andq %rN, %rN -> testq %rN, %rN
4360 orb %rN, %rN -> testb %rN, %rN
4361 orw %rN, %rN -> testw %rN, %rN
4362 orq %rN, %rN -> testq %rN, %rN
4363
4364 and outside of 64-bit mode
4365
4366 andl %rN, %rN -> testl %rN, %rN
4367 orl %rN, %rN -> testl %rN, %rN
4368 */
4369 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4370 }
4371 else if (i.reg_operands == 3
4372 && i.op[0].regs == i.op[1].regs
4373 && !i.types[2].bitfield.xmmword
4374 && (i.tm.opcode_modifier.vex
4375 || ((!i.mask.reg || i.mask.zeroing)
4376 && is_evex_encoding (&i.tm)
4377 && (i.vec_encoding != vex_encoding_evex
4378 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4379 || i.tm.cpu_flags.bitfield.cpuavx512vl
4380 || (i.tm.operand_types[2].bitfield.zmmword
4381 && i.types[2].bitfield.ymmword))))
4382 && i.tm.opcode_modifier.opcodespace == SPACE_0F
4383 && ((i.tm.base_opcode | 2) == 0x57
4384 || i.tm.base_opcode == 0xdf
4385 || i.tm.base_opcode == 0xef
4386 || (i.tm.base_opcode | 3) == 0xfb
4387 || i.tm.base_opcode == 0x42
4388 || i.tm.base_opcode == 0x47))
4389 {
4390 /* Optimize: -O1:
4391 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4392 vpsubq and vpsubw:
4393 EVEX VOP %zmmM, %zmmM, %zmmN
4394 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4395 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4396 EVEX VOP %ymmM, %ymmM, %ymmN
4397 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4398 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4399 VEX VOP %ymmM, %ymmM, %ymmN
4400 -> VEX VOP %xmmM, %xmmM, %xmmN
4401 VOP, one of vpandn and vpxor:
4402 VEX VOP %ymmM, %ymmM, %ymmN
4403 -> VEX VOP %xmmM, %xmmM, %xmmN
4404 VOP, one of vpandnd and vpandnq:
4405 EVEX VOP %zmmM, %zmmM, %zmmN
4406 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4407 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4408 EVEX VOP %ymmM, %ymmM, %ymmN
4409 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4410 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4411 VOP, one of vpxord and vpxorq:
4412 EVEX VOP %zmmM, %zmmM, %zmmN
4413 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4414 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4415 EVEX VOP %ymmM, %ymmM, %ymmN
4416 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4417 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4418 VOP, one of kxord and kxorq:
4419 VEX VOP %kM, %kM, %kN
4420 -> VEX kxorw %kM, %kM, %kN
4421 VOP, one of kandnd and kandnq:
4422 VEX VOP %kM, %kM, %kN
4423 -> VEX kandnw %kM, %kM, %kN
4424 */
4425 if (is_evex_encoding (&i.tm))
4426 {
4427 if (i.vec_encoding != vex_encoding_evex)
4428 {
4429 i.tm.opcode_modifier.vex = VEX128;
4430 i.tm.opcode_modifier.vexw = VEXW0;
4431 i.tm.opcode_modifier.evex = 0;
4432 }
4433 else if (optimize > 1)
4434 i.tm.opcode_modifier.evex = EVEX128;
4435 else
4436 return;
4437 }
4438 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4439 {
4440 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4441 i.tm.opcode_modifier.vexw = VEXW0;
4442 }
4443 else
4444 i.tm.opcode_modifier.vex = VEX128;
4445
4446 if (i.tm.opcode_modifier.vex)
4447 for (j = 0; j < 3; j++)
4448 {
4449 i.types[j].bitfield.xmmword = 1;
4450 i.types[j].bitfield.ymmword = 0;
4451 }
4452 }
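
/* Worked example for the EVEX -> VEX narrowing above (bytes assumed):
     vpxord %zmm1, %zmm1, %zmm1   62 f1 75 48 ef c9   (6 bytes)
  -> vpxor  %xmm1, %xmm1, %xmm1   c5 f1 ef c9         (4 bytes)
   The result is the same since the upper bits are zeroed either way.  */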
4453 else if (i.vec_encoding != vex_encoding_evex
4454 && !i.types[0].bitfield.zmmword
4455 && !i.types[1].bitfield.zmmword
4456 && !i.mask.reg
4457 && !i.broadcast.bytes
4458 && is_evex_encoding (&i.tm)
4459 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4460 || (i.tm.base_opcode & ~4) == 0xdb
4461 || (i.tm.base_opcode & ~4) == 0xeb)
4462 && i.tm.extension_opcode == None)
4463 {
4464 /* Optimize: -O1:
4465 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4466 vmovdqu32 and vmovdqu64:
4467 EVEX VOP %xmmM, %xmmN
4468 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4469 EVEX VOP %ymmM, %ymmN
4470 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4471 EVEX VOP %xmmM, mem
4472 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4473 EVEX VOP %ymmM, mem
4474 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4475 EVEX VOP mem, %xmmN
4476 -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4477 EVEX VOP mem, %ymmN
4478 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4479 VOP, one of vpand, vpandn, vpor, vpxor:
4480 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4481 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4482 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4483 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4484 EVEX VOP{d,q} mem, %xmmM, %xmmN
4485 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4486 EVEX VOP{d,q} mem, %ymmM, %ymmN
4487 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4488 */
4489 for (j = 0; j < i.operands; j++)
4490 if (operand_type_check (i.types[j], disp)
4491 && i.op[j].disps->X_op == O_constant)
4492 {
4493 /* Since the VEX prefix has 2 or 3 bytes while the EVEX prefix
4494 has 4 bytes, and EVEX Disp8 has 1 byte while VEX Disp32 has 4
4495 bytes, we choose EVEX Disp8 over VEX Disp32. */
4496 int evex_disp8, vex_disp8;
4497 unsigned int memshift = i.memshift;
4498 offsetT n = i.op[j].disps->X_add_number;
4499
4500 evex_disp8 = fits_in_disp8 (n);
4501 i.memshift = 0;
4502 vex_disp8 = fits_in_disp8 (n);
4503 if (evex_disp8 != vex_disp8)
4504 {
4505 i.memshift = memshift;
4506 return;
4507 }
4508
4509 i.types[j].bitfield.disp8 = vex_disp8;
4510 break;
4511 }
4512 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4513 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4514 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4515 i.tm.opcode_modifier.vex
4516 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4517 i.tm.opcode_modifier.vexw = VEXW0;
4518 /* VPAND, VPOR, and VPXOR are commutative. */
4519 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4520 i.tm.opcode_modifier.commutative = 1;
4521 i.tm.opcode_modifier.evex = 0;
4522 i.tm.opcode_modifier.masking = 0;
4523 i.tm.opcode_modifier.broadcast = 0;
4524 i.tm.opcode_modifier.disp8memshift = 0;
4525 i.memshift = 0;
4526 if (j < i.operands)
4527 i.types[j].bitfield.disp8
4528 = fits_in_disp8 (i.op[j].disps->X_add_number);
4529 }
4530 }
4531
4532 /* Return non-zero for load instruction. */
4533
4534 static int
4535 load_insn_p (void)
4536 {
4537 unsigned int dest;
4538 int any_vex_p = is_any_vex_encoding (&i.tm);
4539 unsigned int base_opcode = i.tm.base_opcode | 1;
4540
4541 if (!any_vex_p)
4542 {
4543 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4544 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
4545 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4546 return 0;
4547
4548 /* pop. */
4549 if (strcmp (i.tm.name, "pop") == 0)
4550 return 1;
4551 }
4552
4553 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4554 {
4555 /* popf, popa. */
4556 if (i.tm.base_opcode == 0x9d
4557 || i.tm.base_opcode == 0x61)
4558 return 1;
4559
4560 /* movs, cmps, lods, scas. */
4561 if ((i.tm.base_opcode | 0xb) == 0xaf)
4562 return 1;
4563
4564 /* outs, xlatb. */
4565 if (base_opcode == 0x6f
4566 || i.tm.base_opcode == 0xd7)
4567 return 1;
4568 /* NB: AMD-specific insns with implicit memory operands are
4569 intentionally not covered. */
4570 }
4571
4572 /* No memory operand. */
4573 if (!i.mem_operands)
4574 return 0;
4575
4576 if (any_vex_p)
4577 {
4578 /* vldmxcsr. */
4579 if (i.tm.base_opcode == 0xae
4580 && i.tm.opcode_modifier.vex
4581 && i.tm.opcode_modifier.opcodespace == SPACE_0F
4582 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4583 && i.tm.extension_opcode == 2)
4584 return 1;
4585 }
4586 else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4587 {
4588 /* test, not, neg, mul, imul, div, idiv. */
4589 if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4590 && i.tm.extension_opcode != 1)
4591 return 1;
4592
4593 /* inc, dec. */
4594 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4595 return 1;
4596
4597 /* add, or, adc, sbb, and, sub, xor, cmp. */
4598 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4599 return 1;
4600
4601 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4602 if ((base_opcode == 0xc1
4603 || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4604 && i.tm.extension_opcode != 6)
4605 return 1;
4606
4607 /* Check for x87 instructions. */
4608 if (base_opcode >= 0xd8 && base_opcode <= 0xdf)
4609 {
4610 /* Skip fst, fstp, fstenv, fstcw. */
4611 if (i.tm.base_opcode == 0xd9
4612 && (i.tm.extension_opcode == 2
4613 || i.tm.extension_opcode == 3
4614 || i.tm.extension_opcode == 6
4615 || i.tm.extension_opcode == 7))
4616 return 0;
4617
4618 /* Skip fisttp, fist, fistp, fstp. */
4619 if (i.tm.base_opcode == 0xdb
4620 && (i.tm.extension_opcode == 1
4621 || i.tm.extension_opcode == 2
4622 || i.tm.extension_opcode == 3
4623 || i.tm.extension_opcode == 7))
4624 return 0;
4625
4626 /* Skip fisttp, fst, fstp, fsave, fstsw. */
4627 if (i.tm.base_opcode == 0xdd
4628 && (i.tm.extension_opcode == 1
4629 || i.tm.extension_opcode == 2
4630 || i.tm.extension_opcode == 3
4631 || i.tm.extension_opcode == 6
4632 || i.tm.extension_opcode == 7))
4633 return 0;
4634
4635 /* Skip fisttp, fist, fistp, fbstp, fistp. */
4636 if (i.tm.base_opcode == 0xdf
4637 && (i.tm.extension_opcode == 1
4638 || i.tm.extension_opcode == 2
4639 || i.tm.extension_opcode == 3
4640 || i.tm.extension_opcode == 6
4641 || i.tm.extension_opcode == 7))
4642 return 0;
4643
4644 return 1;
4645 }
4646 }
4647 else if (i.tm.opcode_modifier.opcodespace == SPACE_0F)
4648 {
4649 /* bt, bts, btr, btc. */
4650 if (i.tm.base_opcode == 0xba
4651 && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4652 return 1;
4653
4654 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
4655 if (i.tm.base_opcode == 0xc7
4656 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4657 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4658 || i.tm.extension_opcode == 6))
4659 return 1;
4660
4661 /* fxrstor, ldmxcsr, xrstor. */
4662 if (i.tm.base_opcode == 0xae
4663 && (i.tm.extension_opcode == 1
4664 || i.tm.extension_opcode == 2
4665 || i.tm.extension_opcode == 5))
4666 return 1;
4667
4668 /* lgdt, lidt, lmsw. */
4669 if (i.tm.base_opcode == 0x01
4670 && (i.tm.extension_opcode == 2
4671 || i.tm.extension_opcode == 3
4672 || i.tm.extension_opcode == 6))
4673 return 1;
4674 }
4675
4676 dest = i.operands - 1;
4677
4678 /* Check fake imm8 operand and 3 source operands. */
4679 if ((i.tm.opcode_modifier.immext
4680 || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4681 && i.types[dest].bitfield.imm8)
4682 dest--;
4683
4684 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
4685 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4686 && (base_opcode == 0x1
4687 || base_opcode == 0x9
4688 || base_opcode == 0x11
4689 || base_opcode == 0x19
4690 || base_opcode == 0x21
4691 || base_opcode == 0x29
4692 || base_opcode == 0x31
4693 || base_opcode == 0x39
4694 || (base_opcode | 2) == 0x87))
4695 return 1;
4696
4697 /* xadd. */
4698 if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4699 && base_opcode == 0xc1)
4700 return 1;
4701
4702 /* Check for load instruction. */
4703 return (i.types[dest].bitfield.class != ClassNone
4704 || i.types[dest].bitfield.instance == Accum);
4705 }
4706
4707 /* Output lfence, 0x0f 0xae 0xe8, after instruction. */
4708
4709 static void
4710 insert_lfence_after (void)
4711 {
4712 if (lfence_after_load && load_insn_p ())
4713 {
4714 /* There are also two REP string instructions that require
4715 special treatment. Specifically, the compare string (CMPS)
4716 and scan string (SCAS) instructions set EFLAGS in a manner
4717 that depends on the data being compared/scanned. When used
4718 with a REP prefix, the number of iterations may therefore
4719 vary depending on this data. If the data is a program secret
4720 that the adversary has chosen using an LVI method,
4721 then this data-dependent behavior may leak some aspect
4722 of the secret. */
4723 if (((i.tm.base_opcode | 0x1) == 0xa7
4724 || (i.tm.base_opcode | 0x1) == 0xaf)
4725 && i.prefix[REP_PREFIX])
4726 {
4727 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4728 i.tm.name);
4729 }
4730 char *p = frag_more (3);
4731 *p++ = 0xf;
4732 *p++ = 0xae;
4733 *p = 0xe8;
4734 }
4735 }
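
/* e.g. with -mlfence-after-load=yes:
     mov (%rax), %ebx   ->   8b 18
                             0f ae e8   (lfence)  */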
4736
4737 /* Output lfence, 0x0f 0xae 0xe8, before instruction. */
4738
4739 static void
4740 insert_lfence_before (void)
4741 {
4742 char *p;
4743
4744 if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
4745 return;
4746
4747 if (i.tm.base_opcode == 0xff
4748 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4749 {
4750 /* Insert lfence before indirect branch if needed. */
4751
4752 if (lfence_before_indirect_branch == lfence_branch_none)
4753 return;
4754
4755 if (i.operands != 1)
4756 abort ();
4757
4758 if (i.reg_operands == 1)
4759 {
4760 /* Indirect branch via register. Don't insert lfence with
4761 -mlfence-after-load=yes. */
4762 if (lfence_after_load
4763 || lfence_before_indirect_branch == lfence_branch_memory)
4764 return;
4765 }
4766 else if (i.mem_operands == 1
4767 && lfence_before_indirect_branch != lfence_branch_register)
4768 {
4769 as_warn (_("indirect `%s` with memory operand should be avoided"),
4770 i.tm.name);
4771 return;
4772 }
4773 else
4774 return;
4775
4776 if (last_insn.kind != last_insn_other
4777 && last_insn.seg == now_seg)
4778 {
4779 as_warn_where (last_insn.file, last_insn.line,
4780 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4781 last_insn.name, i.tm.name);
4782 return;
4783 }
4784
4785 p = frag_more (3);
4786 *p++ = 0xf;
4787 *p++ = 0xae;
4788 *p = 0xe8;
4789 return;
4790 }
4791
4792 /* Output or/not/shl and lfence before near ret. */
4793 if (lfence_before_ret != lfence_before_ret_none
4794 && (i.tm.base_opcode == 0xc2
4795 || i.tm.base_opcode == 0xc3))
4796 {
4797 if (last_insn.kind != last_insn_other
4798 && last_insn.seg == now_seg)
4799 {
4800 as_warn_where (last_insn.file, last_insn.line,
4801 _("`%s` skips -mlfence-before-ret on `%s`"),
4802 last_insn.name, i.tm.name);
4803 return;
4804 }
4805
4806 /* Near ret ignores the operand size override in 64-bit code. */
4807 char prefix = flag_code == CODE_64BIT
4808 ? 0x48
4809 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4810
4811 if (lfence_before_ret == lfence_before_ret_not)
4812 {
4813 /* not: 0xf71424, may add prefix
4814 for operand size override or 64-bit code. */
4815 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4816 if (prefix)
4817 *p++ = prefix;
4818 *p++ = 0xf7;
4819 *p++ = 0x14;
4820 *p++ = 0x24;
4821 if (prefix)
4822 *p++ = prefix;
4823 *p++ = 0xf7;
4824 *p++ = 0x14;
4825 *p++ = 0x24;
4826 }
4827 else
4828 {
4829 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4830 if (prefix)
4831 *p++ = prefix;
4832 if (lfence_before_ret == lfence_before_ret_or)
4833 {
4834 /* or: 0x830c2400, may add prefix
4835 for operand size override or 64-bit code. */
4836 *p++ = 0x83;
4837 *p++ = 0x0c;
4838 }
4839 else
4840 {
4841 /* shl: 0xc1242400, may add prefix
4842 for operand size override or 64-bit code. */
4843 *p++ = 0xc1;
4844 *p++ = 0x24;
4845 }
4846
4847 *p++ = 0x24;
4848 *p++ = 0x0;
4849 }
4850
4851 *p++ = 0xf;
4852 *p++ = 0xae;
4853 *p = 0xe8;
4854 }
4855 }
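
/* e.g. with -mlfence-before-ret=or in 64-bit code, a near ret becomes:
     48 83 0c 24 00   orq $0x0, (%rsp)
     0f ae e8         lfence
     c3               ret
   and with -mlfence-before-indirect-branch, "jmp *%rax" becomes:
     0f ae e8         lfence
     ff e0            jmp *%rax  */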
4856
4857 /* This is the guts of the machine-dependent assembler. LINE points to a
4858 machine dependent instruction. This function is supposed to emit
4859 the frags/bytes it assembles to. */
4860
4861 void
4862 md_assemble (char *line)
4863 {
4864 unsigned int j;
4865 char mnemonic[MAX_MNEM_SIZE], mnem_suffix;
4866 const insn_template *t;
4867
4868 /* Initialize globals. */
4869 memset (&i, '\0', sizeof (i));
4870 i.rounding.type = rc_none;
4871 for (j = 0; j < MAX_OPERANDS; j++)
4872 i.reloc[j] = NO_RELOC;
4873 memset (disp_expressions, '\0', sizeof (disp_expressions));
4874 memset (im_expressions, '\0', sizeof (im_expressions));
4875 save_stack_p = save_stack;
4876
4877 /* First parse an instruction mnemonic & call i386_operand for the operands.
4878 We assume that the scrubber has arranged it so that line[0] is the valid
4879 start of a (possibly prefixed) mnemonic. */
4880
4881 line = parse_insn (line, mnemonic);
4882 if (line == NULL)
4883 return;
4884 mnem_suffix = i.suffix;
4885
4886 line = parse_operands (line, mnemonic);
4887 this_operand = -1;
4888 xfree (i.memop1_string);
4889 i.memop1_string = NULL;
4890 if (line == NULL)
4891 return;
4892
4893 /* Now we've parsed the mnemonic into a set of templates, and have the
4894 operands at hand. */
4895
4896 /* All Intel opcodes have reversed operands except for "bound", "enter",
4897 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4898 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
4899 intersegment "jmp" and "call" instructions with 2 immediate operands so
4900 that the immediate segment precedes the offset consistently in Intel and
4901 AT&T modes. */
4902 if (intel_syntax
4903 && i.operands > 1
4904 && (strcmp (mnemonic, "bound") != 0)
4905 && (strncmp (mnemonic, "invlpg", 6) != 0)
4906 && !startswith (mnemonic, "monitor")
4907 && !startswith (mnemonic, "mwait")
4908 && (strcmp (mnemonic, "pvalidate") != 0)
4909 && !startswith (mnemonic, "rmp")
4910 && (strcmp (mnemonic, "tpause") != 0)
4911 && (strcmp (mnemonic, "umwait") != 0)
4912 && !(i.operands == 2
4913 && operand_type_check (i.types[0], imm)
4914 && operand_type_check (i.types[1], imm)))
4915 swap_operands ();
4916
4917 /* The order of the immediates should be reversed for the
4918 2-immediate extrq and insertq instructions. */
4919 if (i.imm_operands == 2
4920 && (strcmp (mnemonic, "extrq") == 0
4921 || strcmp (mnemonic, "insertq") == 0))
4922 swap_2_operands (0, 1);
4923
4924 if (i.imm_operands)
4925 optimize_imm ();
4926
4927 if (i.disp_operands && !want_disp32 (current_templates->start)
4928 && (!current_templates->start->opcode_modifier.jump
4929 || i.jumpabsolute || i.types[0].bitfield.baseindex))
4930 {
4931 for (j = 0; j < i.operands; ++j)
4932 {
4933 const expressionS *exp = i.op[j].disps;
4934
4935 if (!operand_type_check (i.types[j], disp))
4936 continue;
4937
4938 if (exp->X_op != O_constant)
4939 continue;
4940
4941 /* Since the displacement is sign-extended to 64 bits, don't
4942 allow disp32 if it is out of range. */
4943 if (fits_in_signed_long (exp->X_add_number))
4944 continue;
4945
4946 i.types[j].bitfield.disp32 = 0;
4947 if (i.types[j].bitfield.baseindex)
4948 {
4949 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
4950 (uint64_t) exp->X_add_number);
4951 return;
4952 }
4953 }
4954 }
4955
4956 /* Don't optimize the displacement for movabs, since it only takes
4957 a 64-bit displacement. */
4958 if (i.disp_operands
4959 && i.disp_encoding <= disp_encoding_8bit
4960 && (flag_code != CODE_64BIT
4961 || strcmp (mnemonic, "movabs") != 0))
4962 optimize_disp ();
4963
4964 /* Next, we find a template that matches the given insn,
4965 making sure the overlap of the given operands types is consistent
4966 with the template operand types. */
4967
4968 if (!(t = match_template (mnem_suffix)))
4969 return;
4970
4971 if (sse_check != check_none
4972 /* The opcode space check isn't strictly needed; it's there only to
4973 bypass the logic below when easily possible. */
4974 && t->opcode_modifier.opcodespace >= SPACE_0F
4975 && t->opcode_modifier.opcodespace <= SPACE_0F3A
4976 && !i.tm.cpu_flags.bitfield.cpusse4a
4977 && !is_any_vex_encoding (t))
4978 {
4979 bool simd = false;
4980
4981 for (j = 0; j < t->operands; ++j)
4982 {
4983 if (t->operand_types[j].bitfield.class == RegMMX)
4984 break;
4985 if (t->operand_types[j].bitfield.class == RegSIMD)
4986 simd = true;
4987 }
4988
4989 if (j >= t->operands && simd)
4990 (sse_check == check_warning
4991 ? as_warn
4992 : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
4993 }
4994
4995 if (i.tm.opcode_modifier.fwait)
4996 if (!add_prefix (FWAIT_OPCODE))
4997 return;
4998
4999 /* Check if REP prefix is OK. */
5000 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5001 {
5002 as_bad (_("invalid instruction `%s' after `%s'"),
5003 i.tm.name, i.rep_prefix);
5004 return;
5005 }
5006
5007 /* Check for lock without a lockable instruction. Destination operand
5008 must be memory unless it is xchg (0x86). */
5009 if (i.prefix[LOCK_PREFIX]
5010 && (i.tm.opcode_modifier.prefixok < PrefixLock
5011 || i.mem_operands == 0
5012 || (i.tm.base_opcode != 0x86
5013 && !(i.flags[i.operands - 1] & Operand_Mem))))
5014 {
5015 as_bad (_("expecting lockable instruction after `lock'"));
5016 return;
5017 }
5018
5019 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
5020 if (i.prefix[DATA_PREFIX]
5021 && (is_any_vex_encoding (&i.tm)
5022 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5023 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX))
5024 {
5025 as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
5026 return;
5027 }
5028
5029 /* Check if HLE prefix is OK. */
5030 if (i.hle_prefix && !check_hle ())
5031 return;
5032
5033 /* Check BND prefix. */
5034 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5035 as_bad (_("expecting valid branch instruction after `bnd'"));
5036
5037 /* Check NOTRACK prefix. */
5038 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5039 as_bad (_("expecting indirect branch instruction after `notrack'"));
5040
5041 if (i.tm.cpu_flags.bitfield.cpumpx)
5042 {
5043 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5044 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5045 else if (flag_code != CODE_16BIT
5046 ? i.prefix[ADDR_PREFIX]
5047 : i.mem_operands && !i.prefix[ADDR_PREFIX])
5048 as_bad (_("16-bit address isn't allowed in MPX instructions"));
5049 }
5050
5051 /* Insert BND prefix. */
5052 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5053 {
5054 if (!i.prefix[BND_PREFIX])
5055 add_prefix (BND_PREFIX_OPCODE);
5056 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5057 {
5058 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5059 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5060 }
5061 }
5062
5063 /* Check string instruction segment overrides. */
5064 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5065 {
5066 gas_assert (i.mem_operands);
5067 if (!check_string ())
5068 return;
5069 i.disp_operands = 0;
5070 }
5071
5072 /* The (%dx) memory operand may only be used with input/output
5073 instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee). */
5074 if (i.input_output_operand
5075 && ((i.tm.base_opcode | 0x82) != 0xee
5076 || i.tm.opcode_modifier.opcodespace != SPACE_BASE))
5077 {
5078 as_bad (_("input/output port address isn't allowed with `%s'"),
5079 i.tm.name);
5080 return;
5081 }
5082
5083 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5084 optimize_encoding ();
5085
5086 if (use_unaligned_vector_move)
5087 encode_with_unaligned_vector_move ();
5088
5089 if (!process_suffix ())
5090 return;
5091
5092 /* Check if IP-relative addressing requirements can be satisfied. */
5093 if (i.tm.cpu_flags.bitfield.cpuprefetchi
5094 && !(i.base_reg && i.base_reg->reg_num == RegIP))
5095 as_warn (_("'%s' only supports RIP-relative address"), i.tm.name);
5096
5097 /* Update operand types and check extended states. */
5098 for (j = 0; j < i.operands; j++)
5099 {
5100 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5101 switch (i.tm.operand_types[j].bitfield.class)
5102 {
5103 default:
5104 break;
5105 case RegMMX:
5106 i.xstate |= xstate_mmx;
5107 break;
5108 case RegMask:
5109 i.xstate |= xstate_mask;
5110 break;
5111 case RegSIMD:
5112 if (i.tm.operand_types[j].bitfield.tmmword)
5113 i.xstate |= xstate_tmm;
5114 else if (i.tm.operand_types[j].bitfield.zmmword)
5115 i.xstate |= xstate_zmm;
5116 else if (i.tm.operand_types[j].bitfield.ymmword)
5117 i.xstate |= xstate_ymm;
5118 else if (i.tm.operand_types[j].bitfield.xmmword)
5119 i.xstate |= xstate_xmm;
5120 break;
5121 }
5122 }
5123
5124 /* Make still-unresolved immediate matches conform to the size of the
5125 immediate given by i.suffix. */
5126 if (!finalize_imm ())
5127 return;
5128
5129 if (i.types[0].bitfield.imm1)
5130 i.imm_operands = 0; /* kludge for shift insns. */
5131
5132 /* We only need to check those implicit registers for instructions
5133 with 3 operands or less. */
5134 if (i.operands <= 3)
5135 for (j = 0; j < i.operands; j++)
5136 if (i.types[j].bitfield.instance != InstanceNone
5137 && !i.types[j].bitfield.xmmword)
5138 i.reg_operands--;
5139
5140 /* For insns with operands there are more diddles to do to the opcode. */
5141 if (i.operands)
5142 {
5143 if (!process_operands ())
5144 return;
5145 }
5146 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5147 {
5148 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
5149 as_warn (_("translating to `%sp'"), i.tm.name);
5150 }
5151
5152 if (is_any_vex_encoding (&i.tm))
5153 {
5154 if (!cpu_arch_flags.bitfield.cpui286)
5155 {
5156 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5157 i.tm.name);
5158 return;
5159 }
5160
5161 /* Check for explicit REX prefix. */
5162 if (i.prefix[REX_PREFIX] || i.rex_encoding)
5163 {
5164 as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
5165 return;
5166 }
5167
5168 if (i.tm.opcode_modifier.vex)
5169 build_vex_prefix (t);
5170 else
5171 build_evex_prefix ();
5172
5173 /* The individual REX.RXBW bits got consumed. */
5174 i.rex &= REX_OPCODE;
5175 }
5176
5177 /* Handle conversion of 'int $3' --> special int3 insn. XOP or FMA4
5178 instructions may define INT_OPCODE as well, so avoid this corner
5179 case for those instructions that use MODRM. */
5180 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
5181 && i.tm.base_opcode == INT_OPCODE
5182 && !i.tm.opcode_modifier.modrm
5183 && i.op[0].imms->X_add_number == 3)
5184 {
5185 i.tm.base_opcode = INT3_OPCODE;
5186 i.imm_operands = 0;
5187 }
5188
5189 if ((i.tm.opcode_modifier.jump == JUMP
5190 || i.tm.opcode_modifier.jump == JUMP_BYTE
5191 || i.tm.opcode_modifier.jump == JUMP_DWORD)
5192 && i.op[0].disps->X_op == O_constant)
5193 {
5194 /* Convert "jmp constant" (and "call constant") to a jump (call) to
5195 the absolute address given by the constant. Since ix86 jumps and
5196 calls are pc relative, we need to generate a reloc. */
5197 i.op[0].disps->X_add_symbol = &abs_symbol;
5198 i.op[0].disps->X_op = O_symbol;
5199 }
5200
5201 /* For 8-bit registers we need an empty REX prefix. Also, if the
5202 instruction already has a REX prefix, we need to convert legacy
5203 registers to their REX equivalents. */
5204
5205 if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5206 && (i.op[0].regs->reg_flags & RegRex64) != 0)
5207 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5208 && (i.op[1].regs->reg_flags & RegRex64) != 0)
5209 || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5210 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5211 && i.rex != 0))
5212 {
5213 int x;
5214
5215 i.rex |= REX_OPCODE;
5216 for (x = 0; x < 2; x++)
5217 {
5218 /* Look for 8 bit operand that uses old registers. */
5219 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5220 && (i.op[x].regs->reg_flags & RegRex64) == 0)
5221 {
5222 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5223 /* In case it is "hi" register, give up. */
5224 if (i.op[x].regs->reg_num > 3)
5225 as_bad (_("can't encode register '%s%s' in an "
5226 "instruction requiring REX prefix."),
5227 register_prefix, i.op[x].regs->reg_name);
5228
5229 /* Otherwise it is equivalent to the extended register.
5230 Since the encoding doesn't change this is merely
5231 cosmetic cleanup for debug output. */
5232
5233 i.op[x].regs = i.op[x].regs + 8;
5234 }
5235 }
5236 }
5237
5238 if (i.rex == 0 && i.rex_encoding)
5239 {
5240 /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
5241 that uses legacy register. If it is "hi" register, don't add
5242 the REX_OPCODE byte. */
5243 int x;
5244 for (x = 0; x < 2; x++)
5245 if (i.types[x].bitfield.class == Reg
5246 && i.types[x].bitfield.byte
5247 && (i.op[x].regs->reg_flags & RegRex64) == 0
5248 && i.op[x].regs->reg_num > 3)
5249 {
5250 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5251 i.rex_encoding = false;
5252 break;
5253 }
5254
5255 if (i.rex_encoding)
5256 i.rex = REX_OPCODE;
5257 }
5258
5259 if (i.rex != 0)
5260 add_prefix (REX_OPCODE | i.rex);
5261
5262 insert_lfence_before ();
5263
5264 /* We are ready to output the insn. */
5265 output_insn ();
5266
5267 insert_lfence_after ();
5268
5269 last_insn.seg = now_seg;
5270
5271 if (i.tm.opcode_modifier.isprefix)
5272 {
5273 last_insn.kind = last_insn_prefix;
5274 last_insn.name = i.tm.name;
5275 last_insn.file = as_where (&last_insn.line);
5276 }
5277 else
5278 last_insn.kind = last_insn_other;
5279 }
5280
5281 static char *
5282 parse_insn (char *line, char *mnemonic)
5283 {
5284 char *l = line;
5285 char *token_start = l;
5286 char *mnem_p;
5287 int supported;
5288 const insn_template *t;
5289 char *dot_p = NULL;
5290
5291 while (1)
5292 {
5293 mnem_p = mnemonic;
5294 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5295 {
5296 if (*mnem_p == '.')
5297 dot_p = mnem_p;
5298 mnem_p++;
5299 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5300 {
5301 as_bad (_("no such instruction: `%s'"), token_start);
5302 return NULL;
5303 }
5304 l++;
5305 }
5306 if (!is_space_char (*l)
5307 && *l != END_OF_INSN
5308 && (intel_syntax
5309 || (*l != PREFIX_SEPARATOR
5310 && *l != ',')))
5311 {
5312 as_bad (_("invalid character %s in mnemonic"),
5313 output_invalid (*l));
5314 return NULL;
5315 }
5316 if (token_start == l)
5317 {
5318 if (!intel_syntax && *l == PREFIX_SEPARATOR)
5319 as_bad (_("expecting prefix; got nothing"));
5320 else
5321 as_bad (_("expecting mnemonic; got nothing"));
5322 return NULL;
5323 }
5324
5325 /* Look up instruction (or prefix) via hash table. */
5326 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5327
5328 if (*l != END_OF_INSN
5329 && (!is_space_char (*l) || l[1] != END_OF_INSN)
5330 && current_templates
5331 && current_templates->start->opcode_modifier.isprefix)
5332 {
5333 if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5334 {
5335 as_bad ((flag_code != CODE_64BIT
5336 ? _("`%s' is only supported in 64-bit mode")
5337 : _("`%s' is not supported in 64-bit mode")),
5338 current_templates->start->name);
5339 return NULL;
5340 }
5341 /* If we are in 16-bit mode, do not allow addr16 or data16.
5342 Similarly, in 32-bit mode, do not allow addr32 or data32. */
5343 if ((current_templates->start->opcode_modifier.size == SIZE16
5344 || current_templates->start->opcode_modifier.size == SIZE32)
5345 && flag_code != CODE_64BIT
5346 && ((current_templates->start->opcode_modifier.size == SIZE32)
5347 ^ (flag_code == CODE_16BIT)))
5348 {
5349 as_bad (_("redundant %s prefix"),
5350 current_templates->start->name);
5351 return NULL;
5352 }
5353
5354 if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5355 {
5356 /* Handle pseudo prefixes. */
5357 switch (current_templates->start->extension_opcode)
5358 {
5359 case Prefix_Disp8:
5360 /* {disp8} */
5361 i.disp_encoding = disp_encoding_8bit;
5362 break;
5363 case Prefix_Disp16:
5364 /* {disp16} */
5365 i.disp_encoding = disp_encoding_16bit;
5366 break;
5367 case Prefix_Disp32:
5368 /* {disp32} */
5369 i.disp_encoding = disp_encoding_32bit;
5370 break;
5371 case Prefix_Load:
5372 /* {load} */
5373 i.dir_encoding = dir_encoding_load;
5374 break;
5375 case Prefix_Store:
5376 /* {store} */
5377 i.dir_encoding = dir_encoding_store;
5378 break;
5379 case Prefix_VEX:
5380 /* {vex} */
5381 i.vec_encoding = vex_encoding_vex;
5382 break;
5383 case Prefix_VEX3:
5384 /* {vex3} */
5385 i.vec_encoding = vex_encoding_vex3;
5386 break;
5387 case Prefix_EVEX:
5388 /* {evex} */
5389 i.vec_encoding = vex_encoding_evex;
5390 break;
5391 case Prefix_REX:
5392 /* {rex} */
5393 i.rex_encoding = true;
5394 break;
5395 case Prefix_NoOptimize:
5396 /* {nooptimize} */
5397 i.no_optimize = true;
5398 break;
5399 default:
5400 abort ();
5401 }
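/* Usage examples (illustrative): "{vex3} vandps %xmm2, %xmm1, %xmm0"
   forces the three-byte VEX form, "{evex} vpaddd %xmm2, %xmm1, %xmm0"
   forces an EVEX encoding, and "{disp32} movl %eax, 4(%rbp)" forces a
   32-bit displacement.  */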
5402 }
5403 else
5404 {
5405 /* Add prefix, checking for repeated prefixes. */
5406 switch (add_prefix (current_templates->start->base_opcode))
5407 {
5408 case PREFIX_EXIST:
5409 return NULL;
5410 case PREFIX_DS:
5411 if (current_templates->start->cpu_flags.bitfield.cpuibt)
5412 i.notrack_prefix = current_templates->start->name;
5413 break;
5414 case PREFIX_REP:
5415 if (current_templates->start->cpu_flags.bitfield.cpuhle)
5416 i.hle_prefix = current_templates->start->name;
5417 else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5418 i.bnd_prefix = current_templates->start->name;
5419 else
5420 i.rep_prefix = current_templates->start->name;
5421 break;
5422 default:
5423 break;
5424 }
5425 }
5426 /* Skip past PREFIX_SEPARATOR and reset token_start. */
5427 token_start = ++l;
5428 }
5429 else
5430 break;
5431 }
5432
5433 if (!current_templates)
5434 {
5435 /* Deprecated functionality (new code should use pseudo-prefixes instead):
5436 Check if we should swap operands or force an 8-bit or 32-bit
5437 displacement in the encoding. */
5438 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5439 i.dir_encoding = dir_encoding_swap;
5440 else if (mnem_p - 3 == dot_p
5441 && dot_p[1] == 'd'
5442 && dot_p[2] == '8')
5443 i.disp_encoding = disp_encoding_8bit;
5444 else if (mnem_p - 4 == dot_p
5445 && dot_p[1] == 'd'
5446 && dot_p[2] == '3'
5447 && dot_p[3] == '2')
5448 i.disp_encoding = disp_encoding_32bit;
5449 else
5450 goto check_suffix;
5451 mnem_p = dot_p;
5452 *dot_p = '\0';
5453 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5454 }
5455
5456 if (!current_templates)
5457 {
5458 check_suffix:
5459 if (mnem_p > mnemonic)
5460 {
5461 /* See if we can get a match by trimming off a suffix. */
5462 switch (mnem_p[-1])
5463 {
5464 case WORD_MNEM_SUFFIX:
5465 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5466 i.suffix = SHORT_MNEM_SUFFIX;
5467 else
5468 /* Fall through. */
5469 case BYTE_MNEM_SUFFIX:
5470 case QWORD_MNEM_SUFFIX:
5471 i.suffix = mnem_p[-1];
5472 mnem_p[-1] = '\0';
5473 current_templates
5474 = (const templates *) str_hash_find (op_hash, mnemonic);
5475 break;
5476 case SHORT_MNEM_SUFFIX:
5477 case LONG_MNEM_SUFFIX:
5478 if (!intel_syntax)
5479 {
5480 i.suffix = mnem_p[-1];
5481 mnem_p[-1] = '\0';
5482 current_templates
5483 = (const templates *) str_hash_find (op_hash, mnemonic);
5484 }
5485 break;
5486
5487 /* Intel Syntax. */
5488 case 'd':
5489 if (intel_syntax)
5490 {
5491 if (intel_float_operand (mnemonic) == 1)
5492 i.suffix = SHORT_MNEM_SUFFIX;
5493 else
5494 i.suffix = LONG_MNEM_SUFFIX;
5495 mnem_p[-1] = '\0';
5496 current_templates
5497 = (const templates *) str_hash_find (op_hash, mnemonic);
5498 }
5499 break;
5500 }
5501 }
5502
5503 if (!current_templates)
5504 {
5505 as_bad (_("no such instruction: `%s'"), token_start);
5506 return NULL;
5507 }
5508 }
5509
5510 if (current_templates->start->opcode_modifier.jump == JUMP
5511 || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5512 {
5513 /* Check for a branch hint. We allow ",pt" and ",pn" for
5514 predict taken and predict not taken respectively.
5515 I'm not sure that branch hints actually do anything on loop
5516 and jcxz insns (JumpByte) for current Pentium4 chips. They
5517 may work in the future and it doesn't hurt to accept them
5518 now. */
5519 if (l[0] == ',' && l[1] == 'p')
5520 {
5521 if (l[2] == 't')
5522 {
5523 if (!add_prefix (DS_PREFIX_OPCODE))
5524 return NULL;
5525 l += 3;
5526 }
5527 else if (l[2] == 'n')
5528 {
5529 if (!add_prefix (CS_PREFIX_OPCODE))
5530 return NULL;
5531 l += 3;
5532 }
5533 }
5534 }
5535 /* Any other comma loses. */
5536 if (*l == ',')
5537 {
5538 as_bad (_("invalid character %s in mnemonic"),
5539 output_invalid (*l));
5540 return NULL;
5541 }
5542
5543 /* Check if instruction is supported on specified architecture. */
5544 supported = 0;
5545 for (t = current_templates->start; t < current_templates->end; ++t)
5546 {
5547 supported |= cpu_flags_match (t);
5548 if (supported == CPU_FLAGS_PERFECT_MATCH)
5549 return l;
5550 }
5551
5552 if (!(supported & CPU_FLAGS_64BIT_MATCH))
5553 as_bad (flag_code == CODE_64BIT
5554 ? _("`%s' is not supported in 64-bit mode")
5555 : _("`%s' is only supported in 64-bit mode"),
5556 current_templates->start->name);
5557 else
5558 as_bad (_("`%s' is not supported on `%s%s'"),
5559 current_templates->start->name,
5560 cpu_arch_name ? cpu_arch_name : default_arch,
5561 cpu_sub_arch_name ? cpu_sub_arch_name : "");
5562
5563 return NULL;
5564 }
5565
5566 static char *
5567 parse_operands (char *l, const char *mnemonic)
5568 {
5569 char *token_start;
5570
5571 /* 1 if operand is pending after ','. */
5572 unsigned int expecting_operand = 0;
5573
5574 while (*l != END_OF_INSN)
5575 {
5576 /* Non-zero if operand parens not balanced. */
5577 unsigned int paren_not_balanced = 0;
5578 /* True if inside double quotes. */
5579 bool in_quotes = false;
5580
5581 /* Skip optional white space before operand. */
5582 if (is_space_char (*l))
5583 ++l;
5584 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5585 {
5586 as_bad (_("invalid character %s before operand %d"),
5587 output_invalid (*l),
5588 i.operands + 1);
5589 return NULL;
5590 }
5591 token_start = l; /* After white space. */
5592 while (in_quotes || paren_not_balanced || *l != ',')
5593 {
5594 if (*l == END_OF_INSN)
5595 {
5596 if (in_quotes)
5597 {
5598 as_bad (_("unbalanced double quotes in operand %d."),
5599 i.operands + 1);
5600 return NULL;
5601 }
5602 if (paren_not_balanced)
5603 {
5604 know (!intel_syntax);
5605 as_bad (_("unbalanced parenthesis in operand %d."),
5606 i.operands + 1);
5607 return NULL;
5608 }
5609 else
5610 break; /* we are done */
5611 }
5612 else if (*l == '\\' && l[1] == '"')
5613 ++l;
5614 else if (*l == '"')
5615 in_quotes = !in_quotes;
5616 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5617 {
5618 as_bad (_("invalid character %s in operand %d"),
5619 output_invalid (*l),
5620 i.operands + 1);
5621 return NULL;
5622 }
5623 if (!intel_syntax && !in_quotes)
5624 {
5625 if (*l == '(')
5626 ++paren_not_balanced;
5627 if (*l == ')')
5628 --paren_not_balanced;
5629 }
5630 l++;
5631 }
5632 if (l != token_start)
5633 { /* Yes, we've read in another operand. */
5634 unsigned int operand_ok;
5635 this_operand = i.operands++;
5636 if (i.operands > MAX_OPERANDS)
5637 {
5638 as_bad (_("spurious operands; (%d operands/instruction max)"),
5639 MAX_OPERANDS);
5640 return NULL;
5641 }
5642 i.types[this_operand].bitfield.unspecified = 1;
5643 /* Now parse operand adding info to 'i' as we go along. */
5644 END_STRING_AND_SAVE (l);
5645
5646 if (i.mem_operands > 1)
5647 {
5648 as_bad (_("too many memory references for `%s'"),
5649 mnemonic);
5650 return 0;
5651 }
5652
5653 if (intel_syntax)
5654 operand_ok =
5655 i386_intel_operand (token_start,
5656 intel_float_operand (mnemonic));
5657 else
5658 operand_ok = i386_att_operand (token_start);
5659
5660 RESTORE_END_STRING (l);
5661 if (!operand_ok)
5662 return NULL;
5663 }
5664 else
5665 {
5666 if (expecting_operand)
5667 {
5668 expecting_operand_after_comma:
5669 as_bad (_("expecting operand after ','; got nothing"));
5670 return NULL;
5671 }
5672 if (*l == ',')
5673 {
5674 as_bad (_("expecting operand before ','; got nothing"));
5675 return NULL;
5676 }
5677 }
5678
5679 /* Now *l must be either ',' or END_OF_INSN. */
5680 if (*l == ',')
5681 {
5682 if (*++l == END_OF_INSN)
5683 {
5684 /* Just skip it, if it's \n complain. */
5685 goto expecting_operand_after_comma;
5686 }
5687 expecting_operand = 1;
5688 }
5689 }
5690 return l;
5691 }
5692
5693 static void
5694 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5695 {
5696 union i386_op temp_op;
5697 i386_operand_type temp_type;
5698 unsigned int temp_flags;
5699 enum bfd_reloc_code_real temp_reloc;
5700
5701 temp_type = i.types[xchg2];
5702 i.types[xchg2] = i.types[xchg1];
5703 i.types[xchg1] = temp_type;
5704
5705 temp_flags = i.flags[xchg2];
5706 i.flags[xchg2] = i.flags[xchg1];
5707 i.flags[xchg1] = temp_flags;
5708
5709 temp_op = i.op[xchg2];
5710 i.op[xchg2] = i.op[xchg1];
5711 i.op[xchg1] = temp_op;
5712
5713 temp_reloc = i.reloc[xchg2];
5714 i.reloc[xchg2] = i.reloc[xchg1];
5715 i.reloc[xchg1] = temp_reloc;
5716
5717 if (i.mask.reg)
5718 {
5719 if (i.mask.operand == xchg1)
5720 i.mask.operand = xchg2;
5721 else if (i.mask.operand == xchg2)
5722 i.mask.operand = xchg1;
5723 }
5724 if (i.broadcast.type || i.broadcast.bytes)
5725 {
5726 if (i.broadcast.operand == xchg1)
5727 i.broadcast.operand = xchg2;
5728 else if (i.broadcast.operand == xchg2)
5729 i.broadcast.operand = xchg1;
5730 }
5731 }
5732
5733 static void
5734 swap_operands (void)
5735 {
5736 switch (i.operands)
5737 {
5738 case 5:
5739 case 4:
5740 swap_2_operands (1, i.operands - 2);
5741 /* Fall through. */
5742 case 3:
5743 case 2:
5744 swap_2_operands (0, i.operands - 1);
5745 break;
5746 default:
5747 abort ();
5748 }
5749
5750 if (i.mem_operands == 2)
5751 {
5752 const reg_entry *temp_seg;
5753 temp_seg = i.seg[0];
5754 i.seg[0] = i.seg[1];
5755 i.seg[1] = temp_seg;
5756 }
5757 }
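
/* e.g. Intel syntax "add eax, ecx" is parsed with the destination first;
   swap_operands() reverses it into the internal AT&T order, matching
   "addl %ecx, %eax".  */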
5758
5759 /* Try to ensure constant immediates are represented in the smallest
5760 opcode possible. */
5761 static void
5762 optimize_imm (void)
5763 {
5764 char guess_suffix = 0;
5765 int op;
5766
5767 if (i.suffix)
5768 guess_suffix = i.suffix;
5769 else if (i.reg_operands)
5770 {
5771 /* Figure out a suffix from the last register operand specified.
5772 We can't do this properly yet, i.e. excluding special register
5773 instances, but the following works for instructions with
5774 immediates. In any case, we can't set i.suffix yet. */
5775 for (op = i.operands; --op >= 0;)
5776 if (i.types[op].bitfield.class != Reg)
5777 continue;
5778 else if (i.types[op].bitfield.byte)
5779 {
5780 guess_suffix = BYTE_MNEM_SUFFIX;
5781 break;
5782 }
5783 else if (i.types[op].bitfield.word)
5784 {
5785 guess_suffix = WORD_MNEM_SUFFIX;
5786 break;
5787 }
5788 else if (i.types[op].bitfield.dword)
5789 {
5790 guess_suffix = LONG_MNEM_SUFFIX;
5791 break;
5792 }
5793 else if (i.types[op].bitfield.qword)
5794 {
5795 guess_suffix = QWORD_MNEM_SUFFIX;
5796 break;
5797 }
5798 }
5799 else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5800 guess_suffix = WORD_MNEM_SUFFIX;
5801
5802 for (op = i.operands; --op >= 0;)
5803 if (operand_type_check (i.types[op], imm))
5804 {
5805 switch (i.op[op].imms->X_op)
5806 {
5807 case O_constant:
5808 /* If a suffix is given, this operand may be shortened. */
5809 switch (guess_suffix)
5810 {
5811 case LONG_MNEM_SUFFIX:
5812 i.types[op].bitfield.imm32 = 1;
5813 i.types[op].bitfield.imm64 = 1;
5814 break;
5815 case WORD_MNEM_SUFFIX:
5816 i.types[op].bitfield.imm16 = 1;
5817 i.types[op].bitfield.imm32 = 1;
5818 i.types[op].bitfield.imm32s = 1;
5819 i.types[op].bitfield.imm64 = 1;
5820 break;
5821 case BYTE_MNEM_SUFFIX:
5822 i.types[op].bitfield.imm8 = 1;
5823 i.types[op].bitfield.imm8s = 1;
5824 i.types[op].bitfield.imm16 = 1;
5825 i.types[op].bitfield.imm32 = 1;
5826 i.types[op].bitfield.imm32s = 1;
5827 i.types[op].bitfield.imm64 = 1;
5828 break;
5829 }
5830
5831 /* If this operand is at most 16 bits, convert it
5832 to a signed 16 bit number before trying to see
5833 whether it will fit in an even smaller size.
5834 This allows a 16-bit operand such as $0xffe0 to
5835 be recognised as within Imm8S range. */
5836 if ((i.types[op].bitfield.imm16)
5837 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
5838 {
5839 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5840 ^ 0x8000) - 0x8000);
5841 }
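/* Worked example (illustrative): for $0xffe0,
   (0xffe0 ^ 0x8000) - 0x8000 = 0x7fe0 - 0x8000 = -0x20,
   i.e. -32, which fits in Imm8S.  */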
5842 #ifdef BFD64
5843 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
5844 if ((i.types[op].bitfield.imm32)
5845 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
5846 {
5847 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
5848 ^ ((offsetT) 1 << 31))
5849 - ((offsetT) 1 << 31));
5850 }
5851 #endif
5852 i.types[op]
5853 = operand_type_or (i.types[op],
5854 smallest_imm_type (i.op[op].imms->X_add_number));
5855
5856 /* We must avoid matching Imm32 templates when only a
5857 64-bit immediate is available. */
5858 if (guess_suffix == QWORD_MNEM_SUFFIX)
5859 i.types[op].bitfield.imm32 = 0;
5860 break;
5861
5862 case O_absent:
5863 case O_register:
5864 abort ();
5865
5866 /* Symbols and expressions. */
5867 default:
5868 /* Convert symbolic operand to proper sizes for matching, but don't
5869 prevent matching a set of insns that only supports sizes other
5870 than those matching the insn suffix. */
5871 {
5872 i386_operand_type mask, allowed;
5873 const insn_template *t = current_templates->start;
5874
5875 operand_type_set (&mask, 0);
5876 switch (guess_suffix)
5877 {
5878 case QWORD_MNEM_SUFFIX:
5879 mask.bitfield.imm64 = 1;
5880 mask.bitfield.imm32s = 1;
5881 break;
5882 case LONG_MNEM_SUFFIX:
5883 mask.bitfield.imm32 = 1;
5884 break;
5885 case WORD_MNEM_SUFFIX:
5886 mask.bitfield.imm16 = 1;
5887 break;
5888 case BYTE_MNEM_SUFFIX:
5889 mask.bitfield.imm8 = 1;
5890 break;
5891 default:
5892 break;
5893 }
5894
5895 allowed = operand_type_and (t->operand_types[op], mask);
5896 while (++t < current_templates->end)
5897 {
5898 allowed = operand_type_or (allowed, t->operand_types[op]);
5899 allowed = operand_type_and (allowed, mask);
5900 }
5901
5902 if (!operand_type_all_zero (&allowed))
5903 i.types[op] = operand_type_and (i.types[op], mask);
5904 }
5905 break;
5906 }
5907 }
5908 }
5909
5910 /* Try to use the smallest displacement type too. */
5911 static void
5912 optimize_disp (void)
5913 {
5914 int op;
5915
5916 for (op = i.operands; --op >= 0;)
5917 if (operand_type_check (i.types[op], disp))
5918 {
5919 if (i.op[op].disps->X_op == O_constant)
5920 {
5921 offsetT op_disp = i.op[op].disps->X_add_number;
5922
5923 if (!op_disp && i.types[op].bitfield.baseindex)
5924 {
5925 i.types[op] = operand_type_and_not (i.types[op], anydisp);
5926 i.op[op].disps = NULL;
5927 i.disp_operands--;
5928 continue;
5929 }
5930
5931 if (i.types[op].bitfield.disp16
5932 && fits_in_unsigned_word (op_disp))
5933 {
5934 /* If this operand is at most 16 bits, convert
5935 to a signed 16 bit number and don't use 64bit
5936 displacement. */
5937 op_disp = ((op_disp ^ 0x8000) - 0x8000);
5938 i.types[op].bitfield.disp64 = 0;
5939 }
5940
5941 #ifdef BFD64
5942 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
5943 if ((flag_code != CODE_64BIT
5944 ? i.types[op].bitfield.disp32
5945 : want_disp32 (current_templates->start)
5946 && (!current_templates->start->opcode_modifier.jump
5947 || i.jumpabsolute || i.types[op].bitfield.baseindex))
5948 && fits_in_unsigned_long (op_disp))
5949 {
5950 /* If this operand is at most 32 bits, convert
5951 to a signed 32 bit number and don't use 64bit
5952 displacement. */
5953 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
5954 i.types[op].bitfield.disp64 = 0;
5955 i.types[op].bitfield.disp32 = 1;
5956 }
5957
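/* Separately, any displacement fitting in 32 bits signed can use
Disp32 in 64-bit mode, since the CPU sign-extends it to 64 bits. */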
5958 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
5959 {
5960 i.types[op].bitfield.disp64 = 0;
5961 i.types[op].bitfield.disp32 = 1;
5962 }
5963 #endif
5964 if ((i.types[op].bitfield.disp32
5965 || i.types[op].bitfield.disp16)
5966 && fits_in_disp8 (op_disp))
5967 i.types[op].bitfield.disp8 = 1;
5968
5969 i.op[op].disps->X_add_number = op_disp;
5970 }
5971 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
5972 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
5973 {
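/* These relocations merely annotate the insn for the linker's benefit
(TLS descriptor relaxation); emit a zero-size fix at the current
position and drop the displacement from the operand's type. */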
5974 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
5975 i.op[op].disps, 0, i.reloc[op]);
5976 i.types[op] = operand_type_and_not (i.types[op], anydisp);
5977 }
5978 else
5979 /* We only support 64bit displacement on constants. */
5980 i.types[op].bitfield.disp64 = 0;
5981 }
5982 }
5983
5984 /* Return 1 if there is a match in broadcast bytes between operand
5985 GIVEN and instruction template T. */
5986
5987 static INLINE int
5988 match_broadcast_size (const insn_template *t, unsigned int given)
5989 {
5990 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
5991 && i.types[given].bitfield.byte)
5992 || (t->opcode_modifier.broadcast == WORD_BROADCAST
5993 && i.types[given].bitfield.word)
5994 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
5995 && i.types[given].bitfield.dword)
5996 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
5997 && i.types[given].bitfield.qword));
5998 }
5999
6000 /* Check if operands are valid for the instruction. */
6001
6002 static int
6003 check_VecOperands (const insn_template *t)
6004 {
6005 unsigned int op;
6006 i386_cpu_flags cpu;
6007
6008 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6009 any one operand implicitly require AVX512VL support if the actual
6010 operand size is YMMword or XMMword. Since this function runs after
6011 template matching, there's no need to check for YMMword/XMMword in
6012 the template. */
6013 cpu = cpu_flags_and (t->cpu_flags, avx512);
6014 if (!cpu_flags_all_zero (&cpu)
6015 && !t->cpu_flags.bitfield.cpuavx512vl
6016 && !cpu_arch_flags.bitfield.cpuavx512vl)
6017 {
6018 for (op = 0; op < t->operands; ++op)
6019 {
6020 if (t->operand_types[op].bitfield.zmmword
6021 && (i.types[op].bitfield.ymmword
6022 || i.types[op].bitfield.xmmword))
6023 {
6024 i.error = unsupported;
6025 return 1;
6026 }
6027 }
6028 }
6029
6030 /* Somewhat similarly, templates specifying both AVX and AVX2 are
6031 requiring AVX2 support if the actual operand size is YMMword. */
6032 if (t->cpu_flags.bitfield.cpuavx
6033 && t->cpu_flags.bitfield.cpuavx2
6034 && !cpu_arch_flags.bitfield.cpuavx2)
6035 {
6036 for (op = 0; op < t->operands; ++op)
6037 {
6038 if (t->operand_types[op].bitfield.xmmword
6039 && i.types[op].bitfield.ymmword)
6040 {
6041 i.error = unsupported;
6042 return 1;
6043 }
6044 }
6045 }
6046
6047 /* Without a VSIB byte, we can't have a vector register as index. */
6048 if (!t->opcode_modifier.sib
6049 && i.index_reg
6050 && (i.index_reg->reg_type.bitfield.xmmword
6051 || i.index_reg->reg_type.bitfield.ymmword
6052 || i.index_reg->reg_type.bitfield.zmmword))
6053 {
6054 i.error = unsupported_vector_index_register;
6055 return 1;
6056 }
6057
6058 /* Check if default mask is allowed. */
6059 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6060 && (!i.mask.reg || i.mask.reg->reg_num == 0))
6061 {
6062 i.error = no_default_mask;
6063 return 1;
6064 }
6065
6066 /* With a VSIB byte, we need a vector register as index, and all vector
6067 registers must be distinct. */
6068 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6069 {
6070 if (!i.index_reg
6071 || !((t->opcode_modifier.sib == VECSIB128
6072 && i.index_reg->reg_type.bitfield.xmmword)
6073 || (t->opcode_modifier.sib == VECSIB256
6074 && i.index_reg->reg_type.bitfield.ymmword)
6075 || (t->opcode_modifier.sib == VECSIB512
6076 && i.index_reg->reg_type.bitfield.zmmword)))
6077 {
6078 i.error = invalid_vsib_address;
6079 return 1;
6080 }
6081
6082 gas_assert (i.reg_operands == 2 || i.mask.reg);
6083 if (i.reg_operands == 2 && !i.mask.reg)
6084 {
6085 gas_assert (i.types[0].bitfield.class == RegSIMD);
6086 gas_assert (i.types[0].bitfield.xmmword
6087 || i.types[0].bitfield.ymmword);
6088 gas_assert (i.types[2].bitfield.class == RegSIMD);
6089 gas_assert (i.types[2].bitfield.xmmword
6090 || i.types[2].bitfield.ymmword);
6091 if (operand_check == check_none)
6092 return 0;
6093 if (register_number (i.op[0].regs)
6094 != register_number (i.index_reg)
6095 && register_number (i.op[2].regs)
6096 != register_number (i.index_reg)
6097 && register_number (i.op[0].regs)
6098 != register_number (i.op[2].regs))
6099 return 0;
6100 if (operand_check == check_error)
6101 {
6102 i.error = invalid_vector_register_set;
6103 return 1;
6104 }
6105 as_warn (_("mask, index, and destination registers should be distinct"));
6106 }
6107 else if (i.reg_operands == 1 && i.mask.reg)
6108 {
6109 if (i.types[1].bitfield.class == RegSIMD
6110 && (i.types[1].bitfield.xmmword
6111 || i.types[1].bitfield.ymmword
6112 || i.types[1].bitfield.zmmword)
6113 && (register_number (i.op[1].regs)
6114 == register_number (i.index_reg)))
6115 {
6116 if (operand_check == check_error)
6117 {
6118 i.error = invalid_vector_register_set;
6119 return 1;
6120 }
6121 if (operand_check != check_none)
6122 as_warn (_("index and destination registers should be distinct"));
6123 }
6124 }
6125 }
6126
6127 /* For AMX instructions with 3 TMM register operands, all operands
6128 must be distinct. */
6129 if (i.reg_operands == 3
6130 && t->operand_types[0].bitfield.tmmword
6131 && (i.op[0].regs == i.op[1].regs
6132 || i.op[0].regs == i.op[2].regs
6133 || i.op[1].regs == i.op[2].regs))
6134 {
6135 i.error = invalid_tmm_register_set;
6136 return 1;
6137 }
6138
6139 /* Some special instructions require that the destination be distinct
6140 from the source registers. */
6141 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6142 {
6143 unsigned int dest_reg = i.operands - 1;
6144
6145 know (i.operands >= 3);
6146
6147 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
6148 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6149 || (i.reg_operands > 2
6150 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6151 {
6152 i.error = invalid_dest_and_src_register_set;
6153 return 1;
6154 }
6155 }
6156
6157 /* Check if broadcast is supported by the instruction and is applied
6158 to the memory operand. */
6159 if (i.broadcast.type || i.broadcast.bytes)
6160 {
6161 i386_operand_type type, overlap;
6162
6163 /* Check that the specified broadcast is supported by this instruction,
6164 and that its broadcast bytes match the memory operand. */
6165 op = i.broadcast.operand;
6166 if (!t->opcode_modifier.broadcast
6167 || !(i.flags[op] & Operand_Mem)
6168 || (!i.types[op].bitfield.unspecified
6169 && !match_broadcast_size (t, op)))
6170 {
6171 bad_broadcast:
6172 i.error = unsupported_broadcast;
6173 return 1;
6174 }
6175
6176 if (i.broadcast.type)
6177 i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
6178 * i.broadcast.type);
6179 operand_type_set (&type, 0);
6180 switch (get_broadcast_bytes (t, false))
6181 {
6182 case 2:
6183 type.bitfield.word = 1;
6184 break;
6185 case 4:
6186 type.bitfield.dword = 1;
6187 break;
6188 case 8:
6189 type.bitfield.qword = 1;
6190 break;
6191 case 16:
6192 type.bitfield.xmmword = 1;
6193 break;
6194 case 32:
6195 type.bitfield.ymmword = 1;
6196 break;
6197 case 64:
6198 type.bitfield.zmmword = 1;
6199 break;
6200 default:
6201 goto bad_broadcast;
6202 }
6203
6204 overlap = operand_type_and (type, t->operand_types[op]);
6205 if (t->operand_types[op].bitfield.class == RegSIMD
6206 && t->operand_types[op].bitfield.byte
6207 + t->operand_types[op].bitfield.word
6208 + t->operand_types[op].bitfield.dword
6209 + t->operand_types[op].bitfield.qword > 1)
6210 {
6211 overlap.bitfield.xmmword = 0;
6212 overlap.bitfield.ymmword = 0;
6213 overlap.bitfield.zmmword = 0;
6214 }
6215 if (operand_type_all_zero (&overlap))
6216 goto bad_broadcast;
6217
6218 if (t->opcode_modifier.checkregsize)
6219 {
6220 unsigned int j;
6221
6222 type.bitfield.baseindex = 1;
6223 for (j = 0; j < i.operands; ++j)
6224 {
6225 if (j != op
6226 && !operand_type_register_match(i.types[j],
6227 t->operand_types[j],
6228 type,
6229 t->operand_types[op]))
6230 goto bad_broadcast;
6231 }
6232 }
6233 }
6234 /* If broadcast is supported by this instruction, we need to check
6235 that an operand of one-element size isn't specified without broadcast. */
6236 else if (t->opcode_modifier.broadcast && i.mem_operands)
6237 {
6238 /* Find memory operand. */
6239 for (op = 0; op < i.operands; op++)
6240 if (i.flags[op] & Operand_Mem)
6241 break;
6242 gas_assert (op < i.operands);
6243 /* Check size of the memory operand. */
6244 if (match_broadcast_size (t, op))
6245 {
6246 i.error = broadcast_needed;
6247 return 1;
6248 }
6249 }
6250 else
6251 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
6252
6253 /* Check if requested masking is supported. */
6254 if (i.mask.reg)
6255 {
6256 switch (t->opcode_modifier.masking)
6257 {
6258 case BOTH_MASKING:
6259 break;
6260 case MERGING_MASKING:
6261 if (i.mask.zeroing)
6262 {
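/* NB: the "case 0:" label below sits inside the "if" on purpose,
so templates without any masking support share this error path
with zeroing-masking used on a merging-only template. */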
6263 case 0:
6264 i.error = unsupported_masking;
6265 return 1;
6266 }
6267 break;
6268 case DYNAMIC_MASKING:
6269 /* Memory destinations allow only merging masking. */
6270 if (i.mask.zeroing && i.mem_operands)
6271 {
6272 /* Find memory operand. */
6273 for (op = 0; op < i.operands; op++)
6274 if (i.flags[op] & Operand_Mem)
6275 break;
6276 gas_assert (op < i.operands);
6277 if (op == i.operands - 1)
6278 {
6279 i.error = unsupported_masking;
6280 return 1;
6281 }
6282 }
6283 break;
6284 default:
6285 abort ();
6286 }
6287 }
6288
6289 /* Check if masking is applied to dest operand. */
6290 if (i.mask.reg && (i.mask.operand != i.operands - 1))
6291 {
6292 i.error = mask_not_on_destination;
6293 return 1;
6294 }
6295
6296 /* Check RC/SAE. */
6297 if (i.rounding.type != rc_none)
6298 {
6299 if (!t->opcode_modifier.sae
6300 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6301 || i.mem_operands)
6302 {
6303 i.error = unsupported_rc_sae;
6304 return 1;
6305 }
6306
6307 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6308 operand. */
6309 if (t->opcode_modifier.evex != EVEXLIG)
6310 {
6311 for (op = 0; op < t->operands; ++op)
6312 if (i.types[op].bitfield.zmmword)
6313 break;
6314 if (op >= t->operands)
6315 {
6316 i.error = operand_size_mismatch;
6317 return 1;
6318 }
6319 }
6320 }
6321
6322 /* Check the special Imm4 cases; the immediate must be the first operand. */
6323 if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6324 {
6325 if (i.op[0].imms->X_op != O_constant
6326 || !fits_in_imm4 (i.op[0].imms->X_add_number))
6327 {
6328 i.error = bad_imm4;
6329 return 1;
6330 }
6331
6332 /* Turn off Imm<N> so that update_imm won't complain. */
6333 operand_type_set (&i.types[0], 0);
6334 }
6335
6336 /* Check vector Disp8 operand. */
6337 if (t->opcode_modifier.disp8memshift
6338 && i.disp_encoding <= disp_encoding_8bit)
6339 {
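/* i.memshift will hold log2 of the scaling factor for EVEX
compressed displacement: an 8-bit displacement is encoded as
the actual displacement divided by (1 << memshift). */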
6340 if (i.broadcast.bytes)
6341 i.memshift = t->opcode_modifier.broadcast - 1;
6342 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6343 i.memshift = t->opcode_modifier.disp8memshift;
6344 else
6345 {
6346 const i386_operand_type *type = NULL, *fallback = NULL;
6347
6348 i.memshift = 0;
6349 for (op = 0; op < i.operands; op++)
6350 if (i.flags[op] & Operand_Mem)
6351 {
6352 if (t->opcode_modifier.evex == EVEXLIG)
6353 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6354 else if (t->operand_types[op].bitfield.xmmword
6355 + t->operand_types[op].bitfield.ymmword
6356 + t->operand_types[op].bitfield.zmmword <= 1)
6357 type = &t->operand_types[op];
6358 else if (!i.types[op].bitfield.unspecified)
6359 type = &i.types[op];
6360 else /* Ambiguities get resolved elsewhere. */
6361 fallback = &t->operand_types[op];
6362 }
6363 else if (i.types[op].bitfield.class == RegSIMD
6364 && t->opcode_modifier.evex != EVEXLIG)
6365 {
6366 if (i.types[op].bitfield.zmmword)
6367 i.memshift = 6;
6368 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6369 i.memshift = 5;
6370 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6371 i.memshift = 4;
6372 }
6373
6374 if (!type && !i.memshift)
6375 type = fallback;
6376 if (type)
6377 {
6378 if (type->bitfield.zmmword)
6379 i.memshift = 6;
6380 else if (type->bitfield.ymmword)
6381 i.memshift = 5;
6382 else if (type->bitfield.xmmword)
6383 i.memshift = 4;
6384 }
6385
6386 /* For the check in fits_in_disp8(). */
6387 if (i.memshift == 0)
6388 i.memshift = -1;
6389 }
6390
6391 for (op = 0; op < i.operands; op++)
6392 if (operand_type_check (i.types[op], disp)
6393 && i.op[op].disps->X_op == O_constant)
6394 {
6395 if (fits_in_disp8 (i.op[op].disps->X_add_number))
6396 {
6397 i.types[op].bitfield.disp8 = 1;
6398 return 0;
6399 }
6400 i.types[op].bitfield.disp8 = 0;
6401 }
6402 }
6403
6404 i.memshift = 0;
6405
6406 return 0;
6407 }
6408
6409 /* Check if encoding requirements are met by the instruction. */
6410
6411 static int
6412 VEX_check_encoding (const insn_template *t)
6413 {
6414 if (i.vec_encoding == vex_encoding_error)
6415 {
6416 i.error = unsupported;
6417 return 1;
6418 }
6419
6420 if (i.vec_encoding == vex_encoding_evex)
6421 {
6422 /* This instruction must be encoded with EVEX prefix. */
6423 if (!is_evex_encoding (t))
6424 {
6425 i.error = unsupported;
6426 return 1;
6427 }
6428 return 0;
6429 }
6430
6431 if (!t->opcode_modifier.vex)
6432 {
6433 /* This instruction template doesn't have VEX prefix. */
6434 if (i.vec_encoding != vex_encoding_default)
6435 {
6436 i.error = unsupported;
6437 return 1;
6438 }
6439 return 0;
6440 }
6441
6442 return 0;
6443 }
6444
6445 /* Helper function for the progress() macro in match_template(). */
6446 static INLINE enum i386_error progress (enum i386_error new,
6447 enum i386_error last,
6448 unsigned int line, unsigned int *line_p)
6449 {
6450 if (line <= *line_p)
6451 return last;
6452 *line_p = line;
6453 return new;
6454 }
6455
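/* match_template() walks all templates sharing the mnemonic and, via
the progress() macro (which wraps the helper above), keeps the error
from whichever template passed the most checks, since a check further
down the function supersedes earlier ones. The diagnostic issued on
failure is thus the most specific one available. */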
6456 static const insn_template *
6457 match_template (char mnem_suffix)
6458 {
6459 /* Points to template once we've found it. */
6460 const insn_template *t;
6461 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6462 i386_operand_type overlap4;
6463 unsigned int found_reverse_match;
6464 i386_operand_type operand_types [MAX_OPERANDS];
6465 int addr_prefix_disp;
6466 unsigned int j, size_match, check_register, errline = __LINE__;
6467 enum i386_error specific_error = number_of_operands_mismatch;
6468 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6469
6470 #if MAX_OPERANDS != 5
6471 # error "MAX_OPERANDS must be 5."
6472 #endif
6473
6474 found_reverse_match = 0;
6475 addr_prefix_disp = -1;
6476
6477 for (t = current_templates->start; t < current_templates->end; t++)
6478 {
6479 addr_prefix_disp = -1;
6480 found_reverse_match = 0;
6481
6482 /* Must have right number of operands. */
6483 if (i.operands != t->operands)
6484 continue;
6485
6486 /* Check processor support. */
6487 specific_error = progress (unsupported);
6488 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6489 continue;
6490
6491 /* Check AT&T mnemonic. */
6492 specific_error = progress (unsupported_with_intel_mnemonic);
6493 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6494 continue;
6495
6496 /* Check AT&T/Intel syntax. */
6497 specific_error = progress (unsupported_syntax);
6498 if ((intel_syntax && t->opcode_modifier.attsyntax)
6499 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6500 continue;
6501
6502 /* Check Intel64/AMD64 ISA. */
6503 switch (isa64)
6504 {
6505 default:
6506 /* Default: Don't accept Intel64. */
6507 if (t->opcode_modifier.isa64 == INTEL64)
6508 continue;
6509 break;
6510 case amd64:
6511 /* -mamd64: Don't accept Intel64 and Intel64-only templates. */
6512 if (t->opcode_modifier.isa64 >= INTEL64)
6513 continue;
6514 break;
6515 case intel64:
6516 /* -mintel64: Don't accept AMD64. */
6517 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6518 continue;
6519 break;
6520 }
6521
6522 /* Check the suffix. */
6523 specific_error = progress (invalid_instruction_suffix);
6524 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
6525 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
6526 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
6527 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
6528 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
6529 continue;
6530
6531 specific_error = progress (operand_size_mismatch);
6532 size_match = operand_size_match (t);
6533 if (!size_match)
6534 continue;
6535
6536 /* This is intentionally not
6537
6538 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6539
6540 as the case of a missing * on the operand is accepted (perhaps with
6541 a warning, issued further down). */
6542 specific_error = progress (operand_type_mismatch);
6543 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6544 continue;
6545
6546 /* In Intel syntax, normally we can check for memory operand size when
6547 there is no mnemonic suffix. But jmp and call have 2 different
6548 encodings with Dword memory operand size. Skip the "near" one
6549 (permitting a register operand) when "far" was requested. */
6550 if (i.far_branch
6551 && t->opcode_modifier.jump == JUMP_ABSOLUTE
6552 && t->operand_types[0].bitfield.class == Reg)
6553 continue;
6554
6555 for (j = 0; j < MAX_OPERANDS; j++)
6556 operand_types[j] = t->operand_types[j];
6557
6558 /* In general, don't allow
6559 - 64-bit operands outside of 64-bit mode,
6560 - 32-bit operands on pre-386. */
6561 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
6562 : operand_size_mismatch);
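/* j indexes the second non-immediate operand when there is one, and
the first otherwise: the class checks below must skip immediates. */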
6563 j = i.imm_operands + (t->operands > i.imm_operands + 1);
6564 if (((i.suffix == QWORD_MNEM_SUFFIX
6565 && flag_code != CODE_64BIT
6566 && !(t->opcode_modifier.opcodespace == SPACE_0F
6567 && t->base_opcode == 0xc7
6568 && t->opcode_modifier.opcodeprefix == PREFIX_NONE
6569 && t->extension_opcode == 1) /* cmpxchg8b */)
6570 || (i.suffix == LONG_MNEM_SUFFIX
6571 && !cpu_arch_flags.bitfield.cpui386))
6572 && (intel_syntax
6573 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6574 && !intel_float_operand (t->name))
6575 : intel_float_operand (t->name) != 2)
6576 && (t->operands == i.imm_operands
6577 || (operand_types[i.imm_operands].bitfield.class != RegMMX
6578 && operand_types[i.imm_operands].bitfield.class != RegSIMD
6579 && operand_types[i.imm_operands].bitfield.class != RegMask)
6580 || (operand_types[j].bitfield.class != RegMMX
6581 && operand_types[j].bitfield.class != RegSIMD
6582 && operand_types[j].bitfield.class != RegMask))
6583 && !t->opcode_modifier.sib)
6584 continue;
6585
6586 /* Do not verify operands when there are none. */
6587 if (!t->operands)
6588 {
6589 if (VEX_check_encoding (t))
6590 {
6591 specific_error = progress (i.error);
6592 continue;
6593 }
6594
6595 /* We've found a match; break out of loop. */
6596 break;
6597 }
6598
6599 if (!t->opcode_modifier.jump
6600 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6601 {
6602 /* There should be only one Disp operand. */
6603 for (j = 0; j < MAX_OPERANDS; j++)
6604 if (operand_type_check (operand_types[j], disp))
6605 break;
6606 if (j < MAX_OPERANDS)
6607 {
6608 bool override = (i.prefix[ADDR_PREFIX] != 0);
6609
6610 addr_prefix_disp = j;
6611
6612 /* An address size prefix turns a Disp64 operand into Disp32, and
6613 a Disp32/Disp16 one into Disp16/Disp32 respectively. */
6614 switch (flag_code)
6615 {
6616 case CODE_16BIT:
6617 override = !override;
6618 /* Fall through. */
6619 case CODE_32BIT:
6620 if (operand_types[j].bitfield.disp32
6621 && operand_types[j].bitfield.disp16)
6622 {
6623 operand_types[j].bitfield.disp16 = override;
6624 operand_types[j].bitfield.disp32 = !override;
6625 }
6626 gas_assert (!operand_types[j].bitfield.disp64);
6627 break;
6628
6629 case CODE_64BIT:
6630 if (operand_types[j].bitfield.disp64)
6631 {
6632 gas_assert (!operand_types[j].bitfield.disp32);
6633 operand_types[j].bitfield.disp32 = override;
6634 operand_types[j].bitfield.disp64 = !override;
6635 }
6636 operand_types[j].bitfield.disp16 = 0;
6637 break;
6638 }
6639 }
6640 }
6641
6642 switch (i.reloc[0])
6643 {
6644 case BFD_RELOC_386_GOT32:
6645 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
6646 if (t->base_opcode == 0xa0
6647 && t->opcode_modifier.opcodespace == SPACE_BASE)
6648 continue;
6649 break;
6650 case BFD_RELOC_386_TLS_GOTIE:
6651 case BFD_RELOC_386_TLS_LE_32:
6652 case BFD_RELOC_X86_64_GOTTPOFF:
6653 case BFD_RELOC_X86_64_TLSLD:
6654 /* Don't allow KMOV in TLS code sequences. */
6655 if (t->opcode_modifier.vex)
6656 continue;
6657 break;
6658 default:
6659 break;
6660 }
6661
6662 /* We check register size if needed. */
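/* check_register is a bitmask with bit N set when the size of
(register) operand N still needs verifying; a broadcasted operand
is exempted below. The (check_register & ...) tests further down
key off individual operand pairs. */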
6663 if (t->opcode_modifier.checkregsize)
6664 {
6665 check_register = (1 << t->operands) - 1;
6666 if (i.broadcast.type || i.broadcast.bytes)
6667 check_register &= ~(1 << i.broadcast.operand);
6668 }
6669 else
6670 check_register = 0;
6671
6672 overlap0 = operand_type_and (i.types[0], operand_types[0]);
6673 switch (t->operands)
6674 {
6675 case 1:
6676 if (!operand_type_match (overlap0, i.types[0]))
6677 continue;
6678 break;
6679 case 2:
6680 /* xchg %eax, %eax is a special case. It is an alias for nop
6681 only in 32bit mode, where we can use opcode 0x90. In 64bit
6682 mode, we can't use 0x90 for xchg %eax, %eax since it should
6683 zero-extend %eax to %rax. */
6684 if (flag_code == CODE_64BIT
6685 && t->base_opcode == 0x90
6686 && t->opcode_modifier.opcodespace == SPACE_BASE
6687 && i.types[0].bitfield.instance == Accum
6688 && i.types[0].bitfield.dword
6689 && i.types[1].bitfield.instance == Accum
6690 && i.types[1].bitfield.dword)
6691 continue;
6692 /* xrelease mov %eax, <disp> is another special case. It must not
6693 match the accumulator-only encoding of mov. */
6694 if (flag_code != CODE_64BIT
6695 && i.hle_prefix
6696 && t->base_opcode == 0xa0
6697 && t->opcode_modifier.opcodespace == SPACE_BASE
6698 && i.types[0].bitfield.instance == Accum
6699 && (i.flags[1] & Operand_Mem))
6700 continue;
6701 /* Fall through. */
6702
6703 case 3:
6704 if (!(size_match & MATCH_STRAIGHT))
6705 goto check_reverse;
6706 /* Reverse direction of operands if swapping is possible in the first
6707 place (operands need to be symmetric) and
6708 - the load form is requested, and the template is a store form,
6709 - the store form is requested, and the template is a load form,
6710 - the non-default (swapped) form is requested. */
6711 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6712 if (t->opcode_modifier.d && i.reg_operands == i.operands
6713 && !operand_type_all_zero (&overlap1))
6714 switch (i.dir_encoding)
6715 {
6716 case dir_encoding_load:
6717 if (operand_type_check (operand_types[i.operands - 1], anymem)
6718 || t->opcode_modifier.regmem)
6719 goto check_reverse;
6720 break;
6721
6722 case dir_encoding_store:
6723 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6724 && !t->opcode_modifier.regmem)
6725 goto check_reverse;
6726 break;
6727
6728 case dir_encoding_swap:
6729 goto check_reverse;
6730
6731 case dir_encoding_default:
6732 break;
6733 }
6734 /* If we want store form, we skip the current load. */
6735 if ((i.dir_encoding == dir_encoding_store
6736 || i.dir_encoding == dir_encoding_swap)
6737 && i.mem_operands == 0
6738 && t->opcode_modifier.load)
6739 continue;
6740 /* Fall through. */
6741 case 4:
6742 case 5:
6743 overlap1 = operand_type_and (i.types[1], operand_types[1]);
6744 if (!operand_type_match (overlap0, i.types[0])
6745 || !operand_type_match (overlap1, i.types[1])
6746 || ((check_register & 3) == 3
6747 && !operand_type_register_match (i.types[0],
6748 operand_types[0],
6749 i.types[1],
6750 operand_types[1])))
6751 {
6752 specific_error = progress (i.error);
6753
6754 /* Check if other direction is valid ... */
6755 if (!t->opcode_modifier.d)
6756 continue;
6757
6758 check_reverse:
6759 if (!(size_match & MATCH_REVERSE))
6760 continue;
6761 /* Try reversing direction of operands. */
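/* j is the operand to pair with operand 0: the one immediately
following it when only VEX sources get swapped, the last operand
otherwise. */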
6762 j = t->opcode_modifier.vexsources ? 1 : i.operands - 1;
6763 overlap0 = operand_type_and (i.types[0], operand_types[j]);
6764 overlap1 = operand_type_and (i.types[j], operand_types[0]);
6765 overlap2 = operand_type_and (i.types[1], operand_types[1]);
6766 gas_assert (t->operands != 3 || !check_register);
6767 if (!operand_type_match (overlap0, i.types[0])
6768 || !operand_type_match (overlap1, i.types[j])
6769 || (t->operands == 3
6770 && !operand_type_match (overlap2, i.types[1]))
6771 || (check_register
6772 && !operand_type_register_match (i.types[0],
6773 operand_types[j],
6774 i.types[j],
6775 operand_types[0])))
6776 {
6777 /* Does not match either direction. */
6778 specific_error = progress (i.error);
6779 continue;
6780 }
6781 /* found_reverse_match holds which variant of D
6782 we've found. */
6783 if (!t->opcode_modifier.d)
6784 found_reverse_match = 0;
6785 else if (operand_types[0].bitfield.tbyte)
6786 {
6787 if (t->opcode_modifier.operandconstraint != UGH)
6788 found_reverse_match = Opcode_FloatD;
6789 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
6790 if ((t->base_opcode & 0x20)
6791 && (intel_syntax || intel_mnemonic))
6792 found_reverse_match |= Opcode_FloatR;
6793 }
6794 else if (t->opcode_modifier.vexsources)
6795 {
6796 found_reverse_match = Opcode_VexW;
6797 goto check_operands_345;
6798 }
6799 else if (t->opcode_modifier.opcodespace != SPACE_BASE
6800 && (t->opcode_modifier.opcodespace != SPACE_0F
6801 /* MOV to/from CR/DR/TR, as an exception, follow
6802 the base opcode space encoding model. */
6803 || (t->base_opcode | 7) != 0x27))
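/* Base opcodes 0x6e/0x7e (MOVD/MOVQ and their 0x6f/0x7f full-vector
siblings) toggle their direction via Opcode_SIMD_IntD; other insns
in the extended opcode spaces use Opcode_ExtD. */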
6804 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6805 ? Opcode_ExtD : Opcode_SIMD_IntD;
6806 else if (!t->opcode_modifier.commutative)
6807 found_reverse_match = Opcode_D;
6808 else
6809 found_reverse_match = ~0;
6810 }
6811 else
6812 {
6813 /* Found a forward 2 operand match here. */
6814 check_operands_345:
6815 switch (t->operands)
6816 {
6817 case 5:
6818 overlap4 = operand_type_and (i.types[4], operand_types[4]);
6819 if (!operand_type_match (overlap4, i.types[4])
6820 || !operand_type_register_match (i.types[3],
6821 operand_types[3],
6822 i.types[4],
6823 operand_types[4]))
6824 {
6825 specific_error = progress (i.error);
6826 continue;
6827 }
6828 /* Fall through. */
6829 case 4:
6830 overlap3 = operand_type_and (i.types[3], operand_types[3]);
6831 if (!operand_type_match (overlap3, i.types[3])
6832 || ((check_register & 0xa) == 0xa
6833 && !operand_type_register_match (i.types[1],
6834 operand_types[1],
6835 i.types[3],
6836 operand_types[3]))
6837 || ((check_register & 0xc) == 0xc
6838 && !operand_type_register_match (i.types[2],
6839 operand_types[2],
6840 i.types[3],
6841 operand_types[3])))
6842 {
6843 specific_error = progress (i.error);
6844 continue;
6845 }
6846 /* Fall through. */
6847 case 3:
6848 overlap2 = operand_type_and (i.types[2], operand_types[2]);
6849 if (!operand_type_match (overlap2, i.types[2])
6850 || ((check_register & 5) == 5
6851 && !operand_type_register_match (i.types[0],
6852 operand_types[0],
6853 i.types[2],
6854 operand_types[2]))
6855 || ((check_register & 6) == 6
6856 && !operand_type_register_match (i.types[1],
6857 operand_types[1],
6858 i.types[2],
6859 operand_types[2])))
6860 {
6861 specific_error = progress (i.error);
6862 continue;
6863 }
6864 break;
6865 }
6866 }
6867 /* Found either forward/reverse 2, 3 or 4 operand match here:
6868 slip through to break. */
6869 }
6870
6871 /* Check if VEX/EVEX encoding requirements can be satisfied. */
6872 if (VEX_check_encoding (t))
6873 {
6874 specific_error = progress (i.error);
6875 continue;
6876 }
6877
6878 /* Check if vector operands are valid. */
6879 if (check_VecOperands (t))
6880 {
6881 specific_error = progress (i.error);
6882 continue;
6883 }
6884
6885 /* We've found a match; break out of loop. */
6886 break;
6887 }
6888
6889 #undef progress
6890
6891 if (t == current_templates->end)
6892 {
6893 /* We found no match. */
6894 const char *err_msg;
6895 switch (specific_error)
6896 {
6897 default:
6898 abort ();
6899 case operand_size_mismatch:
6900 err_msg = _("operand size mismatch");
6901 break;
6902 case operand_type_mismatch:
6903 err_msg = _("operand type mismatch");
6904 break;
6905 case register_type_mismatch:
6906 err_msg = _("register type mismatch");
6907 break;
6908 case number_of_operands_mismatch:
6909 err_msg = _("number of operands mismatch");
6910 break;
6911 case invalid_instruction_suffix:
6912 err_msg = _("invalid instruction suffix");
6913 break;
6914 case bad_imm4:
6915 err_msg = _("constant doesn't fit in 4 bits");
6916 break;
6917 case unsupported_with_intel_mnemonic:
6918 err_msg = _("unsupported with Intel mnemonic");
6919 break;
6920 case unsupported_syntax:
6921 err_msg = _("unsupported syntax");
6922 break;
6923 case unsupported:
6924 as_bad (_("unsupported instruction `%s'"),
6925 current_templates->start->name);
6926 return NULL;
6927 case invalid_sib_address:
6928 err_msg = _("invalid SIB address");
6929 break;
6930 case invalid_vsib_address:
6931 err_msg = _("invalid VSIB address");
6932 break;
6933 case invalid_vector_register_set:
6934 err_msg = _("mask, index, and destination registers must be distinct");
6935 break;
6936 case invalid_tmm_register_set:
6937 err_msg = _("all tmm registers must be distinct");
6938 break;
6939 case invalid_dest_and_src_register_set:
6940 err_msg = _("destination and source registers must be distinct");
6941 break;
6942 case unsupported_vector_index_register:
6943 err_msg = _("unsupported vector index register");
6944 break;
6945 case unsupported_broadcast:
6946 err_msg = _("unsupported broadcast");
6947 break;
6948 case broadcast_needed:
6949 err_msg = _("broadcast is needed for operand of such type");
6950 break;
6951 case unsupported_masking:
6952 err_msg = _("unsupported masking");
6953 break;
6954 case mask_not_on_destination:
6955 err_msg = _("mask not on destination operand");
6956 break;
6957 case no_default_mask:
6958 err_msg = _("default mask isn't allowed");
6959 break;
6960 case unsupported_rc_sae:
6961 err_msg = _("unsupported static rounding/sae");
6962 break;
6963 case invalid_register_operand:
6964 err_msg = _("invalid register operand");
6965 break;
6966 }
6967 as_bad (_("%s for `%s'"), err_msg,
6968 current_templates->start->name);
6969 return NULL;
6970 }
6971
6972 if (!quiet_warnings)
6973 {
6974 if (!intel_syntax
6975 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
6976 as_warn (_("indirect %s without `*'"), t->name);
6977
6978 if (t->opcode_modifier.isprefix
6979 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
6980 {
6981 /* Warn them that a data or address size prefix doesn't
6982 affect assembly of the next line of code. */
6983 as_warn (_("stand-alone `%s' prefix"), t->name);
6984 }
6985 }
6986
6987 /* Copy the template we found. */
6988 install_template (t);
6989
6990 if (addr_prefix_disp != -1)
6991 i.tm.operand_types[addr_prefix_disp]
6992 = operand_types[addr_prefix_disp];
6993
6994 switch (found_reverse_match)
6995 {
6996 case 0:
6997 break;
6998
6999 default:
7000 /* If we found a reverse match we must alter the opcode direction
7001 bit and clear/flip the regmem modifier one. found_reverse_match
7002 holds bits to change (different for int & float insns). */
7003
7004 i.tm.base_opcode ^= found_reverse_match;
7005
7006 /* Certain SIMD insns have their load forms specified in the opcode
7007 table, and hence we need to _set_ RegMem instead of clearing it.
7008 We need to avoid setting the bit though on insns like KMOVW. */
7009 i.tm.opcode_modifier.regmem
7010 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7011 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7012 && !i.tm.opcode_modifier.regmem;
7013
7014 /* Fall through. */
7015 case ~0:
7016 i.tm.operand_types[0] = operand_types[i.operands - 1];
7017 i.tm.operand_types[i.operands - 1] = operand_types[0];
7018 break;
7019
7020 case Opcode_VexW:
7021 /* Only the first two register operands need reversing, alongside
7022 flipping VEX.W. */
7023 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7024
7025 j = i.tm.operand_types[0].bitfield.imm8;
7026 i.tm.operand_types[j] = operand_types[j + 1];
7027 i.tm.operand_types[j + 1] = operand_types[j];
7028 break;
7029 }
7030
7031 return t;
7032 }
7033
7034 static int
7035 check_string (void)
7036 {
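/* es_op is the operand (0 or 1) required to use the %es segment,
encoded in the template's IsString enumerator. */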
7037 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7038 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7039
7040 if (i.seg[op] != NULL && i.seg[op] != reg_es)
7041 {
7042 as_bad (_("`%s' operand %u must use `%ses' segment"),
7043 i.tm.name,
7044 intel_syntax ? i.tm.operands - es_op : es_op + 1,
7045 register_prefix);
7046 return 0;
7047 }
7048
7049 /* There's only ever one segment override allowed per instruction.
7050 This instruction possibly has a legal segment override on the
7051 second operand, so copy the segment to where non-string
7052 instructions store it, allowing common code. */
7053 i.seg[op] = i.seg[1];
7054
7055 return 1;
7056 }
7057
7058 static int
7059 process_suffix (void)
7060 {
7061 bool is_crc32 = false, is_movx = false;
7062
7063 /* If matched instruction specifies an explicit instruction mnemonic
7064 suffix, use it. */
7065 if (i.tm.opcode_modifier.size == SIZE16)
7066 i.suffix = WORD_MNEM_SUFFIX;
7067 else if (i.tm.opcode_modifier.size == SIZE32)
7068 i.suffix = LONG_MNEM_SUFFIX;
7069 else if (i.tm.opcode_modifier.size == SIZE64)
7070 i.suffix = QWORD_MNEM_SUFFIX;
7071 else if (i.reg_operands
7072 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7073 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7074 {
7075 unsigned int numop = i.operands;
7076
7077 /* MOVSX/MOVZX */
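/* The template base opcodes are 0x0f 0xb6 (movzx) and 0x0f 0xbe
(movsx), with the W modifier covering the 16-bit-source forms;
OR-ing in 8 maps both to 0xbe. 0x63 with cpu64 is movsxd. */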
7078 is_movx = (i.tm.opcode_modifier.opcodespace == SPACE_0F
7079 && (i.tm.base_opcode | 8) == 0xbe)
7080 || (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7081 && i.tm.base_opcode == 0x63
7082 && i.tm.cpu_flags.bitfield.cpu64);
7083
7084 /* CRC32 */
7085 is_crc32 = (i.tm.base_opcode == 0xf0
7086 && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7087 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);
7088
7089 /* movsx/movzx want only their source operand considered here, for the
7090 ambiguity checking below. The suffix will be replaced afterwards
7091 to represent the destination (register). */
7092 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7093 --i.operands;
7094
7095 /* crc32 needs REX.W set regardless of suffix / source operand size. */
7096 if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
7097 i.rex |= REX_W;
7098
7099 /* If there's no instruction mnemonic suffix we try to invent one
7100 based on GPR operands. */
7101 if (!i.suffix)
7102 {
7103 /* We take i.suffix from the last register operand specified.
7104 The destination register type is more significant than the
7105 source register type; crc32 in SSE4.2 prefers the source
7106 register type. */
7107 unsigned int op = is_crc32 ? 1 : i.operands;
7108
7109 while (op--)
7110 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7111 || i.tm.operand_types[op].bitfield.instance == Accum)
7112 {
7113 if (i.types[op].bitfield.class != Reg)
7114 continue;
7115 if (i.types[op].bitfield.byte)
7116 i.suffix = BYTE_MNEM_SUFFIX;
7117 else if (i.types[op].bitfield.word)
7118 i.suffix = WORD_MNEM_SUFFIX;
7119 else if (i.types[op].bitfield.dword)
7120 i.suffix = LONG_MNEM_SUFFIX;
7121 else if (i.types[op].bitfield.qword)
7122 i.suffix = QWORD_MNEM_SUFFIX;
7123 else
7124 continue;
7125 break;
7126 }
7127
7128 /* As an exception, movsx/movzx silently default to a byte source
7129 in AT&T mode. */
7130 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7131 i.suffix = BYTE_MNEM_SUFFIX;
7132 }
7133 else if (i.suffix == BYTE_MNEM_SUFFIX)
7134 {
7135 if (!check_byte_reg ())
7136 return 0;
7137 }
7138 else if (i.suffix == LONG_MNEM_SUFFIX)
7139 {
7140 if (!check_long_reg ())
7141 return 0;
7142 }
7143 else if (i.suffix == QWORD_MNEM_SUFFIX)
7144 {
7145 if (!check_qword_reg ())
7146 return 0;
7147 }
7148 else if (i.suffix == WORD_MNEM_SUFFIX)
7149 {
7150 if (!check_word_reg ())
7151 return 0;
7152 }
7153 else if (intel_syntax
7154 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7155 /* Do nothing if the instruction is going to ignore the prefix. */
7156 ;
7157 else
7158 abort ();
7159
7160 /* Undo the movsx/movzx change done above. */
7161 i.operands = numop;
7162 }
7163 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7164 && !i.suffix)
7165 {
7166 i.suffix = stackop_size;
7167 if (stackop_size == LONG_MNEM_SUFFIX)
7168 {
7169 /* stackop_size is set to LONG_MNEM_SUFFIX for the
7170 .code16gcc directive to support 16-bit mode with
7171 32-bit address. For IRET without a suffix, generate
7172 16-bit IRET (opcode 0xcf) to return from an interrupt
7173 handler. */
7174 if (i.tm.base_opcode == 0xcf)
7175 {
7176 i.suffix = WORD_MNEM_SUFFIX;
7177 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7178 }
7179 /* Warn about changed behavior for segment register push/pop. */
7180 else if ((i.tm.base_opcode | 1) == 0x07)
7181 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7182 i.tm.name);
7183 }
7184 }
7185 else if (!i.suffix
7186 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7187 || i.tm.opcode_modifier.jump == JUMP_BYTE
7188 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7189 || (i.tm.opcode_modifier.opcodespace == SPACE_0F
7190 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7191 && i.tm.extension_opcode <= 3)))
7192 {
7193 switch (flag_code)
7194 {
7195 case CODE_64BIT:
7196 if (!i.tm.opcode_modifier.no_qsuf)
7197 {
7198 if (i.tm.opcode_modifier.jump == JUMP_BYTE
7199 || i.tm.opcode_modifier.no_lsuf)
7200 i.suffix = QWORD_MNEM_SUFFIX;
7201 break;
7202 }
7203 /* Fall through. */
7204 case CODE_32BIT:
7205 if (!i.tm.opcode_modifier.no_lsuf)
7206 i.suffix = LONG_MNEM_SUFFIX;
7207 break;
7208 case CODE_16BIT:
7209 if (!i.tm.opcode_modifier.no_wsuf)
7210 i.suffix = WORD_MNEM_SUFFIX;
7211 break;
7212 }
7213 }
7214
7215 if (!i.suffix
7216 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7217 /* Also cover lret/retf/iret in 64-bit mode. */
7218 || (flag_code == CODE_64BIT
7219 && !i.tm.opcode_modifier.no_lsuf
7220 && !i.tm.opcode_modifier.no_qsuf))
7221 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7222 /* Explicit sizing prefixes are assumed to disambiguate insns. */
7223 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7224 /* Accept FLDENV et al without suffix. */
7225 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7226 {
7227 unsigned int suffixes, evex = 0;
7228
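/* Gather the permissible suffixes as a bitmask: bit 0 = b,
bit 1 = w, bit 2 = l, bit 4 = s, bit 5 = q. Bits 6-8 are set
further down for XMM/YMM/ZMMword memory operands. */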
7229 suffixes = !i.tm.opcode_modifier.no_bsuf;
7230 if (!i.tm.opcode_modifier.no_wsuf)
7231 suffixes |= 1 << 1;
7232 if (!i.tm.opcode_modifier.no_lsuf)
7233 suffixes |= 1 << 2;
7234 if (!i.tm.opcode_modifier.no_ssuf)
7235 suffixes |= 1 << 4;
7236 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7237 suffixes |= 1 << 5;
7238
7239 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
7240 also suitable for AT&T syntax mode, it was requested that this be
7241 restricted to just Intel syntax. */
7242 if (intel_syntax && is_any_vex_encoding (&i.tm)
7243 && !i.broadcast.type && !i.broadcast.bytes)
7244 {
7245 unsigned int op;
7246
7247 for (op = 0; op < i.tm.operands; ++op)
7248 {
7249 if (is_evex_encoding (&i.tm)
7250 && !cpu_arch_flags.bitfield.cpuavx512vl)
7251 {
7252 if (i.tm.operand_types[op].bitfield.ymmword)
7253 i.tm.operand_types[op].bitfield.xmmword = 0;
7254 if (i.tm.operand_types[op].bitfield.zmmword)
7255 i.tm.operand_types[op].bitfield.ymmword = 0;
7256 if (!i.tm.opcode_modifier.evex
7257 || i.tm.opcode_modifier.evex == EVEXDYN)
7258 i.tm.opcode_modifier.evex = EVEX512;
7259 }
7260
7261 if (i.tm.operand_types[op].bitfield.xmmword
7262 + i.tm.operand_types[op].bitfield.ymmword
7263 + i.tm.operand_types[op].bitfield.zmmword < 2)
7264 continue;
7265
7266 /* Any properly sized operand disambiguates the insn. */
7267 if (i.types[op].bitfield.xmmword
7268 || i.types[op].bitfield.ymmword
7269 || i.types[op].bitfield.zmmword)
7270 {
7271 suffixes &= ~(7 << 6);
7272 evex = 0;
7273 break;
7274 }
7275
7276 if ((i.flags[op] & Operand_Mem)
7277 && i.tm.operand_types[op].bitfield.unspecified)
7278 {
7279 if (i.tm.operand_types[op].bitfield.xmmword)
7280 suffixes |= 1 << 6;
7281 if (i.tm.operand_types[op].bitfield.ymmword)
7282 suffixes |= 1 << 7;
7283 if (i.tm.operand_types[op].bitfield.zmmword)
7284 suffixes |= 1 << 8;
7285 if (is_evex_encoding (&i.tm))
7286 evex = EVEX512;
7287 }
7288 }
7289 }
7290
7291 /* Are multiple suffixes / operand sizes allowed? */
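/* suffixes & (suffixes - 1) clears the lowest set bit, so the
result is nonzero exactly when more than one suffix remains,
i.e. the operand size is ambiguous. */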
7292 if (suffixes & (suffixes - 1))
7293 {
7294 if (intel_syntax
7295 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7296 || operand_check == check_error))
7297 {
7298 as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
7299 return 0;
7300 }
7301 if (operand_check == check_error)
7302 {
7303 as_bad (_("no instruction mnemonic suffix given and "
7304 "no register operands; can't size `%s'"), i.tm.name);
7305 return 0;
7306 }
7307 if (operand_check == check_warning)
7308 as_warn (_("%s; using default for `%s'"),
7309 intel_syntax
7310 ? _("ambiguous operand size")
7311 : _("no instruction mnemonic suffix given and "
7312 "no register operands"),
7313 i.tm.name);
7314
7315 if (i.tm.opcode_modifier.floatmf)
7316 i.suffix = SHORT_MNEM_SUFFIX;
7317 else if (is_movx)
7318 /* handled below */;
7319 else if (evex)
7320 i.tm.opcode_modifier.evex = evex;
7321 else if (flag_code == CODE_16BIT)
7322 i.suffix = WORD_MNEM_SUFFIX;
7323 else if (!i.tm.opcode_modifier.no_lsuf)
7324 i.suffix = LONG_MNEM_SUFFIX;
7325 else
7326 i.suffix = QWORD_MNEM_SUFFIX;
7327 }
7328 }
7329
7330 if (is_movx)
7331 {
7332 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7333 In AT&T syntax, if there is no suffix (warned about above), the default
7334 will be byte extension. */
7335 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7336 i.tm.base_opcode |= 1;
7337
7338 /* For further processing, the suffix should represent the destination
7339 (register). This is already the case when one was used with
7340 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7341 no suffix to begin with. */
7342 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7343 {
7344 if (i.types[1].bitfield.word)
7345 i.suffix = WORD_MNEM_SUFFIX;
7346 else if (i.types[1].bitfield.qword)
7347 i.suffix = QWORD_MNEM_SUFFIX;
7348 else
7349 i.suffix = LONG_MNEM_SUFFIX;
7350
7351 i.tm.opcode_modifier.w = 0;
7352 }
7353 }
7354
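/* Without a ModR/M byte the register is encoded in the opcode
itself (the "short form"); that applies when exactly one of the
two operands is a register. */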
7355 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7356 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7357 != (i.tm.operand_types[1].bitfield.class == Reg);
7358
7359 /* Change the opcode based on the operand size given by i.suffix. */
7360 switch (i.suffix)
7361 {
7362 /* Size floating point instruction. */
7363 case LONG_MNEM_SUFFIX:
7364 if (i.tm.opcode_modifier.floatmf)
7365 {
7366 i.tm.base_opcode ^= 4;
7367 break;
7368 }
7369 /* fall through */
7370 case WORD_MNEM_SUFFIX:
7371 case QWORD_MNEM_SUFFIX:
7372 /* It's not a byte, select word/dword operation. */
7373 if (i.tm.opcode_modifier.w)
7374 {
7375 if (i.short_form)
7376 i.tm.base_opcode |= 8;
7377 else
7378 i.tm.base_opcode |= 1;
7379 }
7380 /* fall through */
7381 case SHORT_MNEM_SUFFIX:
7382 /* Now select between word & dword operations via the operand
7383 size prefix, except for instructions that will ignore this
7384 prefix anyway. */
7385 if (i.suffix != QWORD_MNEM_SUFFIX
7386 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7387 && !i.tm.opcode_modifier.floatmf
7388 && !is_any_vex_encoding (&i.tm)
7389 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7390 || (flag_code == CODE_64BIT
7391 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7392 {
7393 unsigned int prefix = DATA_PREFIX_OPCODE;
7394
7395 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7396 prefix = ADDR_PREFIX_OPCODE;
7397
7398 if (!add_prefix (prefix))
7399 return 0;
7400 }
7401
7402 /* Set mode64 (REX.W) for an operand. */
7403 if (i.suffix == QWORD_MNEM_SUFFIX
7404 && flag_code == CODE_64BIT
7405 && !i.tm.opcode_modifier.norex64
7406 && !i.tm.opcode_modifier.vexw
7407 /* Special case for xchg %rax,%rax. It is NOP and doesn't
7408 need rex64. */
7409 && ! (i.operands == 2
7410 && i.tm.base_opcode == 0x90
7411 && i.tm.extension_opcode == None
7412 && i.types[0].bitfield.instance == Accum
7413 && i.types[0].bitfield.qword
7414 && i.types[1].bitfield.instance == Accum
7415 && i.types[1].bitfield.qword))
7416 i.rex |= REX_W;
7417
7418 break;
7419
7420 case 0:
7421 /* Select word/dword/qword operation with explicit data sizing prefix
7422 when there are no suitable register operands. */
7423 if (i.tm.opcode_modifier.w
7424 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7425 && (!i.reg_operands
7426 || (i.reg_operands == 1
7427 /* ShiftCount */
7428 && (i.tm.operand_types[0].bitfield.instance == RegC
7429 /* InOutPortReg */
7430 || i.tm.operand_types[0].bitfield.instance == RegD
7431 || i.tm.operand_types[1].bitfield.instance == RegD
7432 /* CRC32 */
7433 || is_crc32))))
7434 i.tm.base_opcode |= 1;
7435 break;
7436 }
7437
7438 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7439 {
7440 gas_assert (!i.suffix);
7441 gas_assert (i.reg_operands);
7442
7443 if (i.tm.operand_types[0].bitfield.instance == Accum
7444 || i.operands == 1)
7445 {
7446 /* The address size override prefix changes the size of the
7447 first operand. */
7448 if (flag_code == CODE_64BIT
7449 && i.op[0].regs->reg_type.bitfield.word)
7450 {
7451 as_bad (_("16-bit addressing unavailable for `%s'"),
7452 i.tm.name);
7453 return 0;
7454 }
7455
7456 if ((flag_code == CODE_32BIT
7457 ? i.op[0].regs->reg_type.bitfield.word
7458 : i.op[0].regs->reg_type.bitfield.dword)
7459 && !add_prefix (ADDR_PREFIX_OPCODE))
7460 return 0;
7461 }
7462 else
7463 {
7464 /* Check for invalid register operands when the address size override
7465 prefix changes the size of register operands. */
7466 unsigned int op;
7467 enum { need_word, need_dword, need_qword } need;
7468
7469 /* Check the register operand for the address size prefix if the
7470 memory operand has no real registers, e.g. a bare symbol, a plain
7471 DISP, or the bogus (x32-only) symbol(%rip) when symbol(%eip) is meant. */
7472 if (i.mem_operands == 1
7473 && i.reg_operands == 1
7474 && i.operands == 2
7475 && i.types[1].bitfield.class == Reg
7476 && (flag_code == CODE_32BIT
7477 ? i.op[1].regs->reg_type.bitfield.word
7478 : i.op[1].regs->reg_type.bitfield.dword)
7479 && ((i.base_reg == NULL && i.index_reg == NULL)
7480 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7481 || (x86_elf_abi == X86_64_X32_ABI
7482 && i.base_reg
7483 && i.base_reg->reg_num == RegIP
7484 && i.base_reg->reg_type.bitfield.qword))
7485 #else
7486 || 0)
7487 #endif
7488 && !add_prefix (ADDR_PREFIX_OPCODE))
7489 return 0;
7490
7491 if (flag_code == CODE_32BIT)
7492 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7493 else if (i.prefix[ADDR_PREFIX])
7494 need = need_dword;
7495 else
7496 need = flag_code == CODE_64BIT ? need_qword : need_word;
7497
7498 for (op = 0; op < i.operands; op++)
7499 {
7500 if (i.types[op].bitfield.class != Reg)
7501 continue;
7502
7503 switch (need)
7504 {
7505 case need_word:
7506 if (i.op[op].regs->reg_type.bitfield.word)
7507 continue;
7508 break;
7509 case need_dword:
7510 if (i.op[op].regs->reg_type.bitfield.dword)
7511 continue;
7512 break;
7513 case need_qword:
7514 if (i.op[op].regs->reg_type.bitfield.qword)
7515 continue;
7516 break;
7517 }
7518
7519 as_bad (_("invalid register operand size for `%s'"),
7520 i.tm.name);
7521 return 0;
7522 }
7523 }
7524 }
7525
7526 return 1;
7527 }
7528
7529 static int
7530 check_byte_reg (void)
7531 {
7532 int op;
7533
7534 for (op = i.operands; --op >= 0;)
7535 {
7536 /* Skip non-register operands. */
7537 if (i.types[op].bitfield.class != Reg)
7538 continue;
7539
7540 /* If this is an eight bit register, it's OK. If it's the 16 or
7541 32 bit version of an eight bit register, we will just use the
7542 low portion, and that's OK too. */
7543 if (i.types[op].bitfield.byte)
7544 continue;
7545
7546 /* I/O port address operands are OK too. */
7547 if (i.tm.operand_types[op].bitfield.instance == RegD
7548 && i.tm.operand_types[op].bitfield.word)
7549 continue;
7550
7551 /* crc32 only wants its source operand checked here. */
7552 if (i.tm.base_opcode == 0xf0
7553 && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7554 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7555 && op != 0)
7556 continue;
7557
7558 /* Any other register is bad. */
7559 as_bad (_("`%s%s' not allowed with `%s%c'"),
7560 register_prefix, i.op[op].regs->reg_name,
7561 i.tm.name, i.suffix);
7562 return 0;
7563 }
7564 return 1;
7565 }
7566
7567 static int
7568 check_long_reg (void)
7569 {
7570 int op;
7571
7572 for (op = i.operands; --op >= 0;)
7573 /* Skip non-register operands. */
7574 if (i.types[op].bitfield.class != Reg)
7575 continue;
7576 /* Reject eight bit registers, except where the template requires
7577 them. (e.g. movzb) */
7578 else if (i.types[op].bitfield.byte
7579 && (i.tm.operand_types[op].bitfield.class == Reg
7580 || i.tm.operand_types[op].bitfield.instance == Accum)
7581 && (i.tm.operand_types[op].bitfield.word
7582 || i.tm.operand_types[op].bitfield.dword))
7583 {
7584 as_bad (_("`%s%s' not allowed with `%s%c'"),
7585 register_prefix,
7586 i.op[op].regs->reg_name,
7587 i.tm.name,
7588 i.suffix);
7589 return 0;
7590 }
7591 /* Error if the e prefix on a general reg is missing. */
7592 else if (i.types[op].bitfield.word
7593 && (i.tm.operand_types[op].bitfield.class == Reg
7594 || i.tm.operand_types[op].bitfield.instance == Accum)
7595 && i.tm.operand_types[op].bitfield.dword)
7596 {
7597 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7598 register_prefix, i.op[op].regs->reg_name,
7599 i.suffix);
7600 return 0;
7601 }
7602 /* Error if the r prefix on a general reg is present. */
7603 else if (i.types[op].bitfield.qword
7604 && (i.tm.operand_types[op].bitfield.class == Reg
7605 || i.tm.operand_types[op].bitfield.instance == Accum)
7606 && i.tm.operand_types[op].bitfield.dword)
7607 {
7608 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7609 register_prefix, i.op[op].regs->reg_name, i.suffix);
7610 return 0;
7611 }
7612 return 1;
7613 }
7614
7615 static int
7616 check_qword_reg (void)
7617 {
7618 int op;
7619
7620 for (op = i.operands; --op >= 0; )
7621 /* Skip non-register operands. */
7622 if (i.types[op].bitfield.class != Reg)
7623 continue;
7624 /* Reject eight bit registers, except where the template requires
7625 them. (e.g. movzb) */
7626 else if (i.types[op].bitfield.byte
7627 && (i.tm.operand_types[op].bitfield.class == Reg
7628 || i.tm.operand_types[op].bitfield.instance == Accum)
7629 && (i.tm.operand_types[op].bitfield.word
7630 || i.tm.operand_types[op].bitfield.dword))
7631 {
7632 as_bad (_("`%s%s' not allowed with `%s%c'"),
7633 register_prefix,
7634 i.op[op].regs->reg_name,
7635 i.tm.name,
7636 i.suffix);
7637 return 0;
7638 }
7639 /* Error if the r prefix on a general reg is missing. */
7640 else if ((i.types[op].bitfield.word
7641 || i.types[op].bitfield.dword)
7642 && (i.tm.operand_types[op].bitfield.class == Reg
7643 || i.tm.operand_types[op].bitfield.instance == Accum)
7644 && i.tm.operand_types[op].bitfield.qword)
7645 {
7646 /* Prohibit these changes in 64bit mode, since the
7647 lowering is more complicated. */
7648 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7649 register_prefix, i.op[op].regs->reg_name, i.suffix);
7650 return 0;
7651 }
7652 return 1;
7653 }
7654
7655 static int
7656 check_word_reg (void)
7657 {
7658 int op;
7659 for (op = i.operands; --op >= 0;)
7660 /* Skip non-register operands. */
7661 if (i.types[op].bitfield.class != Reg)
7662 continue;
7663 /* Reject eight bit registers, except where the template requires
7664 them. (e.g. movzb) */
7665 else if (i.types[op].bitfield.byte
7666 && (i.tm.operand_types[op].bitfield.class == Reg
7667 || i.tm.operand_types[op].bitfield.instance == Accum)
7668 && (i.tm.operand_types[op].bitfield.word
7669 || i.tm.operand_types[op].bitfield.dword))
7670 {
7671 as_bad (_("`%s%s' not allowed with `%s%c'"),
7672 register_prefix,
7673 i.op[op].regs->reg_name,
7674 i.tm.name,
7675 i.suffix);
7676 return 0;
7677 }
7678 /* Error if the e or r prefix on a general reg is present. */
7679 else if ((i.types[op].bitfield.dword
7680 || i.types[op].bitfield.qword)
7681 && (i.tm.operand_types[op].bitfield.class == Reg
7682 || i.tm.operand_types[op].bitfield.instance == Accum)
7683 && i.tm.operand_types[op].bitfield.word)
7684 {
7685 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7686 register_prefix, i.op[op].regs->reg_name,
7687 i.suffix);
7688 return 0;
7689 }
7690 return 1;
7691 }
7692
7693 static int
7694 update_imm (unsigned int j)
7695 {
7696 i386_operand_type overlap = i.types[j];
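/* More than one Imm<N> bit set means the immediate's size is still
ambiguous; narrow it down using the suffix, or failing that the
data size / REX.W prefixes. */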
7697 if (overlap.bitfield.imm8
7698 + overlap.bitfield.imm8s
7699 + overlap.bitfield.imm16
7700 + overlap.bitfield.imm32
7701 + overlap.bitfield.imm32s
7702 + overlap.bitfield.imm64 > 1)
7703 {
7704 if (i.suffix)
7705 {
7706 i386_operand_type temp;
7707
7708 operand_type_set (&temp, 0);
7709 if (i.suffix == BYTE_MNEM_SUFFIX)
7710 {
7711 temp.bitfield.imm8 = overlap.bitfield.imm8;
7712 temp.bitfield.imm8s = overlap.bitfield.imm8s;
7713 }
7714 else if (i.suffix == WORD_MNEM_SUFFIX)
7715 temp.bitfield.imm16 = overlap.bitfield.imm16;
7716 else if (i.suffix == QWORD_MNEM_SUFFIX)
7717 {
7718 temp.bitfield.imm64 = overlap.bitfield.imm64;
7719 temp.bitfield.imm32s = overlap.bitfield.imm32s;
7720 }
7721 else
7722 temp.bitfield.imm32 = overlap.bitfield.imm32;
7723 overlap = temp;
7724 }
7725 else if (operand_type_equal (&overlap, &imm16_32_32s)
7726 || operand_type_equal (&overlap, &imm16_32)
7727 || operand_type_equal (&overlap, &imm16_32s))
7728 {
7729 if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7730 overlap = imm16;
7731 else
7732 overlap = imm32s;
7733 }
7734 else if (i.prefix[REX_PREFIX] & REX_W)
7735 overlap = operand_type_and (overlap, imm32s);
7736 else if (i.prefix[DATA_PREFIX])
7737 overlap = operand_type_and (overlap,
7738 flag_code != CODE_16BIT ? imm16 : imm32);
7739 if (overlap.bitfield.imm8
7740 + overlap.bitfield.imm8s
7741 + overlap.bitfield.imm16
7742 + overlap.bitfield.imm32
7743 + overlap.bitfield.imm32s
7744 + overlap.bitfield.imm64 != 1)
7745 {
7746 as_bad (_("no instruction mnemonic suffix given; "
7747 "can't determine immediate size"));
7748 return 0;
7749 }
7750 }
7751 i.types[j] = overlap;
7752
7753 return 1;
7754 }
7755
7756 static int
7757 finalize_imm (void)
7758 {
7759 unsigned int j, n;
7760
7761 /* Update the first 2 immediate operands. */
7762 n = i.operands > 2 ? 2 : i.operands;
7763 if (n)
7764 {
7765 for (j = 0; j < n; j++)
7766 if (update_imm (j) == 0)
7767 return 0;
7768
7769 /* The 3rd operand can't be an immediate operand. */
7770 gas_assert (operand_type_check (i.types[2], imm) == 0);
7771 }
7772
7773 return 1;
7774 }
7775
7776 static int
7777 process_operands (void)
7778 {
7779 /* Default segment register this instruction will use for memory
7780 accesses. 0 means unknown. This is only for optimizing out
7781 unnecessary segment overrides. */
7782 const reg_entry *default_seg = NULL;
7783
7784 if (i.tm.opcode_modifier.sse2avx)
7785 {
7786 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7787 need converting. */
7788 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7789 i.prefix[REX_PREFIX] = 0;
7790 i.rex_encoding = 0;
7791 }
7792 /* ImmExt should be processed after SSE2AVX. */
7793 else if (i.tm.opcode_modifier.immext)
7794 process_immext ();
7795
7796 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7797 {
7798 unsigned int dupl = i.operands;
7799 unsigned int dest = dupl - 1;
7800 unsigned int j;
7801
7802 /* The destination must be an xmm register. */
7803 gas_assert (i.reg_operands
7804 && MAX_OPERANDS > dupl
7805 && operand_type_equal (&i.types[dest], &regxmm));
7806
7807 if (i.tm.operand_types[0].bitfield.instance == Accum
7808 && i.tm.operand_types[0].bitfield.xmmword)
7809 {
7810 if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7811 {
7812 /* Keep xmm0 for instructions with VEX prefix and 3
7813 sources. */
7814 i.tm.operand_types[0].bitfield.instance = InstanceNone;
7815 i.tm.operand_types[0].bitfield.class = RegSIMD;
7816 goto duplicate;
7817 }
7818 else
7819 {
7820 /* We remove the first xmm0 and keep the number of
7821 operands unchanged, which in fact duplicates the
7822 destination. */
7823 for (j = 1; j < i.operands; j++)
7824 {
7825 i.op[j - 1] = i.op[j];
7826 i.types[j - 1] = i.types[j];
7827 i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7828 i.flags[j - 1] = i.flags[j];
7829 }
7830 }
7831 }
7832 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
7833 {
7834 gas_assert ((MAX_OPERANDS - 1) > dupl
7835 && (i.tm.opcode_modifier.vexsources
7836 == VEX3SOURCES));
7837
7838 /* Add the implicit xmm0 for instructions with VEX prefix
7839 and 3 sources. */
7840 for (j = i.operands; j > 0; j--)
7841 {
7842 i.op[j] = i.op[j - 1];
7843 i.types[j] = i.types[j - 1];
7844 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7845 i.flags[j] = i.flags[j - 1];
7846 }
7847 i.op[0].regs
7848 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7849 i.types[0] = regxmm;
7850 i.tm.operand_types[0] = regxmm;
7851
7852 i.operands += 2;
7853 i.reg_operands += 2;
7854 i.tm.operands += 2;
7855
7856 dupl++;
7857 dest++;
7858 i.op[dupl] = i.op[dest];
7859 i.types[dupl] = i.types[dest];
7860 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7861 i.flags[dupl] = i.flags[dest];
7862 }
7863 else
7864 {
7865 duplicate:
7866 i.operands++;
7867 i.reg_operands++;
7868 i.tm.operands++;
7869
7870 i.op[dupl] = i.op[dest];
7871 i.types[dupl] = i.types[dest];
7872 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7873 i.flags[dupl] = i.flags[dest];
7874 }
7875
7876 if (i.tm.opcode_modifier.immext)
7877 process_immext ();
7878 }
7879 else if (i.tm.operand_types[0].bitfield.instance == Accum
7880 && i.tm.operand_types[0].bitfield.xmmword)
7881 {
7882 unsigned int j;
7883
7884 for (j = 1; j < i.operands; j++)
7885 {
7886 i.op[j - 1] = i.op[j];
7887 i.types[j - 1] = i.types[j];
7888
7889 /* We need to adjust fields in i.tm since they are used by
7890 build_modrm_byte. */
7891 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
7892
7893 i.flags[j - 1] = i.flags[j];
7894 }
7895
7896 i.operands--;
7897 i.reg_operands--;
7898 i.tm.operands--;
7899 }
7900 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
7901 {
7902 unsigned int regnum, first_reg_in_group, last_reg_in_group;
7903
7904 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
7905 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
7906 regnum = register_number (i.op[1].regs);
7907 first_reg_in_group = regnum & ~3;
7908 last_reg_in_group = first_reg_in_group + 3;
7909 if (regnum != first_reg_in_group)
7910 as_warn (_("source register `%s%s' implicitly denotes"
7911 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
7912 register_prefix, i.op[1].regs->reg_name,
7913 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
7914 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
7915 i.tm.name);
7916 }
7917 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
7918 {
7919 /* The imul $imm, %reg instruction is converted into
7920 imul $imm, %reg, %reg, and the clr %reg instruction
7921 is converted into xor %reg, %reg. */
7922
7923 unsigned int first_reg_op;
7924
7925 if (operand_type_check (i.types[0], reg))
7926 first_reg_op = 0;
7927 else
7928 first_reg_op = 1;
7929 /* Pretend we saw the extra register operand. */
7930 gas_assert (i.reg_operands == 1
7931 && i.op[first_reg_op + 1].regs == 0);
7932 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
7933 i.types[first_reg_op + 1] = i.types[first_reg_op];
7934 i.operands++;
7935 i.reg_operands++;
7936 }
7937
7938 if (i.tm.opcode_modifier.modrm)
7939 {
7940 /* The opcode is completed (modulo i.tm.extension_opcode which
7941 must be put into the modrm byte). Now, we make the modrm and
7942 index base bytes based on all the info we've collected. */
7943
7944 default_seg = build_modrm_byte ();
7945 }
7946 else if (i.types[0].bitfield.class == SReg)
7947 {
7948 if (flag_code != CODE_64BIT
7949 ? i.tm.base_opcode == POP_SEG_SHORT
7950 && i.op[0].regs->reg_num == 1
7951 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
7952 && i.op[0].regs->reg_num < 4)
7953 {
7954 as_bad (_("you can't `%s %s%s'"),
7955 i.tm.name, register_prefix, i.op[0].regs->reg_name);
7956 return 0;
7957 }
7958 if (i.op[0].regs->reg_num > 3
7959 && i.tm.opcode_modifier.opcodespace == SPACE_BASE)
7960 {
7961 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
7962 i.tm.opcode_modifier.opcodespace = SPACE_0F;
7963 }
7964 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
7965 }
7966 else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7967 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
7968 {
7969 default_seg = reg_ds;
7970 }
7971 else if (i.tm.opcode_modifier.isstring)
7972 {
7973 /* For the string instructions that allow a segment override
7974 on one of their operands, the default segment is ds. */
7975 default_seg = reg_ds;
7976 }
7977 else if (i.short_form)
7978 {
7979 /* The register or float register operand is in operand
7980 0 or 1. */
7981 const reg_entry *r = i.op[0].regs;
7982
7983 if (i.imm_operands
7984 || (r->reg_type.bitfield.instance == Accum && i.op[1].regs))
7985 r = i.op[1].regs;
7986 /* Register goes in low 3 bits of opcode. */
7987 i.tm.base_opcode |= r->reg_num;
7988 if ((r->reg_flags & RegRex) != 0)
7989 i.rex |= REX_B;
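/* E.g. "push %rcx" ORs reg_num 1 into base opcode 0x50, giving 0x51;
   "push %r9" additionally sets REX.B, yielding 41 51 (an illustration
   assuming the usual short-form push encoding).  */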
7990 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
7991 {
7992 /* Warn about some common errors, but press on regardless. */
7993 if (i.operands != 2)
7994 {
7995 /* Extraneous `l' suffix on fp insn. */
7996 as_warn (_("translating to `%s %s%s'"), i.tm.name,
7997 register_prefix, i.op[0].regs->reg_name);
7998 }
7999 else if (i.op[0].regs->reg_type.bitfield.instance != Accum)
8000 {
8001 /* Reversed arguments on faddp or fmulp. */
8002 as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
8003 register_prefix, i.op[!intel_syntax].regs->reg_name,
8004 register_prefix, i.op[intel_syntax].regs->reg_name);
8005 }
8006 }
8007 }
8008
8009 if ((i.seg[0] || i.prefix[SEG_PREFIX])
8010 && i.tm.base_opcode == 0x8d /* lea */
8011 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
8012 && !is_any_vex_encoding(&i.tm))
8013 {
8014 if (!quiet_warnings)
8015 as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
8016 if (optimize)
8017 {
8018 i.seg[0] = NULL;
8019 i.prefix[SEG_PREFIX] = 0;
8020 }
8021 }
8022
8023 /* If a segment was explicitly specified, and the specified segment
8024 is neither the default nor the one already recorded from a prefix,
8025 use an opcode prefix to select it. If we never figured out what
8026 the default segment is, then default_seg will be zero at this
8027 point, and the specified segment prefix will always be used. */
8028 if (i.seg[0]
8029 && i.seg[0] != default_seg
8030 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8031 {
8032 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8033 return 0;
8034 }
8035 return 1;
8036 }
8037
8038 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
8039 bool do_sse2avx)
8040 {
8041 if (r->reg_flags & RegRex)
8042 {
8043 if (i.rex & rex_bit)
8044 as_bad (_("same type of prefix used twice"));
8045 i.rex |= rex_bit;
8046 }
8047 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8048 {
8049 gas_assert (i.vex.register_specifier == r);
8050 i.vex.register_specifier += 8;
8051 }
8052
8053 if (r->reg_flags & RegVRex)
8054 i.vrex |= rex_bit;
8055 }
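/* For instance, %r10 carries RegRex, so its bit is ORed into i.rex,
   while %xmm20 carries RegVRex and lands in i.vrex instead (EVEX-only
   territory).  This is an illustration, not an exhaustive description
   of the register table.  */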
8056
8057 static const reg_entry *
8058 build_modrm_byte (void)
8059 {
8060 const reg_entry *default_seg = NULL;
8061 unsigned int source, dest;
8062 int vex_3_sources;
8063
8064 vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
8065 if (vex_3_sources)
8066 {
8067 unsigned int nds, reg_slot;
8068 expressionS *exp;
8069
8070 dest = i.operands - 1;
8071 nds = dest - 1;
8072
8073 /* There are 2 kinds of instructions:
8074 1. 5 operands: 4 register operands, or 3 register operands
8075 plus 1 memory operand plus one Imm4 operand, with VexXDS and
8076 VexW0 or VexW1. The destination must be an XMM, YMM, or
8077 ZMM register.
8078 2. 4 operands: 4 register operands, or 3 register operands
8079 plus 1 memory operand, with VexXDS. */
8080 gas_assert ((i.reg_operands == 4
8081 || (i.reg_operands == 3 && i.mem_operands == 1))
8082 && i.tm.opcode_modifier.vexvvvv == VEXXDS
8083 && i.tm.opcode_modifier.vexw
8084 && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8085
8086 /* If VexW1 is set, the first non-immediate operand is the source and
8087 the second non-immediate one is encoded in the immediate operand. */
8088 if (i.tm.opcode_modifier.vexw == VEXW1)
8089 {
8090 source = i.imm_operands;
8091 reg_slot = i.imm_operands + 1;
8092 }
8093 else
8094 {
8095 source = i.imm_operands + 1;
8096 reg_slot = i.imm_operands;
8097 }
8098
8099 if (i.imm_operands == 0)
8100 {
8101 /* When there is no immediate operand, generate an 8bit
8102 immediate operand to encode the first operand. */
8103 exp = &im_expressions[i.imm_operands++];
8104 i.op[i.operands].imms = exp;
8105 i.types[i.operands].bitfield.imm8 = 1;
8106 i.operands++;
8107
8108 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8109 exp->X_op = O_constant;
8110 exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8111 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8112 }
8113 else
8114 {
8115 gas_assert (i.imm_operands == 1);
8116 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8117 gas_assert (!i.tm.opcode_modifier.immext);
8118
8119 /* Turn on Imm8 again so that output_imm will generate it. */
8120 i.types[0].bitfield.imm8 = 1;
8121
8122 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8123 i.op[0].imms->X_add_number
8124 |= register_number (i.op[reg_slot].regs) << 4;
8125 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8126 }
8127
8128 gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8129 i.vex.register_specifier = i.op[nds].regs;
8130 }
8131 else
8132 source = dest = 0;
8133
8134 /* i.reg_operands MUST be the number of real register operands;
8135 implicit registers do not count. If there are 3 register
8136 operands, it must be an instruction with VexNDS. For an
8137 instruction with VexNDD, the destination register is encoded
8138 in the VEX prefix. If there are 4 register operands, it must be
8139 an instruction with a VEX prefix and 3 sources. */
8140 if (i.mem_operands == 0
8141 && ((i.reg_operands == 2
8142 && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8143 || (i.reg_operands == 3
8144 && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8145 || (i.reg_operands == 4 && vex_3_sources)))
8146 {
8147 switch (i.operands)
8148 {
8149 case 2:
8150 source = 0;
8151 break;
8152 case 3:
8153 /* When there are 3 operands, one of them may be an immediate,
8154 which may be the first or the last operand. Otherwise,
8155 the first operand must be the shift count register (%cl) or
8156 this is an instruction with VexNDS. */
8157 gas_assert (i.imm_operands == 1
8158 || (i.imm_operands == 0
8159 && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8160 || (i.types[0].bitfield.instance == RegC
8161 && i.types[0].bitfield.byte))));
8162 if (operand_type_check (i.types[0], imm)
8163 || (i.types[0].bitfield.instance == RegC
8164 && i.types[0].bitfield.byte))
8165 source = 1;
8166 else
8167 source = 0;
8168 break;
8169 case 4:
8170 /* When there are 4 operands, the first two must be 8bit
8171 immediate operands. The source operand will be the 3rd
8172 one.
8173
8174 For instructions with VexNDS, if the first operand is
8175 an imm8, the source operand is the 2nd one. If the last
8176 operand is an imm8, the source operand is the first one. */
8177 gas_assert ((i.imm_operands == 2
8178 && i.types[0].bitfield.imm8
8179 && i.types[1].bitfield.imm8)
8180 || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8181 && i.imm_operands == 1
8182 && (i.types[0].bitfield.imm8
8183 || i.types[i.operands - 1].bitfield.imm8)));
8184 if (i.imm_operands == 2)
8185 source = 2;
8186 else
8187 {
8188 if (i.types[0].bitfield.imm8)
8189 source = 1;
8190 else
8191 source = 0;
8192 }
8193 break;
8194 case 5:
8195 gas_assert (!is_evex_encoding (&i.tm));
8196 gas_assert (i.imm_operands == 1 && vex_3_sources);
8197 break;
8198 default:
8199 abort ();
8200 }
8201
8202 if (!vex_3_sources)
8203 {
8204 dest = source + 1;
8205
8206 if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8207 {
8208 /* For instructions with VexNDS, the register-only source
8209 operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8210 register. It is encoded in VEX prefix. */
8211
8212 i386_operand_type op;
8213 unsigned int vvvv;
8214
8215 /* Swap two source operands if needed. */
8216 if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES)
8217 {
8218 vvvv = source;
8219 source = dest;
8220 }
8221 else
8222 vvvv = dest;
8223
8224 op = i.tm.operand_types[vvvv];
8225 if ((dest + 1) >= i.operands
8226 || ((op.bitfield.class != Reg
8227 || (!op.bitfield.dword && !op.bitfield.qword))
8228 && op.bitfield.class != RegSIMD
8229 && op.bitfield.class != RegMask))
8230 abort ();
8231 i.vex.register_specifier = i.op[vvvv].regs;
8232 dest++;
8233 }
8234 }
8235
8236 i.rm.mode = 3;
8237 /* One of the register operands will be encoded in the i.rm.reg
8238 field, the other in the combined i.rm.mode and i.rm.regmem
8239 fields. If no form of this instruction supports a memory
8240 destination operand, then we assume the source operand may
8241 sometimes be a memory operand and so we need to store the
8242 destination in the i.rm.reg field. */
8243 if (!i.tm.opcode_modifier.regmem
8244 && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8245 {
8246 i.rm.reg = i.op[dest].regs->reg_num;
8247 i.rm.regmem = i.op[source].regs->reg_num;
8248 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8249 set_rex_vrex (i.op[source].regs, REX_B, false);
8250 }
8251 else
8252 {
8253 i.rm.reg = i.op[source].regs->reg_num;
8254 i.rm.regmem = i.op[dest].regs->reg_num;
8255 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8256 set_rex_vrex (i.op[source].regs, REX_R, false);
8257 }
8258 if (flag_code != CODE_64BIT && (i.rex & REX_R))
8259 {
8260 if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8261 abort ();
8262 i.rex &= ~REX_R;
8263 add_prefix (LOCK_PREFIX_OPCODE);
8264 }
8265 }
8266 else
8267 { /* If it's not 2 reg operands... */
8268 unsigned int mem;
8269
8270 if (i.mem_operands)
8271 {
8272 unsigned int fake_zero_displacement = 0;
8273 unsigned int op;
8274
8275 for (op = 0; op < i.operands; op++)
8276 if (i.flags[op] & Operand_Mem)
8277 break;
8278 gas_assert (op < i.operands);
8279
8280 if (i.tm.opcode_modifier.sib)
8281 {
8282 /* The index register of VSIB shouldn't be RegIZ. */
8283 if (i.tm.opcode_modifier.sib != SIBMEM
8284 && i.index_reg->reg_num == RegIZ)
8285 abort ();
8286
8287 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8288 if (!i.base_reg)
8289 {
8290 i.sib.base = NO_BASE_REGISTER;
8291 i.sib.scale = i.log2_scale_factor;
8292 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8293 i.types[op].bitfield.disp32 = 1;
8294 }
8295
8296 /* Since the mandatory SIB always has an index register,
8297 the code logic remains unchanged. The non-mandatory SIB
8298 without an index register is allowed and will be handled
8299 later. */
8300 if (i.index_reg)
8301 {
8302 if (i.index_reg->reg_num == RegIZ)
8303 i.sib.index = NO_INDEX_REGISTER;
8304 else
8305 i.sib.index = i.index_reg->reg_num;
8306 set_rex_vrex (i.index_reg, REX_X, false);
8307 }
8308 }
8309
8310 default_seg = reg_ds;
8311
8312 if (i.base_reg == 0)
8313 {
8314 i.rm.mode = 0;
8315 if (!i.disp_operands)
8316 fake_zero_displacement = 1;
8317 if (i.index_reg == 0)
8318 {
8319 /* Check for both VSIB and mandatory non-vector SIB. */
8320 gas_assert (!i.tm.opcode_modifier.sib
8321 || i.tm.opcode_modifier.sib == SIBMEM);
8322 /* Operand is just <disp> */
8323 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8324 if (flag_code == CODE_64BIT)
8325 {
8326 /* 64bit mode replaces the 32bit absolute
8327 addressing form with RIP relative addressing;
8328 absolute addressing is instead encoded by one
8329 of the redundant SIB forms. */
8330 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8331 i.sib.base = NO_BASE_REGISTER;
8332 i.sib.index = NO_INDEX_REGISTER;
8333 i.types[op].bitfield.disp32 = 1;
8334 }
8335 else if ((flag_code == CODE_16BIT)
8336 ^ (i.prefix[ADDR_PREFIX] != 0))
8337 {
8338 i.rm.regmem = NO_BASE_REGISTER_16;
8339 i.types[op].bitfield.disp16 = 1;
8340 }
8341 else
8342 {
8343 i.rm.regmem = NO_BASE_REGISTER;
8344 i.types[op].bitfield.disp32 = 1;
8345 }
8346 }
8347 else if (!i.tm.opcode_modifier.sib)
8348 {
8349 /* !i.base_reg && i.index_reg */
8350 if (i.index_reg->reg_num == RegIZ)
8351 i.sib.index = NO_INDEX_REGISTER;
8352 else
8353 i.sib.index = i.index_reg->reg_num;
8354 i.sib.base = NO_BASE_REGISTER;
8355 i.sib.scale = i.log2_scale_factor;
8356 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8357 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8358 i.types[op].bitfield.disp32 = 1;
8359 if ((i.index_reg->reg_flags & RegRex) != 0)
8360 i.rex |= REX_X;
8361 }
8362 }
8363 /* RIP addressing for 64bit mode. */
8364 else if (i.base_reg->reg_num == RegIP)
8365 {
8366 gas_assert (!i.tm.opcode_modifier.sib);
8367 i.rm.regmem = NO_BASE_REGISTER;
8368 i.types[op].bitfield.disp8 = 0;
8369 i.types[op].bitfield.disp16 = 0;
8370 i.types[op].bitfield.disp32 = 1;
8371 i.types[op].bitfield.disp64 = 0;
8372 i.flags[op] |= Operand_PCrel;
8373 if (! i.disp_operands)
8374 fake_zero_displacement = 1;
8375 }
8376 else if (i.base_reg->reg_type.bitfield.word)
8377 {
8378 gas_assert (!i.tm.opcode_modifier.sib);
8379 switch (i.base_reg->reg_num)
8380 {
8381 case 3: /* (%bx) */
8382 if (i.index_reg == 0)
8383 i.rm.regmem = 7;
8384 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
8385 i.rm.regmem = i.index_reg->reg_num - 6;
8386 break;
8387 case 5: /* (%bp) */
8388 default_seg = reg_ss;
8389 if (i.index_reg == 0)
8390 {
8391 i.rm.regmem = 6;
8392 if (operand_type_check (i.types[op], disp) == 0)
8393 {
8394 /* fake (%bp) into 0(%bp) */
8395 if (i.disp_encoding == disp_encoding_16bit)
8396 i.types[op].bitfield.disp16 = 1;
8397 else
8398 i.types[op].bitfield.disp8 = 1;
8399 fake_zero_displacement = 1;
8400 }
8401 }
8402 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
8403 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8404 break;
8405 default: /* (%si) -> 4 or (%di) -> 5 */
8406 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8407 }
8408 if (!fake_zero_displacement
8409 && !i.disp_operands
8410 && i.disp_encoding)
8411 {
8412 fake_zero_displacement = 1;
8413 if (i.disp_encoding == disp_encoding_8bit)
8414 i.types[op].bitfield.disp8 = 1;
8415 else
8416 i.types[op].bitfield.disp16 = 1;
8417 }
8418 i.rm.mode = mode_from_disp_size (i.types[op]);
8419 }
8420 else /* i.base_reg and 32/64 bit mode */
8421 {
8422 if (operand_type_check (i.types[op], disp))
8423 {
8424 i.types[op].bitfield.disp16 = 0;
8425 i.types[op].bitfield.disp64 = 0;
8426 i.types[op].bitfield.disp32 = 1;
8427 }
8428
8429 if (!i.tm.opcode_modifier.sib)
8430 i.rm.regmem = i.base_reg->reg_num;
8431 if ((i.base_reg->reg_flags & RegRex) != 0)
8432 i.rex |= REX_B;
8433 i.sib.base = i.base_reg->reg_num;
8434 /* x86-64 ignores REX prefix bit here to avoid decoder
8435 complications. */
8436 if (!(i.base_reg->reg_flags & RegRex)
8437 && (i.base_reg->reg_num == EBP_REG_NUM
8438 || i.base_reg->reg_num == ESP_REG_NUM))
8439 default_seg = reg_ss;
8440 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8441 {
8442 fake_zero_displacement = 1;
8443 if (i.disp_encoding == disp_encoding_32bit)
8444 i.types[op].bitfield.disp32 = 1;
8445 else
8446 i.types[op].bitfield.disp8 = 1;
8447 }
8448 i.sib.scale = i.log2_scale_factor;
8449 if (i.index_reg == 0)
8450 {
8451 /* Only check for VSIB. */
8452 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8453 && i.tm.opcode_modifier.sib != VECSIB256
8454 && i.tm.opcode_modifier.sib != VECSIB512);
8455
8456 /* <disp>(%esp) becomes a two-byte modrm with no index
8457 register. We've already stored the code for esp
8458 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8459 Any base register besides %esp will not use the
8460 extra modrm byte. */
8461 i.sib.index = NO_INDEX_REGISTER;
8462 }
8463 else if (!i.tm.opcode_modifier.sib)
8464 {
8465 if (i.index_reg->reg_num == RegIZ)
8466 i.sib.index = NO_INDEX_REGISTER;
8467 else
8468 i.sib.index = i.index_reg->reg_num;
8469 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8470 if ((i.index_reg->reg_flags & RegRex) != 0)
8471 i.rex |= REX_X;
8472 }
8473
8474 if (i.disp_operands
8475 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8476 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8477 i.rm.mode = 0;
8478 else
8479 {
8480 if (!fake_zero_displacement
8481 && !i.disp_operands
8482 && i.disp_encoding)
8483 {
8484 fake_zero_displacement = 1;
8485 if (i.disp_encoding == disp_encoding_8bit)
8486 i.types[op].bitfield.disp8 = 1;
8487 else
8488 i.types[op].bitfield.disp32 = 1;
8489 }
8490 i.rm.mode = mode_from_disp_size (i.types[op]);
8491 }
8492 }
8493
8494 if (fake_zero_displacement)
8495 {
8496 /* Fakes a zero displacement assuming that i.types[op]
8497 holds the correct displacement size. */
8498 expressionS *exp;
8499
8500 gas_assert (i.op[op].disps == 0);
8501 exp = &disp_expressions[i.disp_operands++];
8502 i.op[op].disps = exp;
8503 exp->X_op = O_constant;
8504 exp->X_add_number = 0;
8505 exp->X_add_symbol = (symbolS *) 0;
8506 exp->X_op_symbol = (symbolS *) 0;
8507 }
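/* E.g. a bare "(%ebp)" has no displacement operand, so a constant 0
   expression is synthesized here and later emitted as the disp8 of
   "0(%ebp)".  */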
8508
8509 mem = op;
8510 }
8511 else
8512 mem = ~0;
8513
8514 if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8515 {
8516 if (operand_type_check (i.types[0], imm))
8517 i.vex.register_specifier = NULL;
8518 else
8519 {
8520 /* VEX.vvvv encodes one of the sources when the first
8521 operand is not an immediate. */
8522 if (i.tm.opcode_modifier.vexw == VEXW0)
8523 i.vex.register_specifier = i.op[0].regs;
8524 else
8525 i.vex.register_specifier = i.op[1].regs;
8526 }
8527
8528 /* Destination is an XMM register encoded in the ModRM.reg
8529 field and the VEX.R bit. */
8530 i.rm.reg = i.op[2].regs->reg_num;
8531 if ((i.op[2].regs->reg_flags & RegRex) != 0)
8532 i.rex |= REX_R;
8533
8534 /* ModRM.rm and VEX.B encode the other source. */
8535 if (!i.mem_operands)
8536 {
8537 i.rm.mode = 3;
8538
8539 if (i.tm.opcode_modifier.vexw == VEXW0)
8540 i.rm.regmem = i.op[1].regs->reg_num;
8541 else
8542 i.rm.regmem = i.op[0].regs->reg_num;
8543
8544 if ((i.op[1].regs->reg_flags & RegRex) != 0)
8545 i.rex |= REX_B;
8546 }
8547 }
8548 else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8549 {
8550 i.vex.register_specifier = i.op[2].regs;
8551 if (!i.mem_operands)
8552 {
8553 i.rm.mode = 3;
8554 i.rm.regmem = i.op[1].regs->reg_num;
8555 if ((i.op[1].regs->reg_flags & RegRex) != 0)
8556 i.rex |= REX_B;
8557 }
8558 }
8559 /* Fill in i.rm.reg or i.rm.regmem field with register operand
8560 (if any) based on i.tm.extension_opcode. Again, we must be
8561 careful to make sure that segment/control/debug/test/MMX
8562 registers are coded into the i.rm.reg field. */
8563 else if (i.reg_operands)
8564 {
8565 unsigned int op;
8566 unsigned int vex_reg = ~0;
8567
8568 for (op = 0; op < i.operands; op++)
8569 if (i.types[op].bitfield.class == Reg
8570 || i.types[op].bitfield.class == RegBND
8571 || i.types[op].bitfield.class == RegMask
8572 || i.types[op].bitfield.class == SReg
8573 || i.types[op].bitfield.class == RegCR
8574 || i.types[op].bitfield.class == RegDR
8575 || i.types[op].bitfield.class == RegTR
8576 || i.types[op].bitfield.class == RegSIMD
8577 || i.types[op].bitfield.class == RegMMX)
8578 break;
8579
8580 if (vex_3_sources)
8581 op = dest;
8582 else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8583 {
8584 /* For instructions with VexNDS, the register-only
8585 source operand is encoded in VEX prefix. */
8586 gas_assert (mem != (unsigned int) ~0);
8587
8588 if (op > mem || i.tm.cpu_flags.bitfield.cpucmpccxadd)
8589 {
8590 vex_reg = op++;
8591 gas_assert (op < i.operands);
8592 }
8593 else
8594 {
8595 /* Check register-only source operand when two source
8596 operands are swapped. */
8597 if (!i.tm.operand_types[op].bitfield.baseindex
8598 && i.tm.operand_types[op + 1].bitfield.baseindex)
8599 {
8600 vex_reg = op;
8601 op += 2;
8602 gas_assert (mem == (vex_reg + 1)
8603 && op < i.operands);
8604 }
8605 else
8606 {
8607 vex_reg = op + 1;
8608 gas_assert (vex_reg < i.operands);
8609 }
8610 }
8611 }
8612 else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8613 {
8614 /* For instructions with VexNDD, the register destination
8615 is encoded in VEX prefix. */
8616 if (i.mem_operands == 0)
8617 {
8618 /* There is no memory operand. */
8619 gas_assert ((op + 2) == i.operands);
8620 vex_reg = op + 1;
8621 }
8622 else
8623 {
8624 /* There are only 2 non-immediate operands. */
8625 gas_assert (op < i.imm_operands + 2
8626 && i.operands == i.imm_operands + 2);
8627 vex_reg = i.imm_operands + 1;
8628 }
8629 }
8630 else
8631 gas_assert (op < i.operands);
8632
8633 if (vex_reg != (unsigned int) ~0)
8634 {
8635 i386_operand_type *type = &i.tm.operand_types[vex_reg];
8636
8637 if ((type->bitfield.class != Reg
8638 || (!type->bitfield.dword && !type->bitfield.qword))
8639 && type->bitfield.class != RegSIMD
8640 && type->bitfield.class != RegMask)
8641 abort ();
8642
8643 i.vex.register_specifier = i.op[vex_reg].regs;
8644 }
8645
8646 /* Don't set OP operand twice. */
8647 if (vex_reg != op)
8648 {
8649 /* If there is an extension opcode to put here, the
8650 register number must be put into the regmem field. */
8651 if (i.tm.extension_opcode != None)
8652 {
8653 i.rm.regmem = i.op[op].regs->reg_num;
8654 set_rex_vrex (i.op[op].regs, REX_B,
8655 i.tm.opcode_modifier.sse2avx);
8656 }
8657 else
8658 {
8659 i.rm.reg = i.op[op].regs->reg_num;
8660 set_rex_vrex (i.op[op].regs, REX_R,
8661 i.tm.opcode_modifier.sse2avx);
8662 }
8663 }
8664
8665 /* Now, if no memory operand has set i.rm.mode to 0, 1, or 2, we
8666 must set it to 3 to indicate this is a register operand
8667 in the regmem field. */
8668 if (!i.mem_operands)
8669 i.rm.mode = 3;
8670 }
8671
8672 /* Fill in i.rm.reg field with extension opcode (if any). */
8673 if (i.tm.extension_opcode != None)
8674 i.rm.reg = i.tm.extension_opcode;
8675 }
8676 return default_seg;
8677 }
8678
8679 static INLINE void
8680 frag_opcode_byte (unsigned char byte)
8681 {
8682 if (now_seg != absolute_section)
8683 FRAG_APPEND_1_CHAR (byte);
8684 else
8685 ++abs_section_offset;
8686 }
8687
8688 static unsigned int
8689 flip_code16 (unsigned int code16)
8690 {
8691 gas_assert (i.tm.operands == 1);
8692
8693 return !(i.prefix[REX_PREFIX] & REX_W)
8694 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8695 : i.tm.operand_types[0].bitfield.disp16)
8696 ? CODE16 : 0;
8697 }
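/* A sketch of the intent: in 16-bit code, a branch template whose
   operand carries Disp32 reports CODE16 here, so the caller's XOR
   flips it off and the 32-bit displacement form is used (and vice
   versa for Disp16 in 32-bit code).  */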
8698
8699 static void
8700 output_branch (void)
8701 {
8702 char *p;
8703 int size;
8704 int code16;
8705 int prefix;
8706 relax_substateT subtype;
8707 symbolS *sym;
8708 offsetT off;
8709
8710 if (now_seg == absolute_section)
8711 {
8712 as_bad (_("relaxable branches not supported in absolute section"));
8713 return;
8714 }
8715
8716 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8717 size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8718
8719 prefix = 0;
8720 if (i.prefix[DATA_PREFIX] != 0)
8721 {
8722 prefix = 1;
8723 i.prefixes -= 1;
8724 code16 ^= flip_code16(code16);
8725 }
8726 /* Pentium4 branch hints. */
8727 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8728 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8729 {
8730 prefix++;
8731 i.prefixes--;
8732 }
8733 if (i.prefix[REX_PREFIX] != 0)
8734 {
8735 prefix++;
8736 i.prefixes--;
8737 }
8738
8739 /* BND prefixed jump. */
8740 if (i.prefix[BND_PREFIX] != 0)
8741 {
8742 prefix++;
8743 i.prefixes--;
8744 }
8745
8746 if (i.prefixes != 0)
8747 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8748
8749 /* It's always a symbol; end the frag & set up for relaxation.
8750 Make sure there is enough room in this frag for the largest
8751 instruction we may generate in md_convert_frag. This is 2
8752 bytes for the opcode and room for the prefix and largest
8753 displacement. */
8754 frag_grow (prefix + 2 + 4);
8755 /* Prefix and 1 opcode byte go in fr_fix. */
8756 p = frag_more (prefix + 1);
8757 if (i.prefix[DATA_PREFIX] != 0)
8758 *p++ = DATA_PREFIX_OPCODE;
8759 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8760 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8761 *p++ = i.prefix[SEG_PREFIX];
8762 if (i.prefix[BND_PREFIX] != 0)
8763 *p++ = BND_PREFIX_OPCODE;
8764 if (i.prefix[REX_PREFIX] != 0)
8765 *p++ = i.prefix[REX_PREFIX];
8766 *p = i.tm.base_opcode;
8767
8768 if ((unsigned char) *p == JUMP_PC_RELATIVE)
8769 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8770 else if (cpu_arch_flags.bitfield.cpui386)
8771 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8772 else
8773 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8774 subtype |= code16;
8775
8776 sym = i.op[0].disps->X_add_symbol;
8777 off = i.op[0].disps->X_add_number;
8778
8779 if (i.op[0].disps->X_op != O_constant
8780 && i.op[0].disps->X_op != O_symbol)
8781 {
8782 /* Handle complex expressions. */
8783 sym = make_expr_symbol (i.op[0].disps);
8784 off = 0;
8785 }
8786
8787 frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8788
8789 /* 1 possible extra opcode + 4 byte displacement go in var part.
8790 Pass reloc in fr_var. */
8791 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8792 }
8793
8794 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8795 /* Return TRUE iff PLT32 relocation should be used for branching to
8796 symbol S. */
8797
8798 static bool
8799 need_plt32_p (symbolS *s)
8800 {
8801 /* PLT32 relocation is ELF only. */
8802 if (!IS_ELF)
8803 return false;
8804
8805 #ifdef TE_SOLARIS
8806 /* Don't emit a PLT32 relocation on Solaris: neither the native
8807 linker nor krtld supports it. */
8808 return false;
8809 #endif
8810
8811 /* Since there is no need to prepare for a PLT branch on x86-64, we
8812 can generate R_X86_64_PLT32 instead of R_X86_64_PC32, which can
8813 then be used as a marker for 32-bit PC-relative branches. */
8814 if (!object_64bit)
8815 return false;
8816
8817 if (s == NULL)
8818 return false;
8819
8820 /* Weak or undefined symbols need a PLT32 relocation. */
8821 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8822 return true;
8823
8824 /* A non-global symbol doesn't need a PLT32 relocation. */
8825 if (! S_IS_EXTERNAL (s))
8826 return false;
8827
8828 /* Other global symbols need a PLT32 relocation. NB: Symbols with
8829 non-default visibility are treated as normal global symbols
8830 so that the PLT32 relocation can be used as a marker for 32-bit
8831 PC-relative branches. This is useful for linker relaxation. */
8832 return true;
8833 }
8834 #endif
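/* Illustrative consequence: in 64-bit ELF output, "call foo" against
   an undefined foo therefore gets R_X86_64_PLT32 rather than
   R_X86_64_PC32 (see the use in output_jump below).  */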
8835
8836 static void
8837 output_jump (void)
8838 {
8839 char *p;
8840 int size;
8841 fixS *fixP;
8842 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8843
8844 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8845 {
8846 /* This is a loop or jecxz type instruction. */
8847 size = 1;
8848 if (i.prefix[ADDR_PREFIX] != 0)
8849 {
8850 frag_opcode_byte (ADDR_PREFIX_OPCODE);
8851 i.prefixes -= 1;
8852 }
8853 /* Pentium4 branch hints. */
8854 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8855 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8856 {
8857 frag_opcode_byte (i.prefix[SEG_PREFIX]);
8858 i.prefixes--;
8859 }
8860 }
8861 else
8862 {
8863 int code16;
8864
8865 code16 = 0;
8866 if (flag_code == CODE_16BIT)
8867 code16 = CODE16;
8868
8869 if (i.prefix[DATA_PREFIX] != 0)
8870 {
8871 frag_opcode_byte (DATA_PREFIX_OPCODE);
8872 i.prefixes -= 1;
8873 code16 ^= flip_code16(code16);
8874 }
8875
8876 size = 4;
8877 if (code16)
8878 size = 2;
8879 }
8880
8881 /* BND prefixed jump. */
8882 if (i.prefix[BND_PREFIX] != 0)
8883 {
8884 frag_opcode_byte (i.prefix[BND_PREFIX]);
8885 i.prefixes -= 1;
8886 }
8887
8888 if (i.prefix[REX_PREFIX] != 0)
8889 {
8890 frag_opcode_byte (i.prefix[REX_PREFIX]);
8891 i.prefixes -= 1;
8892 }
8893
8894 if (i.prefixes != 0)
8895 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8896
8897 if (now_seg == absolute_section)
8898 {
8899 abs_section_offset += i.opcode_length + size;
8900 return;
8901 }
8902
8903 p = frag_more (i.opcode_length + size);
8904 switch (i.opcode_length)
8905 {
8906 case 2:
8907 *p++ = i.tm.base_opcode >> 8;
8908 /* Fall through. */
8909 case 1:
8910 *p++ = i.tm.base_opcode;
8911 break;
8912 default:
8913 abort ();
8914 }
8915
8916 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8917 if (flag_code == CODE_64BIT && size == 4
8918 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
8919 && need_plt32_p (i.op[0].disps->X_add_symbol))
8920 jump_reloc = BFD_RELOC_X86_64_PLT32;
8921 #endif
8922
8923 jump_reloc = reloc (size, 1, 1, jump_reloc);
8924
8925 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
8926 i.op[0].disps, 1, jump_reloc);
8927
8928 /* All jumps handled here are signed, but don't unconditionally use a
8929 signed limit check for 32 and 16 bit jumps as we want to allow wrap
8930 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
8931 respectively. */
8932 switch (size)
8933 {
8934 case 1:
8935 fixP->fx_signed = 1;
8936 break;
8937
8938 case 2:
8939 if (i.tm.base_opcode == 0xc7f8)
8940 fixP->fx_signed = 1;
8941 break;
8942
8943 case 4:
8944 if (flag_code == CODE_64BIT)
8945 fixP->fx_signed = 1;
8946 break;
8947 }
8948 }
8949
8950 static void
8951 output_interseg_jump (void)
8952 {
8953 char *p;
8954 int size;
8955 int prefix;
8956 int code16;
8957
8958 code16 = 0;
8959 if (flag_code == CODE_16BIT)
8960 code16 = CODE16;
8961
8962 prefix = 0;
8963 if (i.prefix[DATA_PREFIX] != 0)
8964 {
8965 prefix = 1;
8966 i.prefixes -= 1;
8967 code16 ^= CODE16;
8968 }
8969
8970 gas_assert (!i.prefix[REX_PREFIX]);
8971
8972 size = 4;
8973 if (code16)
8974 size = 2;
8975
8976 if (i.prefixes != 0)
8977 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8978
8979 if (now_seg == absolute_section)
8980 {
8981 abs_section_offset += prefix + 1 + 2 + size;
8982 return;
8983 }
8984
8985 /* 1 opcode; 2 segment; offset */
8986 p = frag_more (prefix + 1 + 2 + size);
8987
8988 if (i.prefix[DATA_PREFIX] != 0)
8989 *p++ = DATA_PREFIX_OPCODE;
8990
8991 if (i.prefix[REX_PREFIX] != 0)
8992 *p++ = i.prefix[REX_PREFIX];
8993
8994 *p++ = i.tm.base_opcode;
8995 if (i.op[1].imms->X_op == O_constant)
8996 {
8997 offsetT n = i.op[1].imms->X_add_number;
8998
8999 if (size == 2
9000 && !fits_in_unsigned_word (n)
9001 && !fits_in_signed_word (n))
9002 {
9003 as_bad (_("16-bit jump out of range"));
9004 return;
9005 }
9006 md_number_to_chars (p, n, size);
9007 }
9008 else
9009 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9010 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9011
9012 p += size;
9013 if (i.op[0].imms->X_op == O_constant)
9014 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9015 else
9016 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9017 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9018 }
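/* Sketch of the emitted bytes (my illustration): in 32-bit code,
   "ljmp $0x10, $0x2000" produces EA 00 20 00 00 10 00 - the opcode,
   then the 4-byte offset, then the 2-byte segment selector.  */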
9019
9020 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9021 void
9022 x86_cleanup (void)
9023 {
9024 char *p;
9025 asection *seg = now_seg;
9026 subsegT subseg = now_subseg;
9027 asection *sec;
9028 unsigned int alignment, align_size_1;
9029 unsigned int isa_1_descsz, feature_2_descsz, descsz;
9030 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9031 unsigned int padding;
9032
9033 if (!IS_ELF || !x86_used_note)
9034 return;
9035
9036 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9037
9038 /* The .note.gnu.property section layout:
9039
9040 Field Length Contents
9041 ---- ---- ----
9042 n_namsz 4 4
9043 n_descsz 4 The note descriptor size
9044 n_type 4 NT_GNU_PROPERTY_TYPE_0
9045 n_name 4 "GNU"
9046 n_desc n_descsz The program property array
9047 .... .... ....
9048 */
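/* Worked size example (my arithmetic, assuming ELFCLASS64): each
   12-byte property entry pads to 16, so descsz is 32 and the whole
   note occupies 4 * 4 + 32 = 48 bytes.  */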
9049
9050 /* Create the .note.gnu.property section. */
9051 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9052 bfd_set_section_flags (sec,
9053 (SEC_ALLOC
9054 | SEC_LOAD
9055 | SEC_DATA
9056 | SEC_HAS_CONTENTS
9057 | SEC_READONLY));
9058
9059 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9060 {
9061 align_size_1 = 7;
9062 alignment = 3;
9063 }
9064 else
9065 {
9066 align_size_1 = 3;
9067 alignment = 2;
9068 }
9069
9070 bfd_set_section_alignment (sec, alignment);
9071 elf_section_type (sec) = SHT_NOTE;
9072
9073 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9074 + 4-byte data */
9075 isa_1_descsz_raw = 4 + 4 + 4;
9076 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
9077 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9078
9079 feature_2_descsz_raw = isa_1_descsz;
9080 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9081 + 4-byte data */
9082 feature_2_descsz_raw += 4 + 4 + 4;
9083 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
9084 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9085 & ~align_size_1);
9086
9087 descsz = feature_2_descsz;
9088 /* Section size: n_namsz + n_descsz + n_type + n_name + n_desc. */
9089 p = frag_more (4 + 4 + 4 + 4 + descsz);
9090
9091 /* Write n_namsz. */
9092 md_number_to_chars (p, (valueT) 4, 4);
9093
9094 /* Write n_descsz. */
9095 md_number_to_chars (p + 4, (valueT) descsz, 4);
9096
9097 /* Write n_type. */
9098 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9099
9100 /* Write n_name. */
9101 memcpy (p + 4 * 3, "GNU", 4);
9102
9103 /* Write 4-byte type. */
9104 md_number_to_chars (p + 4 * 4,
9105 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9106
9107 /* Write 4-byte data size. */
9108 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9109
9110 /* Write 4-byte data. */
9111 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9112
9113 /* Zero out the padding. */
9114 padding = isa_1_descsz - isa_1_descsz_raw;
9115 if (padding)
9116 memset (p + 4 * 7, 0, padding);
9117
9118 /* Write 4-byte type. */
9119 md_number_to_chars (p + isa_1_descsz + 4 * 4,
9120 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9121
9122 /* Write 4-byte data size. */
9123 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9124
9125 /* Write 4-byte data. */
9126 md_number_to_chars (p + isa_1_descsz + 4 * 6,
9127 (valueT) x86_feature_2_used, 4);
9128
9129 /* Zero out the padding. */
9130 padding = feature_2_descsz - feature_2_descsz_raw;
9131 if (padding)
9132 memset (p + isa_1_descsz + 4 * 7, 0, padding);
9133
9134 /* We probably can't restore the current segment, for there likely
9135 isn't one yet... */
9136 if (seg && subseg)
9137 subseg_set (seg, subseg);
9138 }
9139
9140 bool
9141 x86_support_sframe_p (void)
9142 {
9143 /* At this time, SFrame unwind is supported for AMD64 ABI only. */
9144 return (x86_elf_abi == X86_64_ABI);
9145 }
9146
9147 bool
9148 x86_sframe_ra_tracking_p (void)
9149 {
9150 /* In AMD64, the return address is always stored on the stack at a fixed
9151 offset from the CFA (provided via x86_sframe_cfa_ra_offset ()).
9152 Do not track it explicitly via an SFrame Frame Row Entry. */
9153 return false;
9154 }
9155
9156 offsetT
9157 x86_sframe_cfa_ra_offset (void)
9158 {
9159 gas_assert (x86_elf_abi == X86_64_ABI);
9160 return (offsetT) -8;
9161 }
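/* That is, the return address pushed by "call" sits at CFA - 8 in the
   AMD64 ABI, which is why no explicit RA tracking is needed.  */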
9162
9163 unsigned char
9164 x86_sframe_get_abi_arch (void)
9165 {
9166 unsigned char sframe_abi_arch = 0;
9167
9168 if (x86_support_sframe_p ())
9169 {
9170 gas_assert (!target_big_endian);
9171 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9172 }
9173
9174 return sframe_abi_arch;
9175 }
9176
9177 #endif
9178
9179 static unsigned int
9180 encoding_length (const fragS *start_frag, offsetT start_off,
9181 const char *frag_now_ptr)
9182 {
9183 unsigned int len = 0;
9184
9185 if (start_frag != frag_now)
9186 {
9187 const fragS *fr = start_frag;
9188
9189 do {
9190 len += fr->fr_fix;
9191 fr = fr->fr_next;
9192 } while (fr && fr != frag_now);
9193 }
9194
9195 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9196 }
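/* Illustration: with the insn starting 3 bytes into a previous frag of
   fr_fix 10 and 4 bytes written into the current frag, this returns
   10 - 3 + 4 = 11.  */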
9197
9198 /* Return 1 for test, and, cmp, add, sub, inc and dec, which may
9199 be macro-fused with conditional jumps.
9200 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP-relative address,
9201 or is in one of the following forms:
9202
9203 cmp m, imm
9204 add m, imm
9205 sub m, imm
9206 test m, imm
9207 and m, imm
9208 inc m
9209 dec m
9210
9211 it is not fusible. */
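/* For instance (illustrative): "cmp %rsi, %rdi; jne 1f" may fuse,
   while "cmpl $1, (%rdi); jne 1f" (memory operand with an immediate)
   may not.  */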
9212
9213 static int
9214 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9215 {
9216 /* No RIP address. */
9217 if (i.base_reg && i.base_reg->reg_num == RegIP)
9218 return 0;
9219
9220 /* No opcodes outside of base encoding space. */
9221 if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9222 return 0;
9223
9224 /* add, sub without add/sub m, imm. */
9225 if (i.tm.base_opcode <= 5
9226 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9227 || ((i.tm.base_opcode | 3) == 0x83
9228 && (i.tm.extension_opcode == 0x5
9229 || i.tm.extension_opcode == 0x0)))
9230 {
9231 *mf_cmp_p = mf_cmp_alu_cmp;
9232 return !(i.mem_operands && i.imm_operands);
9233 }
9234
9235 /* and without and m, imm. */
9236 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9237 || ((i.tm.base_opcode | 3) == 0x83
9238 && i.tm.extension_opcode == 0x4))
9239 {
9240 *mf_cmp_p = mf_cmp_test_and;
9241 return !(i.mem_operands && i.imm_operands);
9242 }
9243
9244 /* test without test m, imm. */
9245 if ((i.tm.base_opcode | 1) == 0x85
9246 || (i.tm.base_opcode | 1) == 0xa9
9247 || ((i.tm.base_opcode | 1) == 0xf7
9248 && i.tm.extension_opcode == 0))
9249 {
9250 *mf_cmp_p = mf_cmp_test_and;
9251 return !(i.mem_operands && i.imm_operands);
9252 }
9253
9254 /* cmp without cmp m, imm. */
9255 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9256 || ((i.tm.base_opcode | 3) == 0x83
9257 && (i.tm.extension_opcode == 0x7)))
9258 {
9259 *mf_cmp_p = mf_cmp_alu_cmp;
9260 return !(i.mem_operands && i.imm_operands);
9261 }
9262
9263 /* inc, dec without inc/dec m. */
9264 if ((i.tm.cpu_flags.bitfield.cpuno64
9265 && (i.tm.base_opcode | 0xf) == 0x4f)
9266 || ((i.tm.base_opcode | 1) == 0xff
9267 && i.tm.extension_opcode <= 0x1))
9268 {
9269 *mf_cmp_p = mf_cmp_incdec;
9270 return !i.mem_operands;
9271 }
9272
9273 return 0;
9274 }
9275
9276 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9277
9278 static int
9279 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9280 {
9281 /* NB: This doesn't work with COND_JUMP86 without i386. */
9282 if (!align_branch_power
9283 || now_seg == absolute_section
9284 || !cpu_arch_flags.bitfield.cpui386
9285 || !(align_branch & align_branch_fused_bit))
9286 return 0;
9287
9288 if (maybe_fused_with_jcc_p (mf_cmp_p))
9289 {
9290 if (last_insn.kind == last_insn_other
9291 || last_insn.seg != now_seg)
9292 return 1;
9293 if (flag_debug)
9294 as_warn_where (last_insn.file, last_insn.line,
9295 _("`%s` skips -malign-branch-boundary on `%s`"),
9296 last_insn.name, i.tm.name);
9297 }
9298
9299 return 0;
9300 }
9301
9302 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9303
9304 static int
9305 add_branch_prefix_frag_p (void)
9306 {
9307 /* NB: This doesn't work with COND_JUMP86 without i386. Don't add a
9308 prefix to PadLock instructions since they include prefixes in the opcode. */
9309 if (!align_branch_power
9310 || !align_branch_prefix_size
9311 || now_seg == absolute_section
9312 || i.tm.cpu_flags.bitfield.cpupadlock
9313 || !cpu_arch_flags.bitfield.cpui386)
9314 return 0;
9315
9316 /* Don't add a prefix if this is itself a prefix, or if there is no
9317 operand, in case the segment prefix is special. */
9318 if (!i.operands || i.tm.opcode_modifier.isprefix)
9319 return 0;
9320
9321 if (last_insn.kind == last_insn_other
9322 || last_insn.seg != now_seg)
9323 return 1;
9324
9325 if (flag_debug)
9326 as_warn_where (last_insn.file, last_insn.line,
9327 _("`%s` skips -malign-branch-boundary on `%s`"),
9328 last_insn.name, i.tm.name);
9329
9330 return 0;
9331 }
9332
9333 /* Return 1 if a BRANCH_PADDING frag should be generated. */
9334
9335 static int
9336 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9337 enum mf_jcc_kind *mf_jcc_p)
9338 {
9339 int add_padding;
9340
9341 /* NB: This doesn't work with COND_JUMP86 without i386. */
9342 if (!align_branch_power
9343 || now_seg == absolute_section
9344 || !cpu_arch_flags.bitfield.cpui386
9345 || i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9346 return 0;
9347
9348 add_padding = 0;
9349
9350 /* Check for jcc and direct jmp. */
9351 if (i.tm.opcode_modifier.jump == JUMP)
9352 {
9353 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9354 {
9355 *branch_p = align_branch_jmp;
9356 add_padding = align_branch & align_branch_jmp_bit;
9357 }
9358 else
9359 {
9360 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
9361 table, ignore the lowest bit. */
9362 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9363 *branch_p = align_branch_jcc;
9364 if ((align_branch & align_branch_jcc_bit))
9365 add_padding = 1;
9366 }
9367 }
9368 else if ((i.tm.base_opcode | 1) == 0xc3)
9369 {
9370 /* Near ret. */
9371 *branch_p = align_branch_ret;
9372 if ((align_branch & align_branch_ret_bit))
9373 add_padding = 1;
9374 }
9375 else
9376 {
9377 /* Check for indirect jmp, direct and indirect calls. */
9378 if (i.tm.base_opcode == 0xe8)
9379 {
9380 /* Direct call. */
9381 *branch_p = align_branch_call;
9382 if ((align_branch & align_branch_call_bit))
9383 add_padding = 1;
9384 }
9385 else if (i.tm.base_opcode == 0xff
9386 && (i.tm.extension_opcode == 2
9387 || i.tm.extension_opcode == 4))
9388 {
9389 /* Indirect call and jmp. */
9390 *branch_p = align_branch_indirect;
9391 if ((align_branch & align_branch_indirect_bit))
9392 add_padding = 1;
9393 }
9394
9395 if (add_padding
9396 && i.disp_operands
9397 && tls_get_addr
9398 && (i.op[0].disps->X_op == O_symbol
9399 || (i.op[0].disps->X_op == O_subtract
9400 && i.op[0].disps->X_op_symbol == GOT_symbol)))
9401 {
9402 symbolS *s = i.op[0].disps->X_add_symbol;
9403 /* No padding for a call to a global or undefined tls_get_addr. */
9404 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9405 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9406 return 0;
9407 }
9408 }
9409
9410 if (add_padding
9411 && last_insn.kind != last_insn_other
9412 && last_insn.seg == now_seg)
9413 {
9414 if (flag_debug)
9415 as_warn_where (last_insn.file, last_insn.line,
9416 _("`%s` skips -malign-branch-boundary on `%s`"),
9417 last_insn.name, i.tm.name);
9418 return 0;
9419 }
9420
9421 return add_padding;
9422 }
9423
9424 static void
9425 output_insn (void)
9426 {
9427 fragS *insn_start_frag;
9428 offsetT insn_start_off;
9429 fragS *fragP = NULL;
9430 enum align_branch_kind branch = align_branch_none;
9431 /* The initializer is arbitrary, just to avoid an uninitialized-variable
9432 error. It's actually either assigned in add_branch_padding_frag_p
9433 or never used. */
9434 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9435
9436 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9437 if (IS_ELF && x86_used_note && now_seg != absolute_section)
9438 {
9439 if ((i.xstate & xstate_tmm) == xstate_tmm
9440 || i.tm.cpu_flags.bitfield.cpuamx_tile)
9441 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9442
9443 if (i.tm.cpu_flags.bitfield.cpu8087
9444 || i.tm.cpu_flags.bitfield.cpu287
9445 || i.tm.cpu_flags.bitfield.cpu387
9446 || i.tm.cpu_flags.bitfield.cpu687
9447 || i.tm.cpu_flags.bitfield.cpufisttp)
9448 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9449
9450 if ((i.xstate & xstate_mmx)
9451 || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9452 && !is_any_vex_encoding (&i.tm)
9453 && (i.tm.base_opcode == 0x77 /* emms */
9454 || i.tm.base_opcode == 0x0e /* femms */)))
9455 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9456
9457 if (i.index_reg)
9458 {
9459 if (i.index_reg->reg_type.bitfield.zmmword)
9460 i.xstate |= xstate_zmm;
9461 else if (i.index_reg->reg_type.bitfield.ymmword)
9462 i.xstate |= xstate_ymm;
9463 else if (i.index_reg->reg_type.bitfield.xmmword)
9464 i.xstate |= xstate_xmm;
9465 }
9466
9467 /* vzeroall / vzeroupper */
9468 if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9469 i.xstate |= xstate_ymm;
9470
9471 if ((i.xstate & xstate_xmm)
9472 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9473 || (i.tm.base_opcode == 0xae
9474 && (i.tm.cpu_flags.bitfield.cpusse
9475 || i.tm.cpu_flags.bitfield.cpuavx))
9476 || i.tm.cpu_flags.bitfield.cpuwidekl
9477 || i.tm.cpu_flags.bitfield.cpukl)
9478 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9479
9480 if ((i.xstate & xstate_ymm) == xstate_ymm)
9481 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9482 if ((i.xstate & xstate_zmm) == xstate_zmm)
9483 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9484 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9485 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9486 if (i.tm.cpu_flags.bitfield.cpufxsr)
9487 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9488 if (i.tm.cpu_flags.bitfield.cpuxsave)
9489 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9490 if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9491 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9492 if (i.tm.cpu_flags.bitfield.cpuxsavec)
9493 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9494
9495 if (x86_feature_2_used
9496 || i.tm.cpu_flags.bitfield.cpucmov
9497 || i.tm.cpu_flags.bitfield.cpusyscall
9498 || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9499 && i.tm.base_opcode == 0xc7
9500 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
9501 && i.tm.extension_opcode == 1) /* cmpxchg8b */)
9502 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9503 if (i.tm.cpu_flags.bitfield.cpusse3
9504 || i.tm.cpu_flags.bitfield.cpussse3
9505 || i.tm.cpu_flags.bitfield.cpusse4_1
9506 || i.tm.cpu_flags.bitfield.cpusse4_2
9507 || i.tm.cpu_flags.bitfield.cpucx16
9508 || i.tm.cpu_flags.bitfield.cpupopcnt
9509 /* LAHF-SAHF insns in 64-bit mode. */
9510 || (flag_code == CODE_64BIT
9511 && (i.tm.base_opcode | 1) == 0x9f
9512 && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
9513 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9514 if (i.tm.cpu_flags.bitfield.cpuavx
9515 || i.tm.cpu_flags.bitfield.cpuavx2
9516 /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
9517 XOP, FMA4, LWP, TBM, and AMX. */
9518 || (i.tm.opcode_modifier.vex
9519 && !i.tm.cpu_flags.bitfield.cpuavx512f
9520 && !i.tm.cpu_flags.bitfield.cpuavx512bw
9521 && !i.tm.cpu_flags.bitfield.cpuavx512dq
9522 && !i.tm.cpu_flags.bitfield.cpuxop
9523 && !i.tm.cpu_flags.bitfield.cpufma4
9524 && !i.tm.cpu_flags.bitfield.cpulwp
9525 && !i.tm.cpu_flags.bitfield.cputbm
9526 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9527 || i.tm.cpu_flags.bitfield.cpuf16c
9528 || i.tm.cpu_flags.bitfield.cpufma
9529 || i.tm.cpu_flags.bitfield.cpulzcnt
9530 || i.tm.cpu_flags.bitfield.cpumovbe
9531 || i.tm.cpu_flags.bitfield.cpuxsaves
9532 || (x86_feature_2_used
9533 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9534 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9535 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9536 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9537 if (i.tm.cpu_flags.bitfield.cpuavx512f
9538 || i.tm.cpu_flags.bitfield.cpuavx512bw
9539 || i.tm.cpu_flags.bitfield.cpuavx512dq
9540 || i.tm.cpu_flags.bitfield.cpuavx512vl
9541 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9542 AVX512-4FMAPS, and AVX512-4VNNIW. */
9543 || (i.tm.opcode_modifier.evex
9544 && !i.tm.cpu_flags.bitfield.cpuavx512er
9545 && !i.tm.cpu_flags.bitfield.cpuavx512pf
9546 && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
9547 && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9548 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9549 }
9550 #endif
9551
9552 /* Tie dwarf2 debug info to the address at the start of the insn.
9553 We can't do this after the insn has been output as the current
9554 frag may have been closed off, e.g. by frag_var. */
9555 dwarf2_emit_insn (0);
9556
9557 insn_start_frag = frag_now;
9558 insn_start_off = frag_now_fix ();
9559
9560 if (add_branch_padding_frag_p (&branch, &mf_jcc))
9561 {
9562 char *p;
9563 /* Branch can be 8 bytes. Leave some room for prefixes. */
9564 unsigned int max_branch_padding_size = 14;
9565
9566 /* Align section to boundary. */
9567 record_alignment (now_seg, align_branch_power);
9568
9569 /* Make room for padding. */
9570 frag_grow (max_branch_padding_size);
9571
9572 /* Start of the padding. */
9573 p = frag_more (0);
9574
9575 fragP = frag_now;
9576
9577 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9578 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9579 NULL, 0, p);
9580
9581 fragP->tc_frag_data.mf_type = mf_jcc;
9582 fragP->tc_frag_data.branch_type = branch;
9583 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9584 }
9585
9586 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9587 && !pre_386_16bit_warned)
9588 {
9589 as_warn (_("use .code16 to ensure correct addressing mode"));
9590 pre_386_16bit_warned = true;
9591 }
9592
9593 /* Output jumps. */
9594 if (i.tm.opcode_modifier.jump == JUMP)
9595 output_branch ();
9596 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9597 || i.tm.opcode_modifier.jump == JUMP_DWORD)
9598 output_jump ();
9599 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9600 output_interseg_jump ();
9601 else
9602 {
9603 /* Output normal instructions here. */
9604 char *p;
9605 unsigned char *q;
9606 unsigned int j;
9607 enum mf_cmp_kind mf_cmp;
9608
9609 if (avoid_fence
9610 && (i.tm.base_opcode == 0xaee8
9611 || i.tm.base_opcode == 0xaef0
9612 || i.tm.base_opcode == 0xaef8))
9613 {
9614 /* Encode lfence, mfence, and sfence as
9615 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
9616 if (flag_code == CODE_16BIT)
9617 as_bad (_("Cannot convert `%s' in 16-bit mode"), i.tm.name);
9618 else if (omit_lock_prefix)
9619 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9620 i.tm.name);
9621 else if (now_seg != absolute_section)
9622 {
9623 offsetT val = 0x240483f0ULL;
9624
9625 p = frag_more (5);
9626 md_number_to_chars (p, val, 5);
9627 }
9628 else
9629 abs_section_offset += 5;
9630 return;
9631 }
9632
9633 /* Some processors fail on the LOCK prefix. This option makes the
9634 assembler ignore the LOCK prefix and serves as a workaround. */
9635 if (omit_lock_prefix)
9636 {
9637 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9638 && i.tm.opcode_modifier.isprefix)
9639 return;
9640 i.prefix[LOCK_PREFIX] = 0;
9641 }
9642
9643 if (branch)
9644 /* Skip if this is a branch. */
9645 ;
9646 else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9647 {
9648 /* Make room for padding. */
9649 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9650 p = frag_more (0);
9651
9652 fragP = frag_now;
9653
9654 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9655 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9656 NULL, 0, p);
9657
9658 fragP->tc_frag_data.mf_type = mf_cmp;
9659 fragP->tc_frag_data.branch_type = align_branch_fused;
9660 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9661 }
9662 else if (add_branch_prefix_frag_p ())
9663 {
9664 unsigned int max_prefix_size = align_branch_prefix_size;
9665
9666 /* Make room for padding. */
9667 frag_grow (max_prefix_size);
9668 p = frag_more (0);
9669
9670 fragP = frag_now;
9671
9672 frag_var (rs_machine_dependent, max_prefix_size, 0,
9673 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9674 NULL, 0, p);
9675
9676 fragP->tc_frag_data.max_bytes = max_prefix_size;
9677 }
9678
9679 /* Since the VEX/EVEX prefix contains the implicit prefix, we
9680 don't need the explicit prefix. */
9681 if (!is_any_vex_encoding (&i.tm))
9682 {
9683 switch (i.tm.opcode_modifier.opcodeprefix)
9684 {
9685 case PREFIX_0X66:
9686 add_prefix (0x66);
9687 break;
9688 case PREFIX_0XF2:
9689 add_prefix (0xf2);
9690 break;
9691 case PREFIX_0XF3:
9692 if (!i.tm.cpu_flags.bitfield.cpupadlock
9693 || (i.prefix[REP_PREFIX] != 0xf3))
9694 add_prefix (0xf3);
9695 break;
9696 case PREFIX_NONE:
9697 switch (i.opcode_length)
9698 {
9699 case 2:
9700 break;
9701 case 1:
9702 /* Check for pseudo prefixes. */
9703 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9704 break;
9705 as_bad_where (insn_start_frag->fr_file,
9706 insn_start_frag->fr_line,
9707 _("pseudo prefix without instruction"));
9708 return;
9709 default:
9710 abort ();
9711 }
9712 break;
9713 default:
9714 abort ();
9715 }
9716
9717 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9718 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9719 R_X86_64_GOTTPOFF relocation so that linker can safely
9720 perform IE->LE optimization. A dummy REX_OPCODE prefix
9721 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9722 relocation for GDesc -> IE/LE optimization. */
9723 if (x86_elf_abi == X86_64_X32_ABI
9724 && i.operands == 2
9725 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9726 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9727 && i.prefix[REX_PREFIX] == 0)
9728 add_prefix (REX_OPCODE);
9729 #endif
9730
9731 /* The prefix bytes. */
9732 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9733 if (*q)
9734 frag_opcode_byte (*q);
9735 }
9736 else
9737 {
9738 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9739 if (*q)
9740 switch (j)
9741 {
9742 case SEG_PREFIX:
9743 case ADDR_PREFIX:
9744 frag_opcode_byte (*q);
9745 break;
9746 default:
9747 /* There should be no other prefixes for instructions
9748 with a VEX prefix. */
9749 abort ();
9750 }
9751
9752 /* For EVEX instructions i.vrex should become 0 after
9753 build_evex_prefix. For VEX instructions upper 16 registers
9754 aren't available, so VREX should be 0. */
9755 if (i.vrex)
9756 abort ();
9757 /* Now the VEX prefix. */
9758 if (now_seg != absolute_section)
9759 {
9760 p = frag_more (i.vex.length);
9761 for (j = 0; j < i.vex.length; j++)
9762 p[j] = i.vex.bytes[j];
9763 }
9764 else
9765 abs_section_offset += i.vex.length;
9766 }
9767
9768 /* Now the opcode; be careful about word order here! */
9769 j = i.opcode_length;
9770 if (!i.vex.length)
9771 switch (i.tm.opcode_modifier.opcodespace)
9772 {
9773 case SPACE_BASE:
9774 break;
9775 case SPACE_0F:
9776 ++j;
9777 break;
9778 case SPACE_0F38:
9779 case SPACE_0F3A:
9780 j += 2;
9781 break;
9782 default:
9783 abort ();
9784 }
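/* A sketch of what the extra escape bytes look like: a SPACE_0F insn
   such as `movaps' is emitted as 0f 28 /r, while a SPACE_0F38 insn
   such as `pshufb' becomes 0f 38 00 /r - hence j growing by one or
   two bytes above when no VEX prefix carries the opcode map. */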
9785
9786 if (now_seg == absolute_section)
9787 abs_section_offset += j;
9788 else if (j == 1)
9789 {
9790 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9791 }
9792 else
9793 {
9794 p = frag_more (j);
9795 if (!i.vex.length
9796 && i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9797 {
9798 *p++ = 0x0f;
9799 if (i.tm.opcode_modifier.opcodespace != SPACE_0F)
9800 *p++ = i.tm.opcode_modifier.opcodespace == SPACE_0F38
9801 ? 0x38 : 0x3a;
9802 }
9803
9804 switch (i.opcode_length)
9805 {
9806 case 2:
9807 /* Put out high byte first: can't use md_number_to_chars! */
9808 *p++ = (i.tm.base_opcode >> 8) & 0xff;
9809 /* Fall through. */
9810 case 1:
9811 *p = i.tm.base_opcode & 0xff;
9812 break;
9813 default:
9814 abort ();
9815 break;
9816 }
9817
9818 }
9819
9820 /* Now the modrm byte and sib byte (if present). */
9821 if (i.tm.opcode_modifier.modrm)
9822 {
9823 frag_opcode_byte ((i.rm.regmem << 0)
9824 | (i.rm.reg << 3)
9825 | (i.rm.mode << 6));
9826 /* If i.rm.regmem == ESP (4)
9827 && i.rm.mode != (Register mode)
9828 && not 16 bit
9829 ==> need a SIB byte as well. */
9830 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9831 && i.rm.mode != 3
9832 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9833 frag_opcode_byte ((i.sib.base << 0)
9834 | (i.sib.index << 3)
9835 | (i.sib.scale << 6));
9836 }
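/* A worked example of the packing above (illustrative): for
   `movl %eax, (%ebx,%esi,4)' we get
     ModRM = (4 << 0) | (0 << 3) | (0 << 6) = 0x04  (rm 4 = SIB escape)
     SIB   = (3 << 0) | (6 << 3) | (2 << 6) = 0xb3  (base EBX, index ESI, scale 4)
   so the insn is emitted as 89 04 b3. */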
9837
9838 if (i.disp_operands)
9839 output_disp (insn_start_frag, insn_start_off);
9840
9841 if (i.imm_operands)
9842 output_imm (insn_start_frag, insn_start_off);
9843
9844 /*
9845 * frag_now_fix () returning plain abs_section_offset when we're in the
9846 * absolute section, and abs_section_offset not getting updated as data
9847 * gets added to the frag breaks the logic below.
9848 */
9849 if (now_seg != absolute_section)
9850 {
9851 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9852 if (j > 15)
9853 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
9854 j);
9855 else if (fragP)
9856 {
9857 /* NB: Don't add prefix with GOTPC relocation since
9858 output_disp() above depends on the fixed encoding
9859 length. Can't add prefix with TLS relocation since
9860 it breaks TLS linker optimization. */
9861 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
9862 /* Prefix count on the current instruction. */
9863 unsigned int count = i.vex.length;
9864 unsigned int k;
9865 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
9866 /* REX byte is encoded in VEX/EVEX prefix. */
9867 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
9868 count++;
9869
9870 /* Count prefixes for extended opcode maps. */
9871 if (!i.vex.length)
9872 switch (i.tm.opcode_modifier.opcodespace)
9873 {
9874 case SPACE_BASE:
9875 break;
9876 case SPACE_0F:
9877 count++;
9878 break;
9879 case SPACE_0F38:
9880 case SPACE_0F3A:
9881 count += 2;
9882 break;
9883 default:
9884 abort ();
9885 }
9886
9887 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
9888 == BRANCH_PREFIX)
9889 {
9890 /* Set the maximum prefix size in BRANCH_PREFIX
9891 frag. */
9892 if (fragP->tc_frag_data.max_bytes > max)
9893 fragP->tc_frag_data.max_bytes = max;
9894 if (fragP->tc_frag_data.max_bytes > count)
9895 fragP->tc_frag_data.max_bytes -= count;
9896 else
9897 fragP->tc_frag_data.max_bytes = 0;
9898 }
9899 else
9900 {
9901 /* Remember the maximum prefix size in FUSED_JCC_PADDING
9902 frag. */
9903 unsigned int max_prefix_size;
9904 if (align_branch_prefix_size > max)
9905 max_prefix_size = max;
9906 else
9907 max_prefix_size = align_branch_prefix_size;
9908 if (max_prefix_size > count)
9909 fragP->tc_frag_data.max_prefix_length
9910 = max_prefix_size - count;
9911 }
9912
9913 /* Use existing segment prefix if possible. Use CS
9914 segment prefix in 64-bit mode. In 32-bit mode, use SS
9915 segment prefix with ESP/EBP base register and use DS
9916 segment prefix without ESP/EBP base register. */
9917 if (i.prefix[SEG_PREFIX])
9918 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
9919 else if (flag_code == CODE_64BIT)
9920 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
9921 else if (i.base_reg
9922 && (i.base_reg->reg_num == 4
9923 || i.base_reg->reg_num == 5))
9924 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
9925 else
9926 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
9927 }
9928 }
9929 }
9930
9931 /* NB: This doesn't work with COND_JUMP86 without i386. */
9932 if (align_branch_power
9933 && now_seg != absolute_section
9934 && cpu_arch_flags.bitfield.cpui386)
9935 {
9936 /* Terminate each frag so that we can add prefix and check for
9937 fused jcc. */
9938 frag_wane (frag_now);
9939 frag_new (0);
9940 }
9941
9942 #ifdef DEBUG386
9943 if (flag_debug)
9944 {
9945 pi ("" /*line*/, &i);
9946 }
9947 #endif /* DEBUG386 */
9948 }
9949
9950 /* Return the size of the displacement operand N. */
9951
9952 static int
9953 disp_size (unsigned int n)
9954 {
9955 int size = 4;
9956
9957 if (i.types[n].bitfield.disp64)
9958 size = 8;
9959 else if (i.types[n].bitfield.disp8)
9960 size = 1;
9961 else if (i.types[n].bitfield.disp16)
9962 size = 2;
9963 return size;
9964 }
9965
9966 /* Return the size of the immediate operand N. */
9967
9968 static int
9969 imm_size (unsigned int n)
9970 {
9971 int size = 4;
9972 if (i.types[n].bitfield.imm64)
9973 size = 8;
9974 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
9975 size = 1;
9976 else if (i.types[n].bitfield.imm16)
9977 size = 2;
9978 return size;
9979 }
9980
9981 static void
9982 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
9983 {
9984 char *p;
9985 unsigned int n;
9986
9987 for (n = 0; n < i.operands; n++)
9988 {
9989 if (operand_type_check (i.types[n], disp))
9990 {
9991 int size = disp_size (n);
9992
9993 if (now_seg == absolute_section)
9994 abs_section_offset += size;
9995 else if (i.op[n].disps->X_op == O_constant)
9996 {
9997 offsetT val = i.op[n].disps->X_add_number;
9998
9999 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10000 size);
10001 p = frag_more (size);
10002 md_number_to_chars (p, val, size);
10003 }
10004 else
10005 {
10006 enum bfd_reloc_code_real reloc_type;
10007 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10008 bool sign = (flag_code == CODE_64BIT && size == 4
10009 && (!want_disp32 (&i.tm)
10010 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10011 && !i.types[n].bitfield.baseindex)))
10012 || pcrel;
10013 fixS *fixP;
10014
10015 /* We can't have an 8-bit displacement here. */
10016 gas_assert (!i.types[n].bitfield.disp8);
10017
10018 /* The PC relative address is computed relative
10019 to the instruction boundary, so in case immediate
10020 fields follow, we need to adjust the value. */
10021 if (pcrel && i.imm_operands)
10022 {
10023 unsigned int n1;
10024 int sz = 0;
10025
10026 for (n1 = 0; n1 < i.operands; n1++)
10027 if (operand_type_check (i.types[n1], imm))
10028 {
10029 /* Only one immediate is allowed for PC
10030 relative address. */
10031 gas_assert (sz == 0);
10032 sz = imm_size (n1);
10033 i.op[n].disps->X_add_number -= sz;
10034 }
10035 /* We should find the immediate. */
10036 gas_assert (sz != 0);
10037 }
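/* A sketch of the adjustment above: for, say,
   `cmpl $0x12345678, sym(%rip)' the 4-byte displacement is followed
   by a 4-byte immediate, but the CPU computes RIP-relative addresses
   from the end of the whole insn - hence the immediate's size is
   subtracted from the displacement's addend to compensate. */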
10038
10039 p = frag_more (size);
10040 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10041 if (GOT_symbol
10042 && GOT_symbol == i.op[n].disps->X_add_symbol
10043 && (((reloc_type == BFD_RELOC_32
10044 || reloc_type == BFD_RELOC_X86_64_32S
10045 || (reloc_type == BFD_RELOC_64
10046 && object_64bit))
10047 && (i.op[n].disps->X_op == O_symbol
10048 || (i.op[n].disps->X_op == O_add
10049 && ((symbol_get_value_expression
10050 (i.op[n].disps->X_op_symbol)->X_op)
10051 == O_subtract))))
10052 || reloc_type == BFD_RELOC_32_PCREL))
10053 {
10054 if (!object_64bit)
10055 {
10056 reloc_type = BFD_RELOC_386_GOTPC;
10057 i.has_gotpc_tls_reloc = true;
10058 i.op[n].disps->X_add_number +=
10059 encoding_length (insn_start_frag, insn_start_off, p);
10060 }
10061 else if (reloc_type == BFD_RELOC_64)
10062 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10063 else
10064 /* Don't do the adjustment for x86-64, as there
10065 the pcrel addressing is relative to the _next_
10066 insn, and that is taken care of in other code. */
10067 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10068 }
10069 else if (align_branch_power)
10070 {
10071 switch (reloc_type)
10072 {
10073 case BFD_RELOC_386_TLS_GD:
10074 case BFD_RELOC_386_TLS_LDM:
10075 case BFD_RELOC_386_TLS_IE:
10076 case BFD_RELOC_386_TLS_IE_32:
10077 case BFD_RELOC_386_TLS_GOTIE:
10078 case BFD_RELOC_386_TLS_GOTDESC:
10079 case BFD_RELOC_386_TLS_DESC_CALL:
10080 case BFD_RELOC_X86_64_TLSGD:
10081 case BFD_RELOC_X86_64_TLSLD:
10082 case BFD_RELOC_X86_64_GOTTPOFF:
10083 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10084 case BFD_RELOC_X86_64_TLSDESC_CALL:
10085 i.has_gotpc_tls_reloc = true;
10086 default:
10087 break;
10088 }
10089 }
10090 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10091 size, i.op[n].disps, pcrel,
10092 reloc_type);
10093
10094 if (flag_code == CODE_64BIT && size == 4 && pcrel
10095 && !i.prefix[ADDR_PREFIX])
10096 fixP->fx_signed = 1;
10097
10098 /* Check for "call/jmp *mem", "mov mem, %reg",
10099 "test %reg, mem" and "binop mem, %reg" where binop
10100 is one of adc, add, and, cmp, or, sbb, sub, xor
10101 instructions without data prefix. Always generate
10102 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
10103 if (i.prefix[DATA_PREFIX] == 0
10104 && (generate_relax_relocations
10105 || (!object_64bit
10106 && i.rm.mode == 0
10107 && i.rm.regmem == 5))
10108 && (i.rm.mode == 2
10109 || (i.rm.mode == 0 && i.rm.regmem == 5))
10110 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
10111 && ((i.operands == 1
10112 && i.tm.base_opcode == 0xff
10113 && (i.rm.reg == 2 || i.rm.reg == 4))
10114 || (i.operands == 2
10115 && (i.tm.base_opcode == 0x8b
10116 || i.tm.base_opcode == 0x85
10117 || (i.tm.base_opcode & ~0x38) == 0x03))))
10118 {
10119 if (object_64bit)
10120 {
10121 fixP->fx_tcbit = i.rex != 0;
10122 if (i.base_reg
10123 && (i.base_reg->reg_num == RegIP))
10124 fixP->fx_tcbit2 = 1;
10125 }
10126 else
10127 fixP->fx_tcbit2 = 1;
10128 }
10129 }
10130 }
10131 }
10132 }
10133
10134 static void
10135 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10136 {
10137 char *p;
10138 unsigned int n;
10139
10140 for (n = 0; n < i.operands; n++)
10141 {
10142 if (operand_type_check (i.types[n], imm))
10143 {
10144 int size = imm_size (n);
10145
10146 if (now_seg == absolute_section)
10147 abs_section_offset += size;
10148 else if (i.op[n].imms->X_op == O_constant)
10149 {
10150 offsetT val;
10151
10152 val = offset_in_range (i.op[n].imms->X_add_number,
10153 size);
10154 p = frag_more (size);
10155 md_number_to_chars (p, val, size);
10156 }
10157 else
10158 {
10159 /* Not absolute_section.
10160 Need a 32-bit fixup (don't support 8-bit
10161 non-absolute imms). Try to support other
10162 sizes ... */
10163 enum bfd_reloc_code_real reloc_type;
10164 int sign;
10165
10166 if (i.types[n].bitfield.imm32s
10167 && (i.suffix == QWORD_MNEM_SUFFIX
10168 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10169 sign = 1;
10170 else
10171 sign = 0;
10172
10173 p = frag_more (size);
10174 reloc_type = reloc (size, 0, sign, i.reloc[n]);
10175
10176 /* This is tough to explain. We end up with this one if we
10177 * have operands that look like
10178 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
10179 * obtain the absolute address of the GOT, and it is strongly
10180 * preferable from a performance point of view to avoid using
10181 * a runtime relocation for this. The actual sequence of
10182 * instructions often looks something like:
10183 *
10184 * call .L66
10185 * .L66:
10186 * popl %ebx
10187 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10188 *
10189 * The call and pop essentially return the absolute address
10190 * of the label .L66 and store it in %ebx. The linker itself
10191 * will ultimately change the first operand of the addl so
10192 * that %ebx points to the GOT, but to keep things simple, the
10193 * .o file must have this operand set so that it generates not
10194 * the absolute address of .L66, but the absolute address of
10195 * itself. This allows the linker to simply treat a GOTPC
10196 * relocation as asking for a pcrel offset to the GOT to be
10197 * added in, and the addend of the relocation is stored in the
10198 * operand field for the instruction itself.
10199 *
10200 * Our job here is to fix the operand so that it would add
10201 * the correct offset so that %ebx would point to itself. The
10202 * thing that is tricky is that .-.L66 will point to the
10203 * beginning of the instruction, so we need to further modify
10204 * the operand so that it will point to itself. There are
10205 * other cases where you have something like:
10206 *
10207 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10208 *
10209 * and here no correction would be required. Internally in
10210 * the assembler we treat operands of this form as not being
10211 * pcrel since the '.' is explicitly mentioned, and I wonder
10212 * whether it would simplify matters to do it this way. Who
10213 * knows. In earlier versions of the PIC patches, the
10214 * pcrel_adjust field was used to store the correction, but
10215 * since the expression is not pcrel, I felt it would be
10216 * confusing to do it this way. */
10217
10218 if ((reloc_type == BFD_RELOC_32
10219 || reloc_type == BFD_RELOC_X86_64_32S
10220 || reloc_type == BFD_RELOC_64)
10221 && GOT_symbol
10222 && GOT_symbol == i.op[n].imms->X_add_symbol
10223 && (i.op[n].imms->X_op == O_symbol
10224 || (i.op[n].imms->X_op == O_add
10225 && ((symbol_get_value_expression
10226 (i.op[n].imms->X_op_symbol)->X_op)
10227 == O_subtract))))
10228 {
10229 if (!object_64bit)
10230 reloc_type = BFD_RELOC_386_GOTPC;
10231 else if (size == 4)
10232 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10233 else if (size == 8)
10234 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10235 i.has_gotpc_tls_reloc = true;
10236 i.op[n].imms->X_add_number +=
10237 encoding_length (insn_start_frag, insn_start_off, p);
10238 }
10239 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10240 i.op[n].imms, 0, reloc_type);
10241 }
10242 }
10243 }
10244 }
10245 \f
10246 /* x86_cons_fix_new is called via the expression parsing code when a
10247 reloc is needed. We use this hook to get the correct .got reloc. */
10248 static int cons_sign = -1;
10249
10250 void
10251 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10252 expressionS *exp, bfd_reloc_code_real_type r)
10253 {
10254 r = reloc (len, 0, cons_sign, r);
10255
10256 #ifdef TE_PE
10257 if (exp->X_op == O_secrel)
10258 {
10259 exp->X_op = O_symbol;
10260 r = BFD_RELOC_32_SECREL;
10261 }
10262 else if (exp->X_op == O_secidx)
10263 r = BFD_RELOC_16_SECIDX;
10264 #endif
10265
10266 fix_new_exp (frag, off, len, exp, 0, r);
10267 }
10268
10269 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10270 purpose of the `.dc.a' internal pseudo-op. */
10271
10272 int
10273 x86_address_bytes (void)
10274 {
10275 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10276 return 4;
10277 return stdoutput->arch_info->bits_per_address / 8;
10278 }
10279
10280 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10281 || defined (LEX_AT)) && !defined (TE_PE)
10282 # define lex_got(reloc, adjust, types) NULL
10283 #else
10284 /* Parse operands of the form
10285 <symbol>@GOTOFF+<nnn>
10286 and similar .plt or .got references.
10287
10288 If we find one, set up the correct relocation in RELOC and copy the
10289 input string, minus the `@GOTOFF' into a malloc'd buffer for
10290 parsing by the calling routine. Return this buffer, and if ADJUST
10291 is non-null set it to the length of the string we removed from the
10292 input line. Otherwise return NULL. */
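/* Illustration (the exact buffer contents depend on the input): given
   `foo@GOTOFF+4', *REL becomes the GOTOFF relocation for the current
   object format, the returned buffer holds `foo +4' (the reloc token
   is replaced by a blank so that junk such as `foo@GOTOFF1' is still
   diagnosed), and *ADJUST is set to strlen ("GOTOFF"). */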
10293 static char *
10294 lex_got (enum bfd_reloc_code_real *rel,
10295 int *adjust,
10296 i386_operand_type *types)
10297 {
10298 /* Some of the relocations depend on the size of the field that is to
10299 be relocated. But in our callers i386_immediate and i386_displacement
10300 we don't yet know the operand size (this will be set by insn
10301 matching). Hence we record the word32 relocation here,
10302 and adjust the reloc according to the real size in reloc(). */
10303 static const struct
10304 {
10305 const char *str;
10306 int len;
10307 const enum bfd_reloc_code_real rel[2];
10308 const i386_operand_type types64;
10309 bool need_GOT_symbol;
10310 }
10311 gotrel[] =
10312 {
10313 #ifndef TE_PE
10314 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10315 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
10316 BFD_RELOC_SIZE32 },
10317 OPERAND_TYPE_IMM32_64, false },
10318 #endif
10319 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
10320 BFD_RELOC_X86_64_PLTOFF64 },
10321 OPERAND_TYPE_IMM64, true },
10322 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
10323 BFD_RELOC_X86_64_PLT32 },
10324 OPERAND_TYPE_IMM32_32S_DISP32, false },
10325 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
10326 BFD_RELOC_X86_64_GOTPLT64 },
10327 OPERAND_TYPE_IMM64_DISP64, true },
10328 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
10329 BFD_RELOC_X86_64_GOTOFF64 },
10330 OPERAND_TYPE_IMM64_DISP64, true },
10331 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10332 BFD_RELOC_X86_64_GOTPCREL },
10333 OPERAND_TYPE_IMM32_32S_DISP32, true },
10334 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
10335 BFD_RELOC_X86_64_TLSGD },
10336 OPERAND_TYPE_IMM32_32S_DISP32, true },
10337 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
10338 _dummy_first_bfd_reloc_code_real },
10339 OPERAND_TYPE_NONE, true },
10340 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
10341 BFD_RELOC_X86_64_TLSLD },
10342 OPERAND_TYPE_IMM32_32S_DISP32, true },
10343 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10344 BFD_RELOC_X86_64_GOTTPOFF },
10345 OPERAND_TYPE_IMM32_32S_DISP32, true },
10346 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
10347 BFD_RELOC_X86_64_TPOFF32 },
10348 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10349 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
10350 _dummy_first_bfd_reloc_code_real },
10351 OPERAND_TYPE_NONE, true },
10352 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
10353 BFD_RELOC_X86_64_DTPOFF32 },
10354 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10355 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10356 _dummy_first_bfd_reloc_code_real },
10357 OPERAND_TYPE_NONE, true },
10358 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10359 _dummy_first_bfd_reloc_code_real },
10360 OPERAND_TYPE_NONE, true },
10361 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
10362 BFD_RELOC_X86_64_GOT32 },
10363 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10364 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
10365 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10366 OPERAND_TYPE_IMM32_32S_DISP32, true },
10367 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
10368 BFD_RELOC_X86_64_TLSDESC_CALL },
10369 OPERAND_TYPE_IMM32_32S_DISP32, true },
10370 #else /* TE_PE */
10371 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10372 BFD_RELOC_32_SECREL },
10373 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10374 #endif
10375 };
10376 char *cp;
10377 unsigned int j;
10378
10379 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10380 if (!IS_ELF)
10381 return NULL;
10382 #endif
10383
10384 for (cp = input_line_pointer; *cp != '@'; cp++)
10385 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10386 return NULL;
10387
10388 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10389 {
10390 int len = gotrel[j].len;
10391 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10392 {
10393 if (gotrel[j].rel[object_64bit] != 0)
10394 {
10395 int first, second;
10396 char *tmpbuf, *past_reloc;
10397
10398 *rel = gotrel[j].rel[object_64bit];
10399
10400 if (types)
10401 {
10402 if (flag_code != CODE_64BIT)
10403 {
10404 types->bitfield.imm32 = 1;
10405 types->bitfield.disp32 = 1;
10406 }
10407 else
10408 *types = gotrel[j].types64;
10409 }
10410
10411 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10412 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10413
10414 /* The length of the first part of our input line. */
10415 first = cp - input_line_pointer;
10416
10417 /* The second part goes from after the reloc token until
10418 (and including) an end_of_line char or comma. */
10419 past_reloc = cp + 1 + len;
10420 cp = past_reloc;
10421 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10422 ++cp;
10423 second = cp + 1 - past_reloc;
10424
10425 /* Allocate and copy string. The trailing NUL shouldn't
10426 be necessary, but be safe. */
10427 tmpbuf = XNEWVEC (char, first + second + 2);
10428 memcpy (tmpbuf, input_line_pointer, first);
10429 if (second != 0 && *past_reloc != ' ')
10430 /* Replace the relocation token with ' ', so that
10431 errors like foo@GOTOFF1 will be detected. */
10432 tmpbuf[first++] = ' ';
10433 else
10434 /* Increment length by 1 if the relocation token is
10435 removed. */
10436 len++;
10437 if (adjust)
10438 *adjust = len;
10439 memcpy (tmpbuf + first, past_reloc, second);
10440 tmpbuf[first + second] = '\0';
10441 return tmpbuf;
10442 }
10443
10444 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10445 gotrel[j].str, 1 << (5 + object_64bit));
10446 return NULL;
10447 }
10448 }
10449
10450 /* Might be a symbol version string. Don't as_bad here. */
10451 return NULL;
10452 }
10453 #endif
10454
10455 bfd_reloc_code_real_type
10456 x86_cons (expressionS *exp, int size)
10457 {
10458 bfd_reloc_code_real_type got_reloc = NO_RELOC;
10459
10460 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10461 && !defined (LEX_AT)) \
10462 || defined (TE_PE)
10463 intel_syntax = -intel_syntax;
10464
10465 exp->X_md = 0;
10466 if (size == 4 || (object_64bit && size == 8))
10467 {
10468 /* Handle @GOTOFF and the like in an expression. */
10469 char *save;
10470 char *gotfree_input_line;
10471 int adjust = 0;
10472
10473 save = input_line_pointer;
10474 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10475 if (gotfree_input_line)
10476 input_line_pointer = gotfree_input_line;
10477
10478 expression (exp);
10479
10480 if (gotfree_input_line)
10481 {
10482 /* expression () has merrily parsed up to the end of line,
10483 or a comma - in the wrong buffer. Transfer how far
10484 input_line_pointer has moved to the right buffer. */
10485 input_line_pointer = (save
10486 + (input_line_pointer - gotfree_input_line)
10487 + adjust);
10488 free (gotfree_input_line);
10489 if (exp->X_op == O_constant
10490 || exp->X_op == O_absent
10491 || exp->X_op == O_illegal
10492 || exp->X_op == O_register
10493 || exp->X_op == O_big)
10494 {
10495 char c = *input_line_pointer;
10496 *input_line_pointer = 0;
10497 as_bad (_("missing or invalid expression `%s'"), save);
10498 *input_line_pointer = c;
10499 }
10500 else if ((got_reloc == BFD_RELOC_386_PLT32
10501 || got_reloc == BFD_RELOC_X86_64_PLT32)
10502 && exp->X_op != O_symbol)
10503 {
10504 char c = *input_line_pointer;
10505 *input_line_pointer = 0;
10506 as_bad (_("invalid PLT expression `%s'"), save);
10507 *input_line_pointer = c;
10508 }
10509 }
10510 }
10511 else
10512 expression (exp);
10513
10514 intel_syntax = -intel_syntax;
10515
10516 if (intel_syntax)
10517 i386_intel_simplify (exp);
10518 #else
10519 expression (exp);
10520 #endif
10521
10522 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
10523 if (size == 4 && exp->X_op == O_constant && !object_64bit)
10524 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10525
10526 return got_reloc;
10527 }
10528
10529 static void
10530 signed_cons (int size)
10531 {
10532 if (object_64bit)
10533 cons_sign = 1;
10534 cons (size);
10535 cons_sign = -1;
10536 }
10537
10538 #ifdef TE_PE
10539 static void
10540 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10541 {
10542 expressionS exp;
10543
10544 do
10545 {
10546 expression (&exp);
10547 if (exp.X_op == O_symbol)
10548 exp.X_op = O_secrel;
10549
10550 emit_expr (&exp, 4);
10551 }
10552 while (*input_line_pointer++ == ',');
10553
10554 input_line_pointer--;
10555 demand_empty_rest_of_line ();
10556 }
10557
10558 static void
10559 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
10560 {
10561 expressionS exp;
10562
10563 do
10564 {
10565 expression (&exp);
10566 if (exp.X_op == O_symbol)
10567 exp.X_op = O_secidx;
10568
10569 emit_expr (&exp, 2);
10570 }
10571 while (*input_line_pointer++ == ',');
10572
10573 input_line_pointer--;
10574 demand_empty_rest_of_line ();
10575 }
10576 #endif
10577
10578 /* Handle Rounding Control / SAE specifiers. */
10579
10580 static char *
10581 RC_SAE_specifier (const char *pstr)
10582 {
10583 unsigned int j;
10584
10585 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10586 {
10587 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10588 {
10589 if (i.rounding.type != rc_none)
10590 {
10591 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
10592 return NULL;
10593 }
10594
10595 i.rounding.type = RC_NamesTable[j].type;
10596
10597 return (char *)(pstr + RC_NamesTable[j].len);
10598 }
10599 }
10600
10601 return NULL;
10602 }
10603
10604 /* Handle Vector operations. */
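/* In AT&T syntax these trailers look like, e.g. (illustrative):
     vaddpd (%rax){1to8}, %zmm2, %zmm1     broadcast one element
     vaddpd %zmm4, %zmm5, %zmm6{%k7}       merging-masking with k7
     vaddpd %zmm4, %zmm5, %zmm6{%k7}{z}    zeroing-masking with k7 */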
10605
10606 static char *
10607 check_VecOperations (char *op_string)
10608 {
10609 const reg_entry *mask;
10610 const char *saved;
10611 char *end_op;
10612
10613 while (*op_string)
10614 {
10615 saved = op_string;
10616 if (*op_string == '{')
10617 {
10618 op_string++;
10619
10620 /* Check broadcasts. */
10621 if (startswith (op_string, "1to"))
10622 {
10623 unsigned int bcst_type;
10624
10625 if (i.broadcast.type)
10626 goto duplicated_vec_op;
10627
10628 op_string += 3;
10629 if (*op_string == '8')
10630 bcst_type = 8;
10631 else if (*op_string == '4')
10632 bcst_type = 4;
10633 else if (*op_string == '2')
10634 bcst_type = 2;
10635 else if (*op_string == '1'
10636 && *(op_string+1) == '6')
10637 {
10638 bcst_type = 16;
10639 op_string++;
10640 }
10641 else if (*op_string == '3'
10642 && *(op_string+1) == '2')
10643 {
10644 bcst_type = 32;
10645 op_string++;
10646 }
10647 else
10648 {
10649 as_bad (_("Unsupported broadcast: `%s'"), saved);
10650 return NULL;
10651 }
10652 op_string++;
10653
10654 i.broadcast.type = bcst_type;
10655 i.broadcast.operand = this_operand;
10656 }
10657 /* Check masking operation. */
10658 else if ((mask = parse_register (op_string, &end_op)) != NULL)
10659 {
10660 if (mask == &bad_reg)
10661 return NULL;
10662
10663 /* k0 can't be used for write mask. */
10664 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10665 {
10666 as_bad (_("`%s%s' can't be used for write mask"),
10667 register_prefix, mask->reg_name);
10668 return NULL;
10669 }
10670
10671 if (!i.mask.reg)
10672 {
10673 i.mask.reg = mask;
10674 i.mask.operand = this_operand;
10675 }
10676 else if (i.mask.reg->reg_num)
10677 goto duplicated_vec_op;
10678 else
10679 {
10680 i.mask.reg = mask;
10681
10682 /* Only "{z}" is allowed here. No need to check
10683 zeroing mask explicitly. */
10684 if (i.mask.operand != (unsigned int) this_operand)
10685 {
10686 as_bad (_("invalid write mask `%s'"), saved);
10687 return NULL;
10688 }
10689 }
10690
10691 op_string = end_op;
10692 }
10693 /* Check zeroing-flag for masking operation. */
10694 else if (*op_string == 'z')
10695 {
10696 if (!i.mask.reg)
10697 {
10698 i.mask.reg = reg_k0;
10699 i.mask.zeroing = 1;
10700 i.mask.operand = this_operand;
10701 }
10702 else
10703 {
10704 if (i.mask.zeroing)
10705 {
10706 duplicated_vec_op:
10707 as_bad (_("duplicated `%s'"), saved);
10708 return NULL;
10709 }
10710
10711 i.mask.zeroing = 1;
10712
10713 /* Only "{%k}" is allowed here. No need to check mask
10714 register explicitly. */
10715 if (i.mask.operand != (unsigned int) this_operand)
10716 {
10717 as_bad (_("invalid zeroing-masking `%s'"),
10718 saved);
10719 return NULL;
10720 }
10721 }
10722
10723 op_string++;
10724 }
10725 else if (intel_syntax
10726 && (op_string = RC_SAE_specifier (op_string)) != NULL)
10727 i.rounding.modifier = true;
10728 else
10729 goto unknown_vec_op;
10730
10731 if (*op_string != '}')
10732 {
10733 as_bad (_("missing `}' in `%s'"), saved);
10734 return NULL;
10735 }
10736 op_string++;
10737
10738 /* Strip whitespace since the addition of pseudo prefixes
10739 changed how the scrubber treats '{'. */
10740 if (is_space_char (*op_string))
10741 ++op_string;
10742
10743 continue;
10744 }
10745 unknown_vec_op:
10746 /* We don't know this one. */
10747 as_bad (_("unknown vector operation: `%s'"), saved);
10748 return NULL;
10749 }
10750
10751 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
10752 {
10753 as_bad (_("zeroing-masking only allowed with write mask"));
10754 return NULL;
10755 }
10756
10757 return op_string;
10758 }
10759
10760 static int
10761 i386_immediate (char *imm_start)
10762 {
10763 char *save_input_line_pointer;
10764 char *gotfree_input_line;
10765 segT exp_seg = 0;
10766 expressionS *exp;
10767 i386_operand_type types;
10768
10769 operand_type_set (&types, ~0);
10770
10771 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10772 {
10773 as_bad (_("at most %d immediate operands are allowed"),
10774 MAX_IMMEDIATE_OPERANDS);
10775 return 0;
10776 }
10777
10778 exp = &im_expressions[i.imm_operands++];
10779 i.op[this_operand].imms = exp;
10780
10781 if (is_space_char (*imm_start))
10782 ++imm_start;
10783
10784 save_input_line_pointer = input_line_pointer;
10785 input_line_pointer = imm_start;
10786
10787 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10788 if (gotfree_input_line)
10789 input_line_pointer = gotfree_input_line;
10790
10791 exp_seg = expression (exp);
10792
10793 SKIP_WHITESPACE ();
10794 if (*input_line_pointer)
10795 as_bad (_("junk `%s' after expression"), input_line_pointer);
10796
10797 input_line_pointer = save_input_line_pointer;
10798 if (gotfree_input_line)
10799 {
10800 free (gotfree_input_line);
10801
10802 if (exp->X_op == O_constant)
10803 exp->X_op = O_illegal;
10804 }
10805
10806 if (exp_seg == reg_section)
10807 {
10808 as_bad (_("illegal immediate register operand %s"), imm_start);
10809 return 0;
10810 }
10811
10812 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10813 }
10814
10815 static int
10816 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10817 i386_operand_type types, const char *imm_start)
10818 {
10819 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10820 {
10821 if (imm_start)
10822 as_bad (_("missing or invalid immediate expression `%s'"),
10823 imm_start);
10824 return 0;
10825 }
10826 else if (exp->X_op == O_constant)
10827 {
10828 /* Size it properly later. */
10829 i.types[this_operand].bitfield.imm64 = 1;
10830
10831 /* If not 64bit, sign/zero extend val, to account for wraparound
10832 when !BFD64. */
10833 if (flag_code != CODE_64BIT)
10834 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10835 }
10836 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10837 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10838 && exp_seg != absolute_section
10839 && exp_seg != text_section
10840 && exp_seg != data_section
10841 && exp_seg != bss_section
10842 && exp_seg != undefined_section
10843 && !bfd_is_com_section (exp_seg))
10844 {
10845 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10846 return 0;
10847 }
10848 #endif
10849 else
10850 {
10851 /* This is an address. The size of the address will be
10852 determined later, depending on destination register,
10853 suffix, or the default for the section. */
10854 i.types[this_operand].bitfield.imm8 = 1;
10855 i.types[this_operand].bitfield.imm16 = 1;
10856 i.types[this_operand].bitfield.imm32 = 1;
10857 i.types[this_operand].bitfield.imm32s = 1;
10858 i.types[this_operand].bitfield.imm64 = 1;
10859 i.types[this_operand] = operand_type_and (i.types[this_operand],
10860 types);
10861 }
10862
10863 return 1;
10864 }
10865
10866 static char *
10867 i386_scale (char *scale)
10868 {
10869 offsetT val;
10870 char *save = input_line_pointer;
10871
10872 input_line_pointer = scale;
10873 val = get_absolute_expression ();
10874
10875 switch (val)
10876 {
10877 case 1:
10878 i.log2_scale_factor = 0;
10879 break;
10880 case 2:
10881 i.log2_scale_factor = 1;
10882 break;
10883 case 4:
10884 i.log2_scale_factor = 2;
10885 break;
10886 case 8:
10887 i.log2_scale_factor = 3;
10888 break;
10889 default:
10890 {
10891 char sep = *input_line_pointer;
10892
10893 *input_line_pointer = '\0';
10894 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
10895 scale);
10896 *input_line_pointer = sep;
10897 input_line_pointer = save;
10898 return NULL;
10899 }
10900 }
10901 if (i.log2_scale_factor != 0 && i.index_reg == 0)
10902 {
10903 as_warn (_("scale factor of %d without an index register"),
10904 1 << i.log2_scale_factor);
10905 i.log2_scale_factor = 0;
10906 }
10907 scale = input_line_pointer;
10908 input_line_pointer = save;
10909 return scale;
10910 }
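/* E.g. (a sketch): for `(%eax,%ebx,4)' the scale string `4' parsed
   above yields i.log2_scale_factor == 2, which later lands in the
   top two bits of the SIB byte. */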
10911
10912 static int
10913 i386_displacement (char *disp_start, char *disp_end)
10914 {
10915 expressionS *exp;
10916 segT exp_seg = 0;
10917 char *save_input_line_pointer;
10918 char *gotfree_input_line;
10919 int override;
10920 i386_operand_type bigdisp, types = anydisp;
10921 int ret;
10922
10923 if (i.disp_operands == MAX_MEMORY_OPERANDS)
10924 {
10925 as_bad (_("at most %d displacement operands are allowed"),
10926 MAX_MEMORY_OPERANDS);
10927 return 0;
10928 }
10929
10930 operand_type_set (&bigdisp, 0);
10931 if (i.jumpabsolute
10932 || i.types[this_operand].bitfield.baseindex
10933 || (current_templates->start->opcode_modifier.jump != JUMP
10934 && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
10935 {
10936 i386_addressing_mode ();
10937 override = (i.prefix[ADDR_PREFIX] != 0);
10938 if (flag_code == CODE_64BIT)
10939 {
10940 bigdisp.bitfield.disp32 = 1;
10941 if (!override)
10942 bigdisp.bitfield.disp64 = 1;
10943 }
10944 else if ((flag_code == CODE_16BIT) ^ override)
10945 bigdisp.bitfield.disp16 = 1;
10946 else
10947 bigdisp.bitfield.disp32 = 1;
10948 }
10949 else
10950 {
10951 /* For PC-relative branches, the width of the displacement may be
10952 dependent upon data size, but is never dependent upon address size.
10953 Also make sure to not unintentionally match against a non-PC-relative
10954 branch template. */
10955 static templates aux_templates;
10956 const insn_template *t = current_templates->start;
10957 bool has_intel64 = false;
10958
10959 aux_templates.start = t;
10960 while (++t < current_templates->end)
10961 {
10962 if (t->opcode_modifier.jump
10963 != current_templates->start->opcode_modifier.jump)
10964 break;
10965 if ((t->opcode_modifier.isa64 >= INTEL64))
10966 has_intel64 = true;
10967 }
10968 if (t < current_templates->end)
10969 {
10970 aux_templates.end = t;
10971 current_templates = &aux_templates;
10972 }
10973
10974 override = (i.prefix[DATA_PREFIX] != 0);
10975 if (flag_code == CODE_64BIT)
10976 {
10977 if ((override || i.suffix == WORD_MNEM_SUFFIX)
10978 && (!intel64 || !has_intel64))
10979 bigdisp.bitfield.disp16 = 1;
10980 else
10981 bigdisp.bitfield.disp32 = 1;
10982 }
10983 else
10984 {
10985 if (!override)
10986 override = (i.suffix == (flag_code != CODE_16BIT
10987 ? WORD_MNEM_SUFFIX
10988 : LONG_MNEM_SUFFIX));
10989 bigdisp.bitfield.disp32 = 1;
10990 if ((flag_code == CODE_16BIT) ^ override)
10991 {
10992 bigdisp.bitfield.disp32 = 0;
10993 bigdisp.bitfield.disp16 = 1;
10994 }
10995 }
10996 }
10997 i.types[this_operand] = operand_type_or (i.types[this_operand],
10998 bigdisp);
10999
11000 exp = &disp_expressions[i.disp_operands];
11001 i.op[this_operand].disps = exp;
11002 i.disp_operands++;
11003 save_input_line_pointer = input_line_pointer;
11004 input_line_pointer = disp_start;
11005 END_STRING_AND_SAVE (disp_end);
11006
11007 #ifndef GCC_ASM_O_HACK
11008 #define GCC_ASM_O_HACK 0
11009 #endif
11010 #if GCC_ASM_O_HACK
11011 END_STRING_AND_SAVE (disp_end + 1);
11012 if (i.types[this_operand].bitfield.baseindex
11013 && displacement_string_end[-1] == '+')
11014 {
11015 /* This hack is to avoid a warning when using the "o"
11016 constraint within gcc asm statements.
11017 For instance:
11018
11019 #define _set_tssldt_desc(n,addr,limit,type) \
11020 __asm__ __volatile__ ( \
11021 "movw %w2,%0\n\t" \
11022 "movw %w1,2+%0\n\t" \
11023 "rorl $16,%1\n\t" \
11024 "movb %b1,4+%0\n\t" \
11025 "movb %4,5+%0\n\t" \
11026 "movb $0,6+%0\n\t" \
11027 "movb %h1,7+%0\n\t" \
11028 "rorl $16,%1" \
11029 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
11030
11031 This works great except that the output assembler ends
11032 up looking a bit weird if it turns out that there is
11033 no offset. You end up producing code that looks like:
11034
11035 #APP
11036 movw $235,(%eax)
11037 movw %dx,2+(%eax)
11038 rorl $16,%edx
11039 movb %dl,4+(%eax)
11040 movb $137,5+(%eax)
11041 movb $0,6+(%eax)
11042 movb %dh,7+(%eax)
11043 rorl $16,%edx
11044 #NO_APP
11045
11046 So here we provide the missing zero. */
11047
11048 *displacement_string_end = '0';
11049 }
11050 #endif
11051 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11052 if (gotfree_input_line)
11053 input_line_pointer = gotfree_input_line;
11054
11055 exp_seg = expression (exp);
11056
11057 SKIP_WHITESPACE ();
11058 if (*input_line_pointer)
11059 as_bad (_("junk `%s' after expression"), input_line_pointer);
11060 #if GCC_ASM_O_HACK
11061 RESTORE_END_STRING (disp_end + 1);
11062 #endif
11063 input_line_pointer = save_input_line_pointer;
11064 if (gotfree_input_line)
11065 {
11066 free (gotfree_input_line);
11067
11068 if (exp->X_op == O_constant || exp->X_op == O_register)
11069 exp->X_op = O_illegal;
11070 }
11071
11072 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
11073
11074 RESTORE_END_STRING (disp_end);
11075
11076 return ret;
11077 }
11078
11079 static int
11080 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11081 i386_operand_type types, const char *disp_start)
11082 {
11083 int ret = 1;
11084
11085 /* We do this to make sure that the section symbol is in
11086 the symbol table. We will ultimately change the relocation
11087 to be relative to the beginning of the section. */
11088 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11089 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11090 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11091 {
11092 if (exp->X_op != O_symbol)
11093 goto inv_disp;
11094
11095 if (S_IS_LOCAL (exp->X_add_symbol)
11096 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11097 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11098 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11099 exp->X_op = O_subtract;
11100 exp->X_op_symbol = GOT_symbol;
11101 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11102 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11103 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11104 i.reloc[this_operand] = BFD_RELOC_64;
11105 else
11106 i.reloc[this_operand] = BFD_RELOC_32;
11107 }
11108
11109 else if (exp->X_op == O_absent
11110 || exp->X_op == O_illegal
11111 || exp->X_op == O_big)
11112 {
11113 inv_disp:
11114 as_bad (_("missing or invalid displacement expression `%s'"),
11115 disp_start);
11116 ret = 0;
11117 }
11118
11119 else if (exp->X_op == O_constant)
11120 {
11121 /* Sizing gets taken care of by optimize_disp().
11122
11123 If not 64bit, sign/zero extend val, to account for wraparound
11124 when !BFD64. */
11125 if (flag_code != CODE_64BIT)
11126 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11127 }
11128
11129 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11130 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11131 && exp_seg != absolute_section
11132 && exp_seg != text_section
11133 && exp_seg != data_section
11134 && exp_seg != bss_section
11135 && exp_seg != undefined_section
11136 && !bfd_is_com_section (exp_seg))
11137 {
11138 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11139 ret = 0;
11140 }
11141 #endif
11142
11143 else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
11144 i.types[this_operand].bitfield.disp8 = 1;
11145
11146 /* Check if this is a displacement only operand. */
11147 if (!i.types[this_operand].bitfield.baseindex)
11148 i.types[this_operand] =
11149 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
11150 operand_type_and (i.types[this_operand], types));
11151
11152 return ret;
11153 }
11154
11155 /* Return the active addressing mode, taking address override and
11156 registers forming the address into consideration. Update the
11157 address override prefix if necessary. */
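/* A sketch: in .code32, `mov (%bp,%si), %ax' forms its address from
   16-bit registers, so CODE_16BIT is returned and an 0x67 address
   size override is inferred here; in .code16 the same operand needs
   no prefix. */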
11158
11159 static enum flag_code
11160 i386_addressing_mode (void)
11161 {
11162 enum flag_code addr_mode;
11163
11164 if (i.prefix[ADDR_PREFIX])
11165 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11166 else if (flag_code == CODE_16BIT
11167 && current_templates->start->cpu_flags.bitfield.cpumpx
11168 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11169 from md_assemble() by "is not a valid base/index expression"
11170 when there is a base and/or index. */
11171 && !i.types[this_operand].bitfield.baseindex)
11172 {
11173 /* MPX insn memory operands with neither base nor index must be forced
11174 to use 32-bit addressing in 16-bit mode. */
11175 addr_mode = CODE_32BIT;
11176 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11177 ++i.prefixes;
11178 gas_assert (!i.types[this_operand].bitfield.disp16);
11179 gas_assert (!i.types[this_operand].bitfield.disp32);
11180 }
11181 else
11182 {
11183 addr_mode = flag_code;
11184
11185 #if INFER_ADDR_PREFIX
11186 if (i.mem_operands == 0)
11187 {
11188 /* Infer address prefix from the first memory operand. */
11189 const reg_entry *addr_reg = i.base_reg;
11190
11191 if (addr_reg == NULL)
11192 addr_reg = i.index_reg;
11193
11194 if (addr_reg)
11195 {
11196 if (addr_reg->reg_type.bitfield.dword)
11197 addr_mode = CODE_32BIT;
11198 else if (flag_code != CODE_64BIT
11199 && addr_reg->reg_type.bitfield.word)
11200 addr_mode = CODE_16BIT;
11201
11202 if (addr_mode != flag_code)
11203 {
11204 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11205 i.prefixes += 1;
11206 /* Change the size of any displacement too. At most one
11207 of Disp16 or Disp32 is set.
11208 FIXME. There doesn't seem to be any real need for
11209 separate Disp16 and Disp32 flags. The same goes for
11210 Imm16 and Imm32. Removing them would probably clean
11211 up the code quite a lot. */
11212 if (flag_code != CODE_64BIT
11213 && (i.types[this_operand].bitfield.disp16
11214 || i.types[this_operand].bitfield.disp32))
11215 i.types[this_operand]
11216 = operand_type_xor (i.types[this_operand], disp16_32);
11217 }
11218 }
11219 }
11220 #endif
11221 }
11222
11223 return addr_mode;
11224 }
11225
11226 /* Make sure the memory operand we've been dealt is valid.
11227 Return 1 on success, 0 on a failure. */
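/* Two sketches of what the checks below catch: for a string insn
   such as `movsl', `movsl (%eax), (%edi)' draws the "not valid here"
   diagnostic because only rSI/rDI (or rBX for xlat) may address
   memory; in 16-bit addressing, `(%bx,%cx)' is rejected because CX
   can never act as an index register. */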
11228
11229 static int
11230 i386_index_check (const char *operand_string)
11231 {
11232 const char *kind = "base/index";
11233 enum flag_code addr_mode = i386_addressing_mode ();
11234 const insn_template *t = current_templates->start;
11235
11236 if (t->opcode_modifier.isstring
11237 && (current_templates->end[-1].opcode_modifier.isstring
11238 || i.mem_operands))
11239 {
11240 /* Memory operands of string insns are special in that they only allow
11241 a single register (rDI, rSI, or rBX) as their memory address. */
11242 const reg_entry *expected_reg;
11243 static const char *di_si[][2] =
11244 {
11245 { "esi", "edi" },
11246 { "si", "di" },
11247 { "rsi", "rdi" }
11248 };
11249 static const char *bx[] = { "ebx", "bx", "rbx" };
11250
11251 kind = "string address";
11252
11253 if (t->opcode_modifier.prefixok == PrefixRep)
11254 {
11255 int es_op = current_templates->end[-1].opcode_modifier.isstring
11256 - IS_STRING_ES_OP0;
11257 int op = 0;
11258
11259 if (!current_templates->end[-1].operand_types[0].bitfield.baseindex
11260 || ((!i.mem_operands != !intel_syntax)
11261 && current_templates->end[-1].operand_types[1]
11262 .bitfield.baseindex))
11263 op = 1;
11264 expected_reg
11265 = (const reg_entry *) str_hash_find (reg_hash,
11266 di_si[addr_mode][op == es_op]);
11267 }
11268 else
11269 expected_reg
11270 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
11271
11272 if (i.base_reg != expected_reg
11273 || i.index_reg
11274 || operand_type_check (i.types[this_operand], disp))
11275 {
11276 /* The second memory operand must have the same size as
11277 the first one. */
11278 if (i.mem_operands
11279 && i.base_reg
11280 && !((addr_mode == CODE_64BIT
11281 && i.base_reg->reg_type.bitfield.qword)
11282 || (addr_mode == CODE_32BIT
11283 ? i.base_reg->reg_type.bitfield.dword
11284 : i.base_reg->reg_type.bitfield.word)))
11285 goto bad_address;
11286
11287 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
11288 operand_string,
11289 intel_syntax ? '[' : '(',
11290 register_prefix,
11291 expected_reg->reg_name,
11292 intel_syntax ? ']' : ')');
11293 return 1;
11294 }
11295 else
11296 return 1;
11297
11298 bad_address:
11299 as_bad (_("`%s' is not a valid %s expression"),
11300 operand_string, kind);
11301 return 0;
11302 }
11303 else
11304 {
11305 if (addr_mode != CODE_16BIT)
11306 {
11307 /* 32-bit/64-bit checks. */
11308 if (i.disp_encoding == disp_encoding_16bit)
11309 {
11310 bad_disp:
11311 as_bad (_("invalid `%s' prefix"),
11312 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
11313 return 0;
11314 }
11315
11316 if ((i.base_reg
11317 && ((addr_mode == CODE_64BIT
11318 ? !i.base_reg->reg_type.bitfield.qword
11319 : !i.base_reg->reg_type.bitfield.dword)
11320 || (i.index_reg && i.base_reg->reg_num == RegIP)
11321 || i.base_reg->reg_num == RegIZ))
11322 || (i.index_reg
11323 && !i.index_reg->reg_type.bitfield.xmmword
11324 && !i.index_reg->reg_type.bitfield.ymmword
11325 && !i.index_reg->reg_type.bitfield.zmmword
11326 && ((addr_mode == CODE_64BIT
11327 ? !i.index_reg->reg_type.bitfield.qword
11328 : !i.index_reg->reg_type.bitfield.dword)
11329 || !i.index_reg->reg_type.bitfield.baseindex)))
11330 goto bad_address;
11331
11332 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
11333 if ((t->opcode_modifier.opcodeprefix == PREFIX_0XF3
11334 && t->opcode_modifier.opcodespace == SPACE_0F
11335 && t->base_opcode == 0x1b)
11336 || (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11337 && t->opcode_modifier.opcodespace == SPACE_0F
11338 && (t->base_opcode & ~1) == 0x1a)
11339 || t->opcode_modifier.sib == SIBMEM)
11340 {
11341 /* They cannot use RIP-relative addressing. */
11342 if (i.base_reg && i.base_reg->reg_num == RegIP)
11343 {
11344 as_bad (_("`%s' cannot be used here"), operand_string);
11345 return 0;
11346 }
11347
11348 /* bndldx and bndstx ignore their scale factor. */
11349 if (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11350 && t->opcode_modifier.opcodespace == SPACE_0F
11351 && (t->base_opcode & ~1) == 0x1a
11352 && i.log2_scale_factor)
11353 as_warn (_("register scaling is being ignored here"));
11354 }
11355 }
11356 else
11357 {
11358 /* 16-bit checks. */
11359 if (i.disp_encoding == disp_encoding_32bit)
11360 goto bad_disp;
11361
11362 if ((i.base_reg
11363 && (!i.base_reg->reg_type.bitfield.word
11364 || !i.base_reg->reg_type.bitfield.baseindex))
11365 || (i.index_reg
11366 && (!i.index_reg->reg_type.bitfield.word
11367 || !i.index_reg->reg_type.bitfield.baseindex
11368 || !(i.base_reg
11369 && i.base_reg->reg_num < 6
11370 && i.index_reg->reg_num >= 6
11371 && i.log2_scale_factor == 0))))
11372 goto bad_address;
11373 }
11374 }
11375 return 1;
11376 }
11377
11378 /* Handle vector immediates. */
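/* An RC/SAE immediate is written like an operand of its own in AT&T
   syntax, e.g. (illustrative) `vaddpd {rn-sae}, %zmm4, %zmm5, %zmm6'.
   The `{rn-sae}' is parsed here and then removed from the operand
   count, since it merely selects EVEX rounding/SAE bits. */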
11379
11380 static int
11381 RC_SAE_immediate (const char *imm_start)
11382 {
11383 const char *pstr = imm_start;
11384
11385 if (*pstr != '{')
11386 return 0;
11387
11388 pstr = RC_SAE_specifier (pstr + 1);
11389 if (pstr == NULL)
11390 return 0;
11391
11392 if (*pstr++ != '}')
11393 {
11394 as_bad (_("Missing '}': '%s'"), imm_start);
11395 return 0;
11396 }
11397 /* RC/SAE immediate string should contain nothing more. */
11398 if (*pstr != 0)
11399 {
11400 as_bad (_("Junk after '}': '%s'"), imm_start);
11401 return 0;
11402 }
11403
11404 /* Internally this doesn't count as an operand. */
11405 --i.operands;
11406
11407 return 1;
11408 }
11409
11410 /* Only string instructions can have a second memory operand, so
11411 reduce current_templates to just those if it contains any. */
11412 static int
11413 maybe_adjust_templates (void)
11414 {
11415 const insn_template *t;
11416
11417 gas_assert (i.mem_operands == 1);
11418
11419 for (t = current_templates->start; t < current_templates->end; ++t)
11420 if (t->opcode_modifier.isstring)
11421 break;
11422
11423 if (t < current_templates->end)
11424 {
11425 static templates aux_templates;
11426 bool recheck;
11427
11428 aux_templates.start = t;
11429 for (; t < current_templates->end; ++t)
11430 if (!t->opcode_modifier.isstring)
11431 break;
11432 aux_templates.end = t;
11433
11434 /* Determine whether to re-check the first memory operand. */
11435 recheck = (aux_templates.start != current_templates->start
11436 || t != current_templates->end);
11437
11438 current_templates = &aux_templates;
11439
11440 if (recheck)
11441 {
11442 i.mem_operands = 0;
11443 if (i.memop1_string != NULL
11444 && i386_index_check (i.memop1_string) == 0)
11445 return 0;
11446 i.mem_operands = 1;
11447 }
11448 }
11449
11450 return 1;
11451 }
11452
11453 static INLINE bool starts_memory_operand (char c)
11454 {
11455 return ISDIGIT (c)
11456 || is_identifier_char (c)
11457 || strchr ("([\"+-!~", c);
11458 }
11459
11460 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
11461 on error. */
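/* The operand forms dissected below, by way of example:
     %eax                  register
     $42                   immediate
     foo                   memory, displacement only
     %gs:16(%ebx,%esi,2)   memory with segment, base, index and scale
     *%eax or *foo         absolute jump target (ABSOLUTE_PREFIX) */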
11462
11463 static int
11464 i386_att_operand (char *operand_string)
11465 {
11466 const reg_entry *r;
11467 char *end_op;
11468 char *op_string = operand_string;
11469
11470 if (is_space_char (*op_string))
11471 ++op_string;
11472
11473 /* We check for an absolute prefix (differentiating,
11474 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
11475 if (*op_string == ABSOLUTE_PREFIX)
11476 {
11477 ++op_string;
11478 if (is_space_char (*op_string))
11479 ++op_string;
11480 i.jumpabsolute = true;
11481 }
11482
11483 /* Check if operand is a register. */
11484 if ((r = parse_register (op_string, &end_op)) != NULL)
11485 {
11486 i386_operand_type temp;
11487
11488 if (r == &bad_reg)
11489 return 0;
11490
11491 /* Check for a segment override by searching for ':' after a
11492 segment register. */
11493 op_string = end_op;
11494 if (is_space_char (*op_string))
11495 ++op_string;
11496 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11497 {
11498 i.seg[i.mem_operands] = r;
11499
11500 /* Skip the ':' and whitespace. */
11501 ++op_string;
11502 if (is_space_char (*op_string))
11503 ++op_string;
11504
11505 /* Handle case of %es:*foo. */
11506 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
11507 {
11508 ++op_string;
11509 if (is_space_char (*op_string))
11510 ++op_string;
11511 i.jumpabsolute = true;
11512 }
11513
11514 if (!starts_memory_operand (*op_string))
11515 {
11516 as_bad (_("bad memory operand `%s'"), op_string);
11517 return 0;
11518 }
11519 goto do_memory_reference;
11520 }
11521
11522 /* Handle vector operations. */
11523 if (*op_string == '{')
11524 {
11525 op_string = check_VecOperations (op_string);
11526 if (op_string == NULL)
11527 return 0;
11528 }
11529
11530 if (*op_string)
11531 {
11532 as_bad (_("junk `%s' after register"), op_string);
11533 return 0;
11534 }
11535 temp = r->reg_type;
11536 temp.bitfield.baseindex = 0;
11537 i.types[this_operand] = operand_type_or (i.types[this_operand],
11538 temp);
11539 i.types[this_operand].bitfield.unspecified = 0;
11540 i.op[this_operand].regs = r;
11541 i.reg_operands++;
11542
11543 /* A GPR may follow an RC or SAE immediate only if a (vector) register
11544 operand was also present earlier on. */
11545 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
11546 && i.reg_operands == 1)
11547 {
11548 unsigned int j;
11549
11550 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
11551 if (i.rounding.type == RC_NamesTable[j].type)
11552 break;
11553 as_bad (_("`%s': misplaced `{%s}'"),
11554 current_templates->start->name, RC_NamesTable[j].name);
11555 return 0;
11556 }
11557 }
11558 else if (*op_string == REGISTER_PREFIX)
11559 {
11560 as_bad (_("bad register name `%s'"), op_string);
11561 return 0;
11562 }
11563 else if (*op_string == IMMEDIATE_PREFIX)
11564 {
11565 ++op_string;
11566 if (i.jumpabsolute)
11567 {
11568 as_bad (_("immediate operand illegal with absolute jump"));
11569 return 0;
11570 }
11571 if (!i386_immediate (op_string))
11572 return 0;
11573 if (i.rounding.type != rc_none)
11574 {
11575 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
11576 current_templates->start->name);
11577 return 0;
11578 }
11579 }
11580 else if (RC_SAE_immediate (operand_string))
11581 {
11582 /* If it is a RC or SAE immediate, do the necessary placement check:
11583 Only another immediate or a GPR may precede it. */
11584 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
11585 || (i.reg_operands == 1
11586 && i.op[0].regs->reg_type.bitfield.class != Reg))
11587 {
11588 as_bad (_("`%s': misplaced `%s'"),
11589 current_templates->start->name, operand_string);
11590 return 0;
11591 }
11592 }
11593 else if (starts_memory_operand (*op_string))
11594 {
11595 /* This is a memory reference of some sort. */
11596 char *base_string;
11597
11598 /* Start and end of displacement string expression (if found). */
11599 char *displacement_string_start;
11600 char *displacement_string_end;
11601
11602 do_memory_reference:
11603 if (i.mem_operands == 1 && !maybe_adjust_templates ())
11604 return 0;
11605 if ((i.mem_operands == 1
11606 && !current_templates->start->opcode_modifier.isstring)
11607 || i.mem_operands == 2)
11608 {
11609 as_bad (_("too many memory references for `%s'"),
11610 current_templates->start->name);
11611 return 0;
11612 }
11613
11614 /* Check for base index form. We detect the base index form by
11615 looking for an ')' at the end of the operand, searching
11616 for the '(' matching it, and finding a REGISTER_PREFIX or ','
11617 after the '('. */
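/* Illustrative example: in "-8(%rbp,%rcx,4)" the displacement is
   "-8", the base register %rbp, the index register %rcx, and the
   scale factor 4. */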
11618 base_string = op_string + strlen (op_string);
11619
11620 /* Handle vector operations. */
11621 --base_string;
11622 if (is_space_char (*base_string))
11623 --base_string;
11624
11625 if (*base_string == '}')
11626 {
11627 char *vop_start = NULL;
11628
11629 while (base_string-- > op_string)
11630 {
11631 if (*base_string == '"')
11632 break;
11633 if (*base_string != '{')
11634 continue;
11635
11636 vop_start = base_string;
11637
11638 --base_string;
11639 if (is_space_char (*base_string))
11640 --base_string;
11641
11642 if (*base_string != '}')
11643 break;
11644
11645 vop_start = NULL;
11646 }
11647
11648 if (!vop_start)
11649 {
11650 as_bad (_("unbalanced curly braces"));
11651 return 0;
11652 }
11653
11654 if (check_VecOperations (vop_start) == NULL)
11655 return 0;
11656 }
11657
11658 /* If we only have a displacement, set up for it to be parsed later. */
11659 displacement_string_start = op_string;
11660 displacement_string_end = base_string + 1;
11661
11662 if (*base_string == ')')
11663 {
11664 char *temp_string;
11665 unsigned int parens_not_balanced = 0;
11666 bool in_quotes = false;
11667
11668 /* We've already checked that the numbers of left & right ()'s are
11669 equal, and that there's a matching set of double quotes. */
11670 end_op = base_string;
11671 for (temp_string = op_string; temp_string < end_op; temp_string++)
11672 {
11673 if (*temp_string == '\\' && temp_string[1] == '"')
11674 ++temp_string;
11675 else if (*temp_string == '"')
11676 in_quotes = !in_quotes;
11677 else if (!in_quotes)
11678 {
11679 if (*temp_string == '(' && !parens_not_balanced++)
11680 base_string = temp_string;
11681 if (*temp_string == ')')
11682 --parens_not_balanced;
11683 }
11684 }
11685
11686 temp_string = base_string;
11687
11688 /* Skip past '(' and whitespace. */
11689 gas_assert (*base_string == '(');
11690 ++base_string;
11691 if (is_space_char (*base_string))
11692 ++base_string;
11693
11694 if (*base_string == ','
11695 || ((i.base_reg = parse_register (base_string, &end_op))
11696 != NULL))
11697 {
11698 displacement_string_end = temp_string;
11699
11700 i.types[this_operand].bitfield.baseindex = 1;
11701
11702 if (i.base_reg)
11703 {
11704 if (i.base_reg == &bad_reg)
11705 return 0;
11706 base_string = end_op;
11707 if (is_space_char (*base_string))
11708 ++base_string;
11709 }
11710
11711 /* There may be an index reg or scale factor here. */
11712 if (*base_string == ',')
11713 {
11714 ++base_string;
11715 if (is_space_char (*base_string))
11716 ++base_string;
11717
11718 if ((i.index_reg = parse_register (base_string, &end_op))
11719 != NULL)
11720 {
11721 if (i.index_reg == &bad_reg)
11722 return 0;
11723 base_string = end_op;
11724 if (is_space_char (*base_string))
11725 ++base_string;
11726 if (*base_string == ',')
11727 {
11728 ++base_string;
11729 if (is_space_char (*base_string))
11730 ++base_string;
11731 }
11732 else if (*base_string != ')')
11733 {
11734 as_bad (_("expecting `,' or `)' "
11735 "after index register in `%s'"),
11736 operand_string);
11737 return 0;
11738 }
11739 }
11740 else if (*base_string == REGISTER_PREFIX)
11741 {
11742 end_op = strchr (base_string, ',');
11743 if (end_op)
11744 *end_op = '\0';
11745 as_bad (_("bad register name `%s'"), base_string);
11746 return 0;
11747 }
11748
11749 /* Check for scale factor. */
11750 if (*base_string != ')')
11751 {
11752 char *end_scale = i386_scale (base_string);
11753
11754 if (!end_scale)
11755 return 0;
11756
11757 base_string = end_scale;
11758 if (is_space_char (*base_string))
11759 ++base_string;
11760 if (*base_string != ')')
11761 {
11762 as_bad (_("expecting `)' "
11763 "after scale factor in `%s'"),
11764 operand_string);
11765 return 0;
11766 }
11767 }
11768 else if (!i.index_reg)
11769 {
11770 as_bad (_("expecting index register or scale factor "
11771 "after `,'; got '%c'"),
11772 *base_string);
11773 return 0;
11774 }
11775 }
11776 else if (*base_string != ')')
11777 {
11778 as_bad (_("expecting `,' or `)' "
11779 "after base register in `%s'"),
11780 operand_string);
11781 return 0;
11782 }
11783 }
11784 else if (*base_string == REGISTER_PREFIX)
11785 {
11786 end_op = strchr (base_string, ',');
11787 if (end_op)
11788 *end_op = '\0';
11789 as_bad (_("bad register name `%s'"), base_string);
11790 return 0;
11791 }
11792 }
11793
11794 /* If there's an expression beginning the operand, parse it,
11795 assuming displacement_string_start and
11796 displacement_string_end are meaningful. */
11797 if (displacement_string_start != displacement_string_end)
11798 {
11799 if (!i386_displacement (displacement_string_start,
11800 displacement_string_end))
11801 return 0;
11802 }
11803
11804 /* Special case for (%dx) while doing input/output op. */
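/* e.g. "inb (%dx), %al": the (%dx) operand really denotes the DX
   register rather than a memory reference. */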
11805 if (i.base_reg
11806 && i.base_reg->reg_type.bitfield.instance == RegD
11807 && i.base_reg->reg_type.bitfield.word
11808 && i.index_reg == 0
11809 && i.log2_scale_factor == 0
11810 && i.seg[i.mem_operands] == 0
11811 && !operand_type_check (i.types[this_operand], disp))
11812 {
11813 i.types[this_operand] = i.base_reg->reg_type;
11814 i.input_output_operand = true;
11815 return 1;
11816 }
11817
11818 if (i386_index_check (operand_string) == 0)
11819 return 0;
11820 i.flags[this_operand] |= Operand_Mem;
11821 if (i.mem_operands == 0)
11822 i.memop1_string = xstrdup (operand_string);
11823 i.mem_operands++;
11824 }
11825 else
11826 {
11827 /* It's not a memory operand; argh! */
11828 as_bad (_("invalid char %s beginning operand %d `%s'"),
11829 output_invalid (*op_string),
11830 this_operand + 1,
11831 op_string);
11832 return 0;
11833 }
11834 return 1; /* Normal return. */
11835 }
11836 \f
11837 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11838 that an rs_machine_dependent frag may reach. */
11839
11840 unsigned int
11841 i386_frag_max_var (fragS *frag)
11842 {
11843 /* The only relaxable frags are for jumps.
11844 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
11845 gas_assert (frag->fr_type == rs_machine_dependent);
11846 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11847 }
11848
11849 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11850 static int
11851 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
11852 {
11853 /* STT_GNU_IFUNC symbol must go through PLT. */
11854 if ((symbol_get_bfdsym (fr_symbol)->flags
11855 & BSF_GNU_INDIRECT_FUNCTION) != 0)
11856 return 0;
11857
11858 if (!S_IS_EXTERNAL (fr_symbol))
11859 /* Symbol may be weak or local. */
11860 return !S_IS_WEAK (fr_symbol);
11861
11862 /* Global symbols with non-default visibility can't be preempted. */
11863 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11864 return 1;
11865
11866 if (fr_var != NO_RELOC)
11867 switch ((enum bfd_reloc_code_real) fr_var)
11868 {
11869 case BFD_RELOC_386_PLT32:
11870 case BFD_RELOC_X86_64_PLT32:
11871 /* Symbol with PLT relocation may be preempted. */
11872 return 0;
11873 default:
11874 abort ();
11875 }
11876
11877 /* Global symbols with default visibility in a shared library may be
11878 preempted by another definition. */
11879 return !shared;
11880 }
11881 #endif
11882
11883 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11884 Note: this also works for Skylake and Cascadelake.
11885 ---------------------------------------------------------------------
11886 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
11887 | ------ | ----------- | ------- | -------- |
11888 | Jo | N | N | Y |
11889 | Jno | N | N | Y |
11890 | Jc/Jb | Y | N | Y |
11891 | Jae/Jnb | Y | N | Y |
11892 | Je/Jz | Y | Y | Y |
11893 | Jne/Jnz | Y | Y | Y |
11894 | Jna/Jbe | Y | N | Y |
11895 | Ja/Jnbe | Y | N | Y |
11896 | Js | N | N | Y |
11897 | Jns | N | N | Y |
11898 | Jp/Jpe | N | N | Y |
11899 | Jnp/Jpo | N | N | Y |
11900 | Jl/Jnge | Y | Y | Y |
11901 | Jge/Jnl | Y | Y | Y |
11902 | Jle/Jng | Y | Y | Y |
11903 | Jg/Jnle | Y | Y | Y |
11904 --------------------------------------------------------------------- */
11905 static int
11906 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
11907 {
11908 if (mf_cmp == mf_cmp_alu_cmp)
11909 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
11910 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
11911 if (mf_cmp == mf_cmp_incdec)
11912 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
11913 || mf_jcc == mf_jcc_jle);
11914 if (mf_cmp == mf_cmp_test_and)
11915 return 1;
11916 return 0;
11917 }
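/* Illustrative examples, per the table above: TEST/AND fuse with any
   Jcc, so "test %eax, %eax; jz" fuses; "cmp; jb" fuses as well; but
   "inc; jc" does not, since INC/DEC leave CF untouched and fuse only
   with the Je/Jne/Jl/Jge/Jle/Jg family. */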
11918
11919 /* Return the next non-empty frag. */
11920
11921 static fragS *
11922 i386_next_non_empty_frag (fragS *fragP)
11923 {
11924 /* There may be a frag with a ".fill 0" when there is no room in
11925 the current frag for frag_grow in output_insn. */
11926 for (fragP = fragP->fr_next;
11927 (fragP != NULL
11928 && fragP->fr_type == rs_fill
11929 && fragP->fr_fix == 0);
11930 fragP = fragP->fr_next)
11931 ;
11932 return fragP;
11933 }
11934
11935 /* Return the next jcc frag after BRANCH_PADDING. */
11936
11937 static fragS *
11938 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
11939 {
11940 fragS *branch_fragP;
11941 if (!pad_fragP)
11942 return NULL;
11943
11944 if (pad_fragP->fr_type == rs_machine_dependent
11945 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
11946 == BRANCH_PADDING))
11947 {
11948 branch_fragP = i386_next_non_empty_frag (pad_fragP);
11949 if (branch_fragP->fr_type != rs_machine_dependent)
11950 return NULL;
11951 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
11952 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
11953 pad_fragP->tc_frag_data.mf_type))
11954 return branch_fragP;
11955 }
11956
11957 return NULL;
11958 }
11959
11960 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
11961
11962 static void
11963 i386_classify_machine_dependent_frag (fragS *fragP)
11964 {
11965 fragS *cmp_fragP;
11966 fragS *pad_fragP;
11967 fragS *branch_fragP;
11968 fragS *next_fragP;
11969 unsigned int max_prefix_length;
11970
11971 if (fragP->tc_frag_data.classified)
11972 return;
11973
11974 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
11975 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
11976 for (next_fragP = fragP;
11977 next_fragP != NULL;
11978 next_fragP = next_fragP->fr_next)
11979 {
11980 next_fragP->tc_frag_data.classified = 1;
11981 if (next_fragP->fr_type == rs_machine_dependent)
11982 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
11983 {
11984 case BRANCH_PADDING:
11985 /* The BRANCH_PADDING frag must be followed by a branch
11986 frag. */
11987 branch_fragP = i386_next_non_empty_frag (next_fragP);
11988 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
11989 break;
11990 case FUSED_JCC_PADDING:
11991 /* Check if this is a fused jcc:
11992 FUSED_JCC_PADDING
11993 CMP like instruction
11994 BRANCH_PADDING
11995 COND_JUMP
11996 */
11997 cmp_fragP = i386_next_non_empty_frag (next_fragP);
11998 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
11999 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
12000 if (branch_fragP)
12001 {
12002 /* The BRANCH_PADDING frag is merged with the
12003 FUSED_JCC_PADDING frag. */
12004 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12005 /* CMP like instruction size. */
12006 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
12007 frag_wane (pad_fragP);
12008 /* Skip to branch_fragP. */
12009 next_fragP = branch_fragP;
12010 }
12011 else if (next_fragP->tc_frag_data.max_prefix_length)
12012 {
12013 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
12014 a fused jcc. */
12015 next_fragP->fr_subtype
12016 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
12017 next_fragP->tc_frag_data.max_bytes
12018 = next_fragP->tc_frag_data.max_prefix_length;
12019 /* This will be updated in the BRANCH_PREFIX scan. */
12020 next_fragP->tc_frag_data.max_prefix_length = 0;
12021 }
12022 else
12023 frag_wane (next_fragP);
12024 break;
12025 }
12026 }
12027
12028 /* Stop if there is no BRANCH_PREFIX. */
12029 if (!align_branch_prefix_size)
12030 return;
12031
12032 /* Scan for BRANCH_PREFIX. */
12033 for (; fragP != NULL; fragP = fragP->fr_next)
12034 {
12035 if (fragP->fr_type != rs_machine_dependent
12036 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12037 != BRANCH_PREFIX))
12038 continue;
12039
12040 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12041 COND_JUMP_PREFIX. */
12042 max_prefix_length = 0;
12043 for (next_fragP = fragP;
12044 next_fragP != NULL;
12045 next_fragP = next_fragP->fr_next)
12046 {
12047 if (next_fragP->fr_type == rs_fill)
12048 /* Skip rs_fill frags. */
12049 continue;
12050 else if (next_fragP->fr_type != rs_machine_dependent)
12051 /* Stop for all other frags. */
12052 break;
12053
12054 /* rs_machine_dependent frags. */
12055 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12056 == BRANCH_PREFIX)
12057 {
12058 /* Count BRANCH_PREFIX frags. */
12059 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12060 {
12061 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12062 frag_wane (next_fragP);
12063 }
12064 else
12065 max_prefix_length
12066 += next_fragP->tc_frag_data.max_bytes;
12067 }
12068 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12069 == BRANCH_PADDING)
12070 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12071 == FUSED_JCC_PADDING))
12072 {
12073 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
12074 fragP->tc_frag_data.u.padding_fragP = next_fragP;
12075 break;
12076 }
12077 else
12078 /* Stop for other rs_machine_dependent frags. */
12079 break;
12080 }
12081
12082 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12083
12084 /* Skip to the next frag. */
12085 fragP = next_fragP;
12086 }
12087 }
12088
12089 /* Compute padding size for
12090
12091 FUSED_JCC_PADDING
12092 CMP like instruction
12093 BRANCH_PADDING
12094 COND_JUMP/UNCOND_JUMP
12095
12096 or
12097
12098 BRANCH_PADDING
12099 COND_JUMP/UNCOND_JUMP
12100 */
12101
12102 static int
12103 i386_branch_padding_size (fragS *fragP, offsetT address)
12104 {
12105 unsigned int offset, size, padding_size;
12106 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12107
12108 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
12109 if (!address)
12110 address = fragP->fr_address;
12111 address += fragP->fr_fix;
12112
12113 /* CMP like instruction size. */
12114 size = fragP->tc_frag_data.cmp_size;
12115
12116 /* The base size of the branch frag. */
12117 size += branch_fragP->fr_fix;
12118
12119 /* Add opcode and displacement bytes for the rs_machine_dependent
12120 branch frag. */
12121 if (branch_fragP->fr_type == rs_machine_dependent)
12122 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12123
12124 /* Check if branch is within boundary and doesn't end at the last
12125 byte. */
12126 offset = address & ((1U << align_branch_power) - 1);
12127 if ((offset + size) >= (1U << align_branch_power))
12128 /* Padding needed to avoid crossing boundary. */
12129 padding_size = (1U << align_branch_power) - offset;
12130 else
12131 /* No padding needed. */
12132 padding_size = 0;
12133
12134 /* The return value may be saved in tc_frag_data.length, which is
12135 an unsigned byte. */
12136 if (!fits_in_unsigned_byte (padding_size))
12137 abort ();
12138
12139 return padding_size;
12140 }
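/* Worked example, assuming align_branch_power == 5 (32-byte
   boundary): a 3-byte CMP fused with a 6-byte Jcc starting at
   offset 27 of the window gives offset + size == 36 >= 32, so
   padding_size == 32 - 27 == 5 and the pair moves to the next
   boundary. */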
12141
12142 /* i386_generic_table_relax_frag()
12143
12144 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12145 grow/shrink padding to align branch frags. Hand others to
12146 relax_frag(). */
12147
12148 long
12149 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12150 {
12151 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12152 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12153 {
12154 long padding_size = i386_branch_padding_size (fragP, 0);
12155 long grow = padding_size - fragP->tc_frag_data.length;
12156
12157 /* When the BRANCH_PREFIX frag is used, the computed address
12158 must match the actual address and there should be no padding. */
12159 if (fragP->tc_frag_data.padding_address
12160 && (fragP->tc_frag_data.padding_address != fragP->fr_address
12161 || padding_size))
12162 abort ();
12163
12164 /* Update the padding size. */
12165 if (grow)
12166 fragP->tc_frag_data.length = padding_size;
12167
12168 return grow;
12169 }
12170 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12171 {
12172 fragS *padding_fragP, *next_fragP;
12173 long padding_size, left_size, last_size;
12174
12175 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12176 if (!padding_fragP)
12177 /* Use the padding set by the leading BRANCH_PREFIX frag. */
12178 return (fragP->tc_frag_data.length
12179 - fragP->tc_frag_data.last_length);
12180
12181 /* Compute the relative address of the padding frag the very
12182 first time, when the BRANCH_PREFIX frag sizes are zero. */
12183 if (!fragP->tc_frag_data.padding_address)
12184 fragP->tc_frag_data.padding_address
12185 = padding_fragP->fr_address - (fragP->fr_address - stretch);
12186
12187 /* First update the last length from the previous iteration. */
12188 left_size = fragP->tc_frag_data.prefix_length;
12189 for (next_fragP = fragP;
12190 next_fragP != padding_fragP;
12191 next_fragP = next_fragP->fr_next)
12192 if (next_fragP->fr_type == rs_machine_dependent
12193 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12194 == BRANCH_PREFIX))
12195 {
12196 if (left_size)
12197 {
12198 int max = next_fragP->tc_frag_data.max_bytes;
12199 if (max)
12200 {
12201 int size;
12202 if (max > left_size)
12203 size = left_size;
12204 else
12205 size = max;
12206 left_size -= size;
12207 next_fragP->tc_frag_data.last_length = size;
12208 }
12209 }
12210 else
12211 next_fragP->tc_frag_data.last_length = 0;
12212 }
12213
12214 /* Check the padding size for the padding frag. */
12215 padding_size = i386_branch_padding_size
12216 (padding_fragP, (fragP->fr_address
12217 + fragP->tc_frag_data.padding_address));
12218
12219 last_size = fragP->tc_frag_data.prefix_length;
12220 /* Check if there is a change from the last iteration. */
12221 if (padding_size == last_size)
12222 {
12223 /* Update the expected address of the padding frag. */
12224 padding_fragP->tc_frag_data.padding_address
12225 = (fragP->fr_address + padding_size
12226 + fragP->tc_frag_data.padding_address);
12227 return 0;
12228 }
12229
12230 if (padding_size > fragP->tc_frag_data.max_prefix_length)
12231 {
12232 /* No padding if there is insufficient room. Clear the
12233 expected address of the padding frag. */
12234 padding_fragP->tc_frag_data.padding_address = 0;
12235 padding_size = 0;
12236 }
12237 else
12238 /* Store the expected address of the padding frag. */
12239 padding_fragP->tc_frag_data.padding_address
12240 = (fragP->fr_address + padding_size
12241 + fragP->tc_frag_data.padding_address);
12242
12243 fragP->tc_frag_data.prefix_length = padding_size;
12244
12245 /* Update the length for the current iteration. */
12246 left_size = padding_size;
12247 for (next_fragP = fragP;
12248 next_fragP != padding_fragP;
12249 next_fragP = next_fragP->fr_next)
12250 if (next_fragP->fr_type == rs_machine_dependent
12251 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12252 == BRANCH_PREFIX))
12253 {
12254 if (left_size)
12255 {
12256 int max = next_fragP->tc_frag_data.max_bytes;
12257 if (max)
12258 {
12259 int size;
12260 if (max > left_size)
12261 size = left_size;
12262 else
12263 size = max;
12264 left_size -= size;
12265 next_fragP->tc_frag_data.length = size;
12266 }
12267 }
12268 else
12269 next_fragP->tc_frag_data.length = 0;
12270 }
12271
12272 return (fragP->tc_frag_data.length
12273 - fragP->tc_frag_data.last_length);
12274 }
12275 return relax_frag (segment, fragP, stretch);
12276 }
12277
12278 /* md_estimate_size_before_relax()
12279
12280 Called just before relax() for rs_machine_dependent frags. The x86
12281 assembler uses these frags to handle variable size jump
12282 instructions.
12283
12284 Any symbol that is now undefined will not become defined.
12285 Return the correct fr_subtype in the frag.
12286 Return the initial "guess for variable size of frag" to caller.
12287 The guess is actually the growth beyond the fixed part. Whatever
12288 we do to grow the fixed or variable part contributes to our
12289 returned value. */
12290
12291 int
12292 md_estimate_size_before_relax (fragS *fragP, segT segment)
12293 {
12294 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12295 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12296 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12297 {
12298 i386_classify_machine_dependent_frag (fragP);
12299 return fragP->tc_frag_data.length;
12300 }
12301
12302 /* We've already got fragP->fr_subtype right; all we have to do is
12303 check for un-relaxable symbols. On an ELF system, we can't relax
12304 an externally visible symbol, because it may be overridden by a
12305 shared library. */
12306 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12307 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12308 || (IS_ELF
12309 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12310 fragP->fr_var))
12311 #endif
12312 #if defined (OBJ_COFF) && defined (TE_PE)
12313 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12314 && S_IS_WEAK (fragP->fr_symbol))
12315 #endif
12316 )
12317 {
12318 /* Symbol is undefined in this segment, or we need to keep a
12319 reloc so that weak symbols can be overridden. */
12320 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12321 enum bfd_reloc_code_real reloc_type;
12322 unsigned char *opcode;
12323 int old_fr_fix;
12324 fixS *fixP = NULL;
12325
12326 if (fragP->fr_var != NO_RELOC)
12327 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12328 else if (size == 2)
12329 reloc_type = BFD_RELOC_16_PCREL;
12330 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12331 else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
12332 && need_plt32_p (fragP->fr_symbol))
12333 reloc_type = BFD_RELOC_X86_64_PLT32;
12334 #endif
12335 else
12336 reloc_type = BFD_RELOC_32_PCREL;
12337
12338 old_fr_fix = fragP->fr_fix;
12339 opcode = (unsigned char *) fragP->fr_opcode;
12340
12341 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12342 {
12343 case UNCOND_JUMP:
12344 /* Make jmp (0xeb) a (d)word displacement jump. */
12345 opcode[0] = 0xe9;
12346 fragP->fr_fix += size;
12347 fixP = fix_new (fragP, old_fr_fix, size,
12348 fragP->fr_symbol,
12349 fragP->fr_offset, 1,
12350 reloc_type);
12351 break;
12352
12353 case COND_JUMP86:
12354 if (size == 2
12355 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12356 {
12357 /* Negate the condition, and branch past an
12358 unconditional jump. */
12359 opcode[0] ^= 1;
12360 opcode[1] = 3;
12361 /* Insert an unconditional jump. */
12362 opcode[2] = 0xe9;
12363 /* We added two extra opcode bytes, and have a two byte
12364 offset. */
12365 fragP->fr_fix += 2 + 2;
12366 fix_new (fragP, old_fr_fix + 2, 2,
12367 fragP->fr_symbol,
12368 fragP->fr_offset, 1,
12369 reloc_type);
12370 break;
12371 }
12372 /* Fall through. */
12373
12374 case COND_JUMP:
12375 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12376 {
12377 fragP->fr_fix += 1;
12378 fixP = fix_new (fragP, old_fr_fix, 1,
12379 fragP->fr_symbol,
12380 fragP->fr_offset, 1,
12381 BFD_RELOC_8_PCREL);
12382 fixP->fx_signed = 1;
12383 break;
12384 }
12385
12386 /* This changes the byte-displacement jump 0x7N
12387 to the (d)word-displacement jump 0x0f,0x8N. */
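/* e.g. "je" goes from opcode 0x74 with a byte displacement to
   0x0f 0x84 with a (d)word displacement: 0x74 + 0x10 == 0x84. */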
12388 opcode[1] = opcode[0] + 0x10;
12389 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12390 /* We've added an opcode byte. */
12391 fragP->fr_fix += 1 + size;
12392 fixP = fix_new (fragP, old_fr_fix + 1, size,
12393 fragP->fr_symbol,
12394 fragP->fr_offset, 1,
12395 reloc_type);
12396 break;
12397
12398 default:
12399 BAD_CASE (fragP->fr_subtype);
12400 break;
12401 }
12402
12403 /* All jumps handled here are signed, but don't unconditionally use a
12404 signed limit check for 32 and 16 bit jumps as we want to allow wrap
12405 around at 4G (outside of 64-bit mode) and 64k. */
12406 if (size == 4 && flag_code == CODE_64BIT)
12407 fixP->fx_signed = 1;
12408
12409 frag_wane (fragP);
12410 return fragP->fr_fix - old_fr_fix;
12411 }
12412
12413 /* Guess size depending on current relax state. Initially the relax
12414 state will correspond to a short jump and we return 1, because
12415 the variable part of the frag (the branch offset) is one byte
12416 long. However, we can relax a section more than once and in that
12417 case we must either set fr_subtype back to the unrelaxed state,
12418 or return the value for the appropriate branch. */
12419 return md_relax_table[fragP->fr_subtype].rlx_length;
12420 }
12421
12422 /* Called after relax() is finished.
12423
12424 In: Address of frag.
12425 fr_type == rs_machine_dependent.
12426 fr_subtype is what the address relaxed to.
12427
12428 Out: Any fixSs and constants are set up.
12429 Caller will turn frag into a ".space 0". */
12430
12431 void
12432 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12433 fragS *fragP)
12434 {
12435 unsigned char *opcode;
12436 unsigned char *where_to_put_displacement = NULL;
12437 offsetT target_address;
12438 offsetT opcode_address;
12439 unsigned int extension = 0;
12440 offsetT displacement_from_opcode_start;
12441
12442 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12443 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12444 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12445 {
12446 /* Generate nop padding. */
12447 unsigned int size = fragP->tc_frag_data.length;
12448 if (size)
12449 {
12450 if (size > fragP->tc_frag_data.max_bytes)
12451 abort ();
12452
12453 if (flag_debug)
12454 {
12455 const char *msg;
12456 const char *branch = "branch";
12457 const char *prefix = "";
12458 fragS *padding_fragP;
12459 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12460 == BRANCH_PREFIX)
12461 {
12462 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12463 switch (fragP->tc_frag_data.default_prefix)
12464 {
12465 default:
12466 abort ();
12467 break;
12468 case CS_PREFIX_OPCODE:
12469 prefix = " cs";
12470 break;
12471 case DS_PREFIX_OPCODE:
12472 prefix = " ds";
12473 break;
12474 case ES_PREFIX_OPCODE:
12475 prefix = " es";
12476 break;
12477 case FS_PREFIX_OPCODE:
12478 prefix = " fs";
12479 break;
12480 case GS_PREFIX_OPCODE:
12481 prefix = " gs";
12482 break;
12483 case SS_PREFIX_OPCODE:
12484 prefix = " ss";
12485 break;
12486 }
12487 if (padding_fragP)
12488 msg = _("%s:%u: add %d%s at 0x%llx to align "
12489 "%s within %d-byte boundary\n");
12490 else
12491 msg = _("%s:%u: add additional %d%s at 0x%llx to "
12492 "align %s within %d-byte boundary\n");
12493 }
12494 else
12495 {
12496 padding_fragP = fragP;
12497 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12498 "%s within %d-byte boundary\n");
12499 }
12500
12501 if (padding_fragP)
12502 switch (padding_fragP->tc_frag_data.branch_type)
12503 {
12504 case align_branch_jcc:
12505 branch = "jcc";
12506 break;
12507 case align_branch_fused:
12508 branch = "fused jcc";
12509 break;
12510 case align_branch_jmp:
12511 branch = "jmp";
12512 break;
12513 case align_branch_call:
12514 branch = "call";
12515 break;
12516 case align_branch_indirect:
12517 branch = "indiret branch";
12518 break;
12519 case align_branch_ret:
12520 branch = "ret";
12521 break;
12522 default:
12523 break;
12524 }
12525
12526 fprintf (stdout, msg,
12527 fragP->fr_file, fragP->fr_line, size, prefix,
12528 (long long) fragP->fr_address, branch,
12529 1 << align_branch_power);
12530 }
12531 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12532 memset (fragP->fr_opcode,
12533 fragP->tc_frag_data.default_prefix, size);
12534 else
12535 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12536 size, 0);
12537 fragP->fr_fix += size;
12538 }
12539 return;
12540 }
12541
12542 opcode = (unsigned char *) fragP->fr_opcode;
12543
12544 /* Address we want to reach in file space. */
12545 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12546
12547 /* Address opcode resides at in file space. */
12548 opcode_address = fragP->fr_address + fragP->fr_fix;
12549
12550 /* Displacement from opcode start to fill into instruction. */
12551 displacement_from_opcode_start = target_address - opcode_address;
12552
12553 if ((fragP->fr_subtype & BIG) == 0)
12554 {
12555 /* Don't have to change opcode. */
12556 extension = 1; /* 1 opcode + 1 displacement */
12557 where_to_put_displacement = &opcode[1];
12558 }
12559 else
12560 {
12561 if (no_cond_jump_promotion
12562 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12563 as_warn_where (fragP->fr_file, fragP->fr_line,
12564 _("long jump required"));
12565
12566 switch (fragP->fr_subtype)
12567 {
12568 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12569 extension = 4; /* 1 opcode + 4 displacement */
12570 opcode[0] = 0xe9;
12571 where_to_put_displacement = &opcode[1];
12572 break;
12573
12574 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12575 extension = 2; /* 1 opcode + 2 displacement */
12576 opcode[0] = 0xe9;
12577 where_to_put_displacement = &opcode[1];
12578 break;
12579
12580 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12581 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12582 extension = 5; /* 2 opcode + 4 displacement */
12583 opcode[1] = opcode[0] + 0x10;
12584 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12585 where_to_put_displacement = &opcode[2];
12586 break;
12587
12588 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12589 extension = 3; /* 2 opcode + 2 displacement */
12590 opcode[1] = opcode[0] + 0x10;
12591 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12592 where_to_put_displacement = &opcode[2];
12593 break;
12594
12595 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12596 extension = 4;
12597 opcode[0] ^= 1;
12598 opcode[1] = 3;
12599 opcode[2] = 0xe9;
12600 where_to_put_displacement = &opcode[3];
12601 break;
12602
12603 default:
12604 BAD_CASE (fragP->fr_subtype);
12605 break;
12606 }
12607 }
12608
12609 /* If the size is less than four we are sure that the operand fits,
12610 but if it's 4, then the displacement could be larger
12611 than +/- 2GB. */
12612 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12613 && object_64bit
12614 && ((addressT) (displacement_from_opcode_start - extension
12615 + ((addressT) 1 << 31))
12616 > (((addressT) 2 << 31) - 1)))
12617 {
12618 as_bad_where (fragP->fr_file, fragP->fr_line,
12619 _("jump target out of range"));
12620 /* Make us emit 0. */
12621 displacement_from_opcode_start = extension;
12622 }
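/* The unsigned comparison above avoids signed overflow: adding 2^31
   maps the in-range interval [-2^31, 2^31 - 1] onto [0, 2^32 - 1],
   so any larger unsigned result must be out of range. */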
12623 /* Now put displacement after opcode. */
12624 md_number_to_chars ((char *) where_to_put_displacement,
12625 (valueT) (displacement_from_opcode_start - extension),
12626 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12627 fragP->fr_fix += extension;
12628 }
12629 \f
12630 /* Apply a fixup (fixP) to segment data, once it has been determined
12631 by our caller that we have all the info we need to fix it up.
12632
12633 Parameter valP is the pointer to the value of the bits.
12634
12635 On the 386, immediates, displacements, and data pointers are all in
12636 the same (little-endian) format, so we don't need to care about which
12637 we are handling. */
12638
12639 void
12640 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12641 {
12642 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12643 valueT value = *valP;
12644
12645 #if !defined (TE_Mach)
12646 if (fixP->fx_pcrel)
12647 {
12648 switch (fixP->fx_r_type)
12649 {
12650 default:
12651 break;
12652
12653 case BFD_RELOC_64:
12654 fixP->fx_r_type = BFD_RELOC_64_PCREL;
12655 break;
12656 case BFD_RELOC_32:
12657 case BFD_RELOC_X86_64_32S:
12658 fixP->fx_r_type = BFD_RELOC_32_PCREL;
12659 break;
12660 case BFD_RELOC_16:
12661 fixP->fx_r_type = BFD_RELOC_16_PCREL;
12662 break;
12663 case BFD_RELOC_8:
12664 fixP->fx_r_type = BFD_RELOC_8_PCREL;
12665 break;
12666 }
12667 }
12668
12669 if (fixP->fx_addsy != NULL
12670 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12671 || fixP->fx_r_type == BFD_RELOC_64_PCREL
12672 || fixP->fx_r_type == BFD_RELOC_16_PCREL
12673 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12674 && !use_rela_relocations)
12675 {
12676 /* This is a hack. There should be a better way to handle this.
12677 This covers for the fact that bfd_install_relocation will
12678 subtract the current location (for partial_inplace, PC relative
12679 relocations); see more below. */
12680 #ifndef OBJ_AOUT
12681 if (IS_ELF
12682 #ifdef TE_PE
12683 || OUTPUT_FLAVOR == bfd_target_coff_flavour
12684 #endif
12685 )
12686 value += fixP->fx_where + fixP->fx_frag->fr_address;
12687 #endif
12688 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12689 if (IS_ELF)
12690 {
12691 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12692
12693 if ((sym_seg == seg
12694 || (symbol_section_p (fixP->fx_addsy)
12695 && sym_seg != absolute_section))
12696 && !generic_force_reloc (fixP))
12697 {
12698 /* Yes, we add the values in twice. This is because
12699 bfd_install_relocation subtracts them out again. I think
12700 bfd_install_relocation is broken, but I don't dare change
12701 it. FIXME. */
12702 value += fixP->fx_where + fixP->fx_frag->fr_address;
12703 }
12704 }
12705 #endif
12706 #if defined (OBJ_COFF) && defined (TE_PE)
12707 /* For some reason, the PE format does not store a
12708 section address offset for a PC relative symbol. */
12709 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12710 || S_IS_WEAK (fixP->fx_addsy))
12711 value += md_pcrel_from (fixP);
12712 #endif
12713 }
12714 #if defined (OBJ_COFF) && defined (TE_PE)
12715 if (fixP->fx_addsy != NULL
12716 && S_IS_WEAK (fixP->fx_addsy)
12717 /* PR 16858: Do not modify weak function references. */
12718 && ! fixP->fx_pcrel)
12719 {
12720 #if !defined (TE_PEP)
12721 /* For x86 PE, weak function symbols are neither PC-relative
12722 nor do they set S_IS_FUNCTION. So the only reliable way
12723 to detect them is to check the flags of their containing
12724 section. */
12725 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12726 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12727 ;
12728 else
12729 #endif
12730 value -= S_GET_VALUE (fixP->fx_addsy);
12731 }
12732 #endif
12733
12734 /* Fix a few things - the dynamic linker expects certain values here,
12735 and we must not disappoint it. */
12736 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12737 if (IS_ELF && fixP->fx_addsy)
12738 switch (fixP->fx_r_type)
12739 {
12740 case BFD_RELOC_386_PLT32:
12741 case BFD_RELOC_X86_64_PLT32:
12742 /* Make the jump instruction point to the address of the operand.
12743 At runtime we merely add the offset to the actual PLT entry.
12744 NB: Subtract the offset size only for jump instructions. */
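/* e.g. for "call foo@PLT" this stores an addend of -4, matching
   the 4 displacement bytes between the fixup and the end of the
   call instruction. */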
12745 if (fixP->fx_pcrel)
12746 value = -4;
12747 break;
12748
12749 case BFD_RELOC_386_TLS_GD:
12750 case BFD_RELOC_386_TLS_LDM:
12751 case BFD_RELOC_386_TLS_IE_32:
12752 case BFD_RELOC_386_TLS_IE:
12753 case BFD_RELOC_386_TLS_GOTIE:
12754 case BFD_RELOC_386_TLS_GOTDESC:
12755 case BFD_RELOC_X86_64_TLSGD:
12756 case BFD_RELOC_X86_64_TLSLD:
12757 case BFD_RELOC_X86_64_GOTTPOFF:
12758 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12759 value = 0; /* Fully resolved at runtime. No addend. */
12760 /* Fallthrough */
12761 case BFD_RELOC_386_TLS_LE:
12762 case BFD_RELOC_386_TLS_LDO_32:
12763 case BFD_RELOC_386_TLS_LE_32:
12764 case BFD_RELOC_X86_64_DTPOFF32:
12765 case BFD_RELOC_X86_64_DTPOFF64:
12766 case BFD_RELOC_X86_64_TPOFF32:
12767 case BFD_RELOC_X86_64_TPOFF64:
12768 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12769 break;
12770
12771 case BFD_RELOC_386_TLS_DESC_CALL:
12772 case BFD_RELOC_X86_64_TLSDESC_CALL:
12773 value = 0; /* Fully resolved at runtime. No addend. */
12774 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12775 fixP->fx_done = 0;
12776 return;
12777
12778 case BFD_RELOC_VTABLE_INHERIT:
12779 case BFD_RELOC_VTABLE_ENTRY:
12780 fixP->fx_done = 0;
12781 return;
12782
12783 default:
12784 break;
12785 }
12786 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
12787
12788 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
12789 if (!object_64bit)
12790 value = extend_to_32bit_address (value);
12791
12792 *valP = value;
12793 #endif /* !defined (TE_Mach) */
12794
12795 /* Are we finished with this relocation now? */
12796 if (fixP->fx_addsy == NULL)
12797 {
12798 fixP->fx_done = 1;
12799 switch (fixP->fx_r_type)
12800 {
12801 case BFD_RELOC_X86_64_32S:
12802 fixP->fx_signed = 1;
12803 break;
12804
12805 default:
12806 break;
12807 }
12808 }
12809 #if defined (OBJ_COFF) && defined (TE_PE)
12810 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12811 {
12812 fixP->fx_done = 0;
12813 /* Remember value for tc_gen_reloc. */
12814 fixP->fx_addnumber = value;
12815 /* Clear out the frag for now. */
12816 value = 0;
12817 }
12818 #endif
12819 else if (use_rela_relocations)
12820 {
12821 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
12822 fixP->fx_no_overflow = 1;
12823 /* Remember value for tc_gen_reloc. */
12824 fixP->fx_addnumber = value;
12825 value = 0;
12826 }
12827
12828 md_number_to_chars (p, value, fixP->fx_size);
12829 }
12830 \f
12831 const char *
12832 md_atof (int type, char *litP, int *sizeP)
12833 {
12834 /* This outputs the LITTLENUMs in REVERSE order,
12835 as required by the little-endian 386. */
12836 return ieee_md_atof (type, litP, sizeP, false);
12837 }
12838 \f
12839 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
12840
12841 static char *
12842 output_invalid (int c)
12843 {
12844 if (ISPRINT (c))
12845 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12846 "'%c'", c);
12847 else
12848 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12849 "(0x%x)", (unsigned char) c);
12850 return output_invalid_buf;
12851 }
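/* e.g. output_invalid ('!') yields "'!'", while a non-printable
   0x01 yields "(0x1)". */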
12852
12853 /* Verify that @r can be used in the current context. */
12854
12855 static bool check_register (const reg_entry *r)
12856 {
12857 if (allow_pseudo_reg)
12858 return true;
12859
12860 if (operand_type_all_zero (&r->reg_type))
12861 return false;
12862
12863 if ((r->reg_type.bitfield.dword
12864 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12865 || r->reg_type.bitfield.class == RegCR
12866 || r->reg_type.bitfield.class == RegDR)
12867 && !cpu_arch_flags.bitfield.cpui386)
12868 return false;
12869
12870 if (r->reg_type.bitfield.class == RegTR
12871 && (flag_code == CODE_64BIT
12872 || !cpu_arch_flags.bitfield.cpui386
12873 || cpu_arch_isa_flags.bitfield.cpui586
12874 || cpu_arch_isa_flags.bitfield.cpui686))
12875 return false;
12876
12877 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12878 return false;
12879
12880 if (!cpu_arch_flags.bitfield.cpuavx512f)
12881 {
12882 if (r->reg_type.bitfield.zmmword
12883 || r->reg_type.bitfield.class == RegMask)
12884 return false;
12885
12886 if (!cpu_arch_flags.bitfield.cpuavx)
12887 {
12888 if (r->reg_type.bitfield.ymmword)
12889 return false;
12890
12891 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12892 return false;
12893 }
12894 }
12895
12896 if (r->reg_type.bitfield.tmmword
12897 && (!cpu_arch_flags.bitfield.cpuamx_tile
12898 || flag_code != CODE_64BIT))
12899 return false;
12900
12901 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12902 return false;
12903
12904 /* Don't allow the fake index register unless allow_index_reg is nonzero. */
12905 if (!allow_index_reg && r->reg_num == RegIZ)
12906 return false;
12907
12908 /* Upper 16 vector registers are only available with VREX in 64bit
12909 mode, and require EVEX encoding. */
12910 if (r->reg_flags & RegVRex)
12911 {
12912 if (!cpu_arch_flags.bitfield.cpuavx512f
12913 || flag_code != CODE_64BIT)
12914 return false;
12915
12916 if (i.vec_encoding == vex_encoding_default)
12917 i.vec_encoding = vex_encoding_evex;
12918 else if (i.vec_encoding != vex_encoding_evex)
12919 i.vec_encoding = vex_encoding_error;
12920 }
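/* e.g. using %xmm16..%xmm31 silently selects EVEX encoding, while
   combining them with an explicit {vex} pseudo prefix ends up as an
   encoding error. */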
12921
12922 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
12923 && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
12924 && flag_code != CODE_64BIT)
12925 return false;
12926
12927 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
12928 && !intel_syntax)
12929 return false;
12930
12931 return true;
12932 }
12933
12934 /* REG_STRING starts *before* REGISTER_PREFIX. */
12935
12936 static const reg_entry *
12937 parse_real_register (char *reg_string, char **end_op)
12938 {
12939 char *s = reg_string;
12940 char *p;
12941 char reg_name_given[MAX_REG_NAME_SIZE + 1];
12942 const reg_entry *r;
12943
12944 /* Skip possible REGISTER_PREFIX and possible whitespace. */
12945 if (*s == REGISTER_PREFIX)
12946 ++s;
12947
12948 if (is_space_char (*s))
12949 ++s;
12950
12951 p = reg_name_given;
12952 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
12953 {
12954 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
12955 return (const reg_entry *) NULL;
12956 s++;
12957 }
12958
12959 /* For naked regs, make sure that we are not dealing with an identifier.
12960 This prevents confusing an identifier like `eax_var' with register
12961 `eax'. */
12962 if (allow_naked_reg && identifier_chars[(unsigned char) *s])
12963 return (const reg_entry *) NULL;
12964
12965 *end_op = s;
12966
12967 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
12968
12969 /* Handle floating point regs, allowing spaces in the (i) part. */
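/* e.g. "%st ( 2 )" is accepted and resolves to %st(2). */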
12970 if (r == reg_st0)
12971 {
12972 if (!cpu_arch_flags.bitfield.cpu8087
12973 && !cpu_arch_flags.bitfield.cpu287
12974 && !cpu_arch_flags.bitfield.cpu387
12975 && !allow_pseudo_reg)
12976 return (const reg_entry *) NULL;
12977
12978 if (is_space_char (*s))
12979 ++s;
12980 if (*s == '(')
12981 {
12982 ++s;
12983 if (is_space_char (*s))
12984 ++s;
12985 if (*s >= '0' && *s <= '7')
12986 {
12987 int fpr = *s - '0';
12988 ++s;
12989 if (is_space_char (*s))
12990 ++s;
12991 if (*s == ')')
12992 {
12993 *end_op = s + 1;
12994 know (r[fpr].reg_num == fpr);
12995 return r + fpr;
12996 }
12997 }
12998 /* We have "%st(" then garbage. */
12999 return (const reg_entry *) NULL;
13000 }
13001 }
13002
13003 return r && check_register (r) ? r : NULL;
13004 }
13005
13006 /* REG_STRING starts *before* REGISTER_PREFIX. */
13007
13008 static const reg_entry *
13009 parse_register (char *reg_string, char **end_op)
13010 {
13011 const reg_entry *r;
13012
13013 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
13014 r = parse_real_register (reg_string, end_op);
13015 else
13016 r = NULL;
13017 if (!r)
13018 {
13019 char *save = input_line_pointer;
13020 char c;
13021 symbolS *symbolP;
13022
13023 input_line_pointer = reg_string;
13024 c = get_symbol_name (&reg_string);
13025 symbolP = symbol_find (reg_string);
13026 while (symbolP && S_GET_SEGMENT (symbolP) != reg_section)
13027 {
13028 const expressionS *e = symbol_get_value_expression(symbolP);
13029
13030 if (e->X_op != O_symbol || e->X_add_number)
13031 break;
13032 symbolP = e->X_add_symbol;
13033 }
13034 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
13035 {
13036 const expressionS *e = symbol_get_value_expression (symbolP);
13037
13038 know (e->X_op == O_register);
13039 know (e->X_add_number >= 0
13040 && (valueT) e->X_add_number < i386_regtab_size);
13041 r = i386_regtab + e->X_add_number;
13042 if (!check_register (r))
13043 {
13044 as_bad (_("register '%s%s' cannot be used here"),
13045 register_prefix, r->reg_name);
13046 r = &bad_reg;
13047 }
13048 *end_op = input_line_pointer;
13049 }
13050 *input_line_pointer = c;
13051 input_line_pointer = save;
13052 }
13053 return r;
13054 }
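/* e.g. after a (hypothetical) register alias ".set myreg, %rbp",
   "myreg" resolves here to %rbp via the reg_section symbol chase
   above. */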
13055
13056 int
13057 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13058 {
13059 const reg_entry *r = NULL;
13060 char *end = input_line_pointer;
13061
13062 *end = *nextcharP;
13063 if (*name == REGISTER_PREFIX || allow_naked_reg)
13064 r = parse_real_register (name, &input_line_pointer);
13065 if (r && end <= input_line_pointer)
13066 {
13067 *nextcharP = *input_line_pointer;
13068 *input_line_pointer = 0;
13069 if (r != &bad_reg)
13070 {
13071 e->X_op = O_register;
13072 e->X_add_number = r - i386_regtab;
13073 }
13074 else
13075 e->X_op = O_illegal;
13076 return 1;
13077 }
13078 input_line_pointer = end;
13079 *end = 0;
13080 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13081 }
13082
13083 void
13084 md_operand (expressionS *e)
13085 {
13086 char *end;
13087 const reg_entry *r;
13088
13089 switch (*input_line_pointer)
13090 {
13091 case REGISTER_PREFIX:
13092 r = parse_real_register (input_line_pointer, &end);
13093 if (r)
13094 {
13095 e->X_op = O_register;
13096 e->X_add_number = r - i386_regtab;
13097 input_line_pointer = end;
13098 }
13099 break;
13100
13101 case '[':
13102 gas_assert (intel_syntax);
13103 end = input_line_pointer++;
13104 expression (e);
13105 if (*input_line_pointer == ']')
13106 {
13107 ++input_line_pointer;
13108 e->X_op_symbol = make_expr_symbol (e);
13109 e->X_add_symbol = NULL;
13110 e->X_add_number = 0;
13111 e->X_op = O_index;
13112 }
13113 else
13114 {
13115 e->X_op = O_absent;
13116 input_line_pointer = end;
13117 }
13118 break;
13119 }
13120 }
13121
13122 \f
13123 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13124 const char *md_shortopts = "kVQ:sqnO::";
13125 #else
13126 const char *md_shortopts = "qnO::";
13127 #endif
13128
13129 #define OPTION_32 (OPTION_MD_BASE + 0)
13130 #define OPTION_64 (OPTION_MD_BASE + 1)
13131 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
13132 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13133 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
13134 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
13135 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
13136 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
13137 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
13138 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
13139 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
13140 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
13141 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
13142 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
13143 #define OPTION_X32 (OPTION_MD_BASE + 14)
13144 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
13145 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
13146 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
13147 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
13148 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
13149 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
13150 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
13151 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
13152 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
13153 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
13154 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
13155 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
13156 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
13157 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
13158 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
13159 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
13160 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
13161 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
13162 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
13163 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
13164
13165 struct option md_longopts[] =
13166 {
13167 {"32", no_argument, NULL, OPTION_32},
13168 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13169 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13170 {"64", no_argument, NULL, OPTION_64},
13171 #endif
13172 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13173 {"x32", no_argument, NULL, OPTION_X32},
13174 {"mshared", no_argument, NULL, OPTION_MSHARED},
13175 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
13176 #endif
13177 {"divide", no_argument, NULL, OPTION_DIVIDE},
13178 {"march", required_argument, NULL, OPTION_MARCH},
13179 {"mtune", required_argument, NULL, OPTION_MTUNE},
13180 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
13181 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
13182 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
13183 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
13184 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
13185 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
13186 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
13187 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
13188 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
13189 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
13190 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
13191 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
13192 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
13193 # if defined (TE_PE) || defined (TE_PEP)
13194 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
13195 #endif
13196 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
13197 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
13198 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
13199 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
13200 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
13201 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
13202 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
13203 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
13204 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
13205 {"mlfence-before-indirect-branch", required_argument, NULL,
13206 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
13207 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
13208 {"mamd64", no_argument, NULL, OPTION_MAMD64},
13209 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
13210 {NULL, no_argument, NULL, 0}
13211 };
13212 size_t md_longopts_size = sizeof (md_longopts);
13213
13214 int
13215 md_parse_option (int c, const char *arg)
13216 {
13217 unsigned int j;
13218 char *arch, *next, *saved, *type;
13219
13220 switch (c)
13221 {
13222 case 'n':
13223 optimize_align_code = 0;
13224 break;
13225
13226 case 'q':
13227 quiet_warnings = 1;
13228 break;
13229
13230 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13231 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13232 should be emitted or not. FIXME: Not implemented. */
13233 case 'Q':
13234 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13235 return 0;
13236 break;
13237
13238 /* -V: SVR4 argument to print version ID. */
13239 case 'V':
13240 print_version_id ();
13241 break;
13242
13243 /* -k: Ignore for FreeBSD compatibility. */
13244 case 'k':
13245 break;
13246
13247 case 's':
13248 /* -s: On i386 Solaris, this tells the native assembler to use
13249 .stab instead of .stab.excl. We always use .stab anyhow. */
13250 break;
13251
13252 case OPTION_MSHARED:
13253 shared = 1;
13254 break;
13255
13256 case OPTION_X86_USED_NOTE:
13257 if (strcasecmp (arg, "yes") == 0)
13258 x86_used_note = 1;
13259 else if (strcasecmp (arg, "no") == 0)
13260 x86_used_note = 0;
13261 else
13262 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13263 break;
13264
13265
13266 #endif
13267 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13268 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13269 case OPTION_64:
13270 {
13271 const char **list, **l;
13272
13273 list = bfd_target_list ();
13274 for (l = list; *l != NULL; l++)
13275 if (startswith (*l, "elf64-x86-64")
13276 || strcmp (*l, "coff-x86-64") == 0
13277 || strcmp (*l, "pe-x86-64") == 0
13278 || strcmp (*l, "pei-x86-64") == 0
13279 || strcmp (*l, "mach-o-x86-64") == 0)
13280 {
13281 default_arch = "x86_64";
13282 break;
13283 }
13284 if (*l == NULL)
13285 as_fatal (_("no compiled in support for x86_64"));
13286 free (list);
13287 }
13288 break;
13289 #endif
13290
13291 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13292 case OPTION_X32:
13293 if (IS_ELF)
13294 {
13295 const char **list, **l;
13296
13297 list = bfd_target_list ();
13298 for (l = list; *l != NULL; l++)
13299 if (startswith (*l, "elf32-x86-64"))
13300 {
13301 default_arch = "x86_64:32";
13302 break;
13303 }
13304 if (*l == NULL)
13305 as_fatal (_("no compiled in support for 32bit x86_64"));
13306 free (list);
13307 }
13308 else
13309 as_fatal (_("32bit x86_64 is only supported for ELF"));
13310 break;
13311 #endif
13312
13313 case OPTION_32:
13314 default_arch = "i386";
13315 break;
13316
13317 case OPTION_DIVIDE:
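/* On SVR4-derived targets '/' starts a comment by default; with
   --divide it is removed from the comment characters so it stays
   usable as the division operator in expressions. */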
13318 #ifdef SVR4_COMMENT_CHARS
13319 {
13320 char *n, *t;
13321 const char *s;
13322
13323 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13324 t = n;
13325 for (s = i386_comment_chars; *s != '\0'; s++)
13326 if (*s != '/')
13327 *t++ = *s;
13328 *t = '\0';
13329 i386_comment_chars = n;
13330 }
13331 #endif
13332 break;
13333
13334 case OPTION_MARCH:
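/* Illustrative example (assuming these table entries exist):
   "-march=corei7+avx+noavx2" selects the corei7 processor, then
   enables the AVX extension and disables AVX2; a processor name is
   only recognized as the leading component. */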
13335 saved = xstrdup (arg);
13336 arch = saved;
13337 /* Allow -march=+nosse. */
13338 if (*arch == '+')
13339 arch++;
13340 do
13341 {
13342 if (*arch == '.')
13343 as_fatal (_("invalid -march= option: `%s'"), arg);
13344 next = strchr (arch, '+');
13345 if (next)
13346 *next++ = '\0';
13347 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13348 {
13349 if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
13350 && strcmp (arch, cpu_arch[j].name) == 0)
13351 {
13352 /* Processor. */
13353 if (! cpu_arch[j].enable.bitfield.cpui386)
13354 continue;
13355
13356 cpu_arch_name = cpu_arch[j].name;
13357 free (cpu_sub_arch_name);
13358 cpu_sub_arch_name = NULL;
13359 cpu_arch_flags = cpu_arch[j].enable;
13360 cpu_arch_isa = cpu_arch[j].type;
13361 cpu_arch_isa_flags = cpu_arch[j].enable;
13362 if (!cpu_arch_tune_set)
13363 {
13364 cpu_arch_tune = cpu_arch_isa;
13365 cpu_arch_tune_flags = cpu_arch_isa_flags;
13366 }
13367 break;
13368 }
13369 else if (cpu_arch[j].type == PROCESSOR_NONE
13370 && strcmp (arch, cpu_arch[j].name) == 0
13371 && !cpu_flags_all_zero (&cpu_arch[j].enable))
13372 {
13373 /* ISA extension. */
13374 i386_cpu_flags flags;
13375
13376 flags = cpu_flags_or (cpu_arch_flags,
13377 cpu_arch[j].enable);
13378
13379 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13380 {
13381 extend_cpu_sub_arch_name (arch);
13382 cpu_arch_flags = flags;
13383 cpu_arch_isa_flags = flags;
13384 }
13385 else
13386 cpu_arch_isa_flags
13387 = cpu_flags_or (cpu_arch_isa_flags,
13388 cpu_arch[j].enable);
13389 break;
13390 }
13391 }
13392
13393 if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
13394 {
13395 /* Disable an ISA extension. */
13396 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13397 if (cpu_arch[j].type == PROCESSOR_NONE
13398 && strcmp (arch + 2, cpu_arch[j].name) == 0)
13399 {
13400 i386_cpu_flags flags;
13401
13402 flags = cpu_flags_and_not (cpu_arch_flags,
13403 cpu_arch[j].disable);
13404 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13405 {
13406 extend_cpu_sub_arch_name (arch);
13407 cpu_arch_flags = flags;
13408 cpu_arch_isa_flags = flags;
13409 }
13410 break;
13411 }
13412 }
13413
13414 if (j >= ARRAY_SIZE (cpu_arch))
13415 as_fatal (_("invalid -march= option: `%s'"), arg);
13416
13417 arch = next;
13418 }
13419 while (next != NULL);
13420 free (saved);
13421 break;
13422
13423 case OPTION_MTUNE:
13424 if (*arg == '.')
13425 as_fatal (_("invalid -mtune= option: `%s'"), arg);
13426 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13427 {
13428 if (cpu_arch[j].type != PROCESSOR_NONE
13429 && strcmp (arg, cpu_arch[j].name) == 0)
13430 {
13431 cpu_arch_tune_set = 1;
13432 cpu_arch_tune = cpu_arch [j].type;
13433 cpu_arch_tune_flags = cpu_arch[j].enable;
13434 break;
13435 }
13436 }
13437 if (j >= ARRAY_SIZE (cpu_arch))
13438 as_fatal (_("invalid -mtune= option: `%s'"), arg);
13439 break;
13440
13441 case OPTION_MMNEMONIC:
13442 if (strcasecmp (arg, "att") == 0)
13443 intel_mnemonic = 0;
13444 else if (strcasecmp (arg, "intel") == 0)
13445 intel_mnemonic = 1;
13446 else
13447 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13448 break;
13449
13450 case OPTION_MSYNTAX:
13451 if (strcasecmp (arg, "att") == 0)
13452 intel_syntax = 0;
13453 else if (strcasecmp (arg, "intel") == 0)
13454 intel_syntax = 1;
13455 else
13456 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13457 break;
13458
13459 case OPTION_MINDEX_REG:
13460 allow_index_reg = 1;
13461 break;
13462
13463 case OPTION_MNAKED_REG:
13464 allow_naked_reg = 1;
13465 break;
13466
13467 case OPTION_MSSE2AVX:
13468 sse2avx = 1;
13469 break;
13470
13471 case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
13472 use_unaligned_vector_move = 1;
13473 break;
13474
13475 case OPTION_MSSE_CHECK:
13476 if (strcasecmp (arg, "error") == 0)
13477 sse_check = check_error;
13478 else if (strcasecmp (arg, "warning") == 0)
13479 sse_check = check_warning;
13480 else if (strcasecmp (arg, "none") == 0)
13481 sse_check = check_none;
13482 else
13483 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13484 break;
13485
13486 case OPTION_MOPERAND_CHECK:
13487 if (strcasecmp (arg, "error") == 0)
13488 operand_check = check_error;
13489 else if (strcasecmp (arg, "warning") == 0)
13490 operand_check = check_warning;
13491 else if (strcasecmp (arg, "none") == 0)
13492 operand_check = check_none;
13493 else
13494 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13495 break;
13496
13497 case OPTION_MAVXSCALAR:
13498 if (strcasecmp (arg, "128") == 0)
13499 avxscalar = vex128;
13500 else if (strcasecmp (arg, "256") == 0)
13501 avxscalar = vex256;
13502 else
13503 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13504 break;
13505
13506 case OPTION_MVEXWIG:
13507 if (strcmp (arg, "0") == 0)
13508 vexwig = vexw0;
13509 else if (strcmp (arg, "1") == 0)
13510 vexwig = vexw1;
13511 else
13512 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13513 break;
13514
13515 case OPTION_MADD_BND_PREFIX:
13516 add_bnd_prefix = 1;
13517 break;
13518
13519 case OPTION_MEVEXLIG:
13520 if (strcmp (arg, "128") == 0)
13521 evexlig = evexl128;
13522 else if (strcmp (arg, "256") == 0)
13523 evexlig = evexl256;
13524 else if (strcmp (arg, "512") == 0)
13525 evexlig = evexl512;
13526 else
13527 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13528 break;
13529
13530 case OPTION_MEVEXRCIG:
13531 if (strcmp (arg, "rne") == 0)
13532 evexrcig = rne;
13533 else if (strcmp (arg, "rd") == 0)
13534 evexrcig = rd;
13535 else if (strcmp (arg, "ru") == 0)
13536 evexrcig = ru;
13537 else if (strcmp (arg, "rz") == 0)
13538 evexrcig = rz;
13539 else
13540 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13541 break;
13542
13543 case OPTION_MEVEXWIG:
13544 if (strcmp (arg, "0") == 0)
13545 evexwig = evexw0;
13546 else if (strcmp (arg, "1") == 0)
13547 evexwig = evexw1;
13548 else
13549 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13550 break;
13551
13552 #if defined (TE_PE) || defined (TE_PEP)
13553 case OPTION_MBIG_OBJ:
13554 use_big_obj = 1;
13555 break;
13556 #endif
13557
13558 case OPTION_MOMIT_LOCK_PREFIX:
13559 if (strcasecmp (arg, "yes") == 0)
13560 omit_lock_prefix = 1;
13561 else if (strcasecmp (arg, "no") == 0)
13562 omit_lock_prefix = 0;
13563 else
13564 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13565 break;
13566
13567 case OPTION_MFENCE_AS_LOCK_ADD:
13568 if (strcasecmp (arg, "yes") == 0)
13569 avoid_fence = 1;
13570 else if (strcasecmp (arg, "no") == 0)
13571 avoid_fence = 0;
13572 else
13573 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13574 break;
13575
13576 case OPTION_MLFENCE_AFTER_LOAD:
13577 if (strcasecmp (arg, "yes") == 0)
13578 lfence_after_load = 1;
13579 else if (strcasecmp (arg, "no") == 0)
13580 lfence_after_load = 0;
13581 else
13582 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13583 break;
13584
13585 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13586 if (strcasecmp (arg, "all") == 0)
13587 {
13588 lfence_before_indirect_branch = lfence_branch_all;
13589 if (lfence_before_ret == lfence_before_ret_none)
13590 lfence_before_ret = lfence_before_ret_shl;
13591 }
13592 else if (strcasecmp (arg, "memory") == 0)
13593 lfence_before_indirect_branch = lfence_branch_memory;
13594 else if (strcasecmp (arg, "register") == 0)
13595 lfence_before_indirect_branch = lfence_branch_register;
13596 else if (strcasecmp (arg, "none") == 0)
13597 lfence_before_indirect_branch = lfence_branch_none;
13598 else
13599 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13600 arg);
13601 break;
13602
13603 case OPTION_MLFENCE_BEFORE_RET:
13604 if (strcasecmp (arg, "or") == 0)
13605 lfence_before_ret = lfence_before_ret_or;
13606 else if (strcasecmp (arg, "not") == 0)
13607 lfence_before_ret = lfence_before_ret_not;
13608 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13609 lfence_before_ret = lfence_before_ret_shl;
13610 else if (strcasecmp (arg, "none") == 0)
13611 lfence_before_ret = lfence_before_ret_none;
13612 else
13613 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13614 arg);
13615 break;
13616
13617 case OPTION_MRELAX_RELOCATIONS:
13618 if (strcasecmp (arg, "yes") == 0)
13619 generate_relax_relocations = 1;
13620 else if (strcasecmp (arg, "no") == 0)
13621 generate_relax_relocations = 0;
13622 else
13623 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13624 break;
13625
13626 case OPTION_MALIGN_BRANCH_BOUNDARY:
13627 {
13628 char *end;
13629 long int align = strtoul (arg, &end, 0);
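/* Accept 0 (disable branch alignment) or a power of two no smaller
   than 16; e.g. "-malign-branch-boundary=32" stores 5 in
   align_branch_power.  */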
13630 if (*end == '\0')
13631 {
13632 if (align == 0)
13633 {
13634 align_branch_power = 0;
13635 break;
13636 }
13637 else if (align >= 16)
13638 {
13639 int align_power;
13640 for (align_power = 0;
13641 (align & 1) == 0;
13642 align >>= 1, align_power++)
13643 continue;
13644 /* Limit alignment power to 31. */
13645 if (align == 1 && align_power < 32)
13646 {
13647 align_branch_power = align_power;
13648 break;
13649 }
13650 }
13651 }
13652 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13653 }
13654 break;
13655
13656 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13657 {
13658 char *end;
13659 int align = strtoul (arg, &end, 0);
13660 /* Some processors only support 5 prefixes. */
13661 if (*end == '\0' && align >= 0 && align < 6)
13662 {
13663 align_branch_prefix_size = align;
13664 break;
13665 }
13666 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13667 arg);
13668 }
13669 break;
13670
13671 case OPTION_MALIGN_BRANCH:
13672 align_branch = 0;
13673 saved = xstrdup (arg);
13674 type = saved;
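/* e.g. "-malign-branch=jcc+ret" selects exactly the Jcc and ret
   branch types.  */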
13675 do
13676 {
13677 next = strchr (type, '+');
13678 if (next)
13679 *next++ = '\0';
13680 if (strcasecmp (type, "jcc") == 0)
13681 align_branch |= align_branch_jcc_bit;
13682 else if (strcasecmp (type, "fused") == 0)
13683 align_branch |= align_branch_fused_bit;
13684 else if (strcasecmp (type, "jmp") == 0)
13685 align_branch |= align_branch_jmp_bit;
13686 else if (strcasecmp (type, "call") == 0)
13687 align_branch |= align_branch_call_bit;
13688 else if (strcasecmp (type, "ret") == 0)
13689 align_branch |= align_branch_ret_bit;
13690 else if (strcasecmp (type, "indirect") == 0)
13691 align_branch |= align_branch_indirect_bit;
13692 else
13693 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13694 type = next;
13695 }
13696 while (next != NULL);
13697 free (saved);
13698 break;
13699
13700 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
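/* Shorthand for -malign-branch-boundary=32 -malign-branch-prefix-size=5
   -malign-branch=jcc+fused+jmp.  */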
13701 align_branch_power = 5;
13702 align_branch_prefix_size = 5;
13703 align_branch = (align_branch_jcc_bit
13704 | align_branch_fused_bit
13705 | align_branch_jmp_bit);
13706 break;
13707
13708 case OPTION_MAMD64:
13709 isa64 = amd64;
13710 break;
13711
13712 case OPTION_MINTEL64:
13713 isa64 = intel64;
13714 break;
13715
13716 case 'O':
13717 if (arg == NULL)
13718 {
13719 optimize = 1;
13720 /* Turn off -Os. */
13721 optimize_for_space = 0;
13722 }
13723 else if (*arg == 's')
13724 {
13725 optimize_for_space = 1;
13726 /* Turn on all encoding optimizations. */
13727 optimize = INT_MAX;
13728 }
13729 else
13730 {
13731 optimize = atoi (arg);
13732 /* Turn off -Os. */
13733 optimize_for_space = 0;
13734 }
13735 break;
13736
13737 default:
13738 return 0;
13739 }
13740 return 1;
13741 }
13742
13743 #define MESSAGE_TEMPLATE \
13744 " "
13745
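/* Append NAME (LEN bytes) to the comma-separated list being built in
   MESSAGE at position P.  When fewer than LEN + 2 columns remain (two
   are reserved for ", " or ",\0"), flush MESSAGE to STREAM first and
   restart at START.  Returns the new write position and updates
   *LEFT_P with the remaining width.  */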
13746 static char *
13747 output_message (FILE *stream, char *p, char *message, char *start,
13748 int *left_p, const char *name, int len)
13749 {
13750 int size = sizeof (MESSAGE_TEMPLATE);
13751 int left = *left_p;
13752
13753 /* Reserve 2 spaces for ", " or ",\0" */
13754 left -= len + 2;
13755
13756 /* Check if there is any room. */
13757 if (left >= 0)
13758 {
13759 if (p != start)
13760 {
13761 *p++ = ',';
13762 *p++ = ' ';
13763 }
13764 p = mempcpy (p, name, len);
13765 }
13766 else
13767 {
13768 /* Output the current message now and start a new one. */
13769 *p++ = ',';
13770 *p = '\0';
13771 fprintf (stream, "%s\n", message);
13772 p = start;
13773 left = size - (start - message) - len - 2;
13774
13775 gas_assert (left >= 0);
13776
13777 p = mempcpy (p, name, len);
13778 }
13779
13780 *left_p = left;
13781 return p;
13782 }
13783
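/* List the known architecture names on STREAM: processors when EXT is
   zero, ISA extensions otherwise.  With CHECK set, also list the
   special "default", "push" and "pop" arguments and skip processors
   that cannot run i386 code.  */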
13784 static void
13785 show_arch (FILE *stream, int ext, int check)
13786 {
13787 static char message[] = MESSAGE_TEMPLATE;
13788 char *start = message + 27;
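/* The 27-column indent presumably matches the indentation of the
   usage text, so each flushed line of names lines up under its
   heading.  */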
13789 char *p;
13790 int size = sizeof (MESSAGE_TEMPLATE);
13791 int left;
13792 const char *name;
13793 int len;
13794 unsigned int j;
13795
13796 p = start;
13797 left = size - (start - message);
13798
13799 if (!ext && check)
13800 {
13801 p = output_message (stream, p, message, start, &left,
13802 STRING_COMMA_LEN ("default"));
13803 p = output_message (stream, p, message, start, &left,
13804 STRING_COMMA_LEN ("push"));
13805 p = output_message (stream, p, message, start, &left,
13806 STRING_COMMA_LEN ("pop"));
13807 }
13808
13809 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13810 {
13811 /* Should it be skipped? */
13812 if (cpu_arch [j].skip)
13813 continue;
13814
13815 name = cpu_arch [j].name;
13816 len = cpu_arch [j].len;
13817 if (cpu_arch[j].type == PROCESSOR_NONE)
13818 {
13819 /* It is an extension. Skip if we aren't asked to show it. */
13820 if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
13821 continue;
13822 }
13823 else if (ext)
13824 {
13825 /* It is a processor.  Skip if we only show extensions.  */
13826 continue;
13827 }
13828 else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
13829 {
13830 /* It is an impossible processor - skip. */
13831 continue;
13832 }
13833
13834 p = output_message (stream, p, message, start, &left, name, len);
13835 }
13836
13837 /* Display disabled extensions. */
13838 if (ext)
13839 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13840 {
13841 char *str;
13842
13843 if (cpu_arch[j].type != PROCESSOR_NONE
13844 || !cpu_flags_all_zero (&cpu_arch[j].enable))
13845 continue;
13846 str = xasprintf ("no%s", cpu_arch[j].name);
13847 p = output_message (stream, p, message, start, &left, str,
13848 strlen (str));
13849 free (str);
13850 }
13851
13852 *p = '\0';
13853 fprintf (stream, "%s\n", message);
13854 }
13855
13856 void
13857 md_show_usage (FILE *stream)
13858 {
13859 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13860 fprintf (stream, _("\
13861 -Qy, -Qn ignored\n\
13862 -V print assembler version number\n\
13863 -k ignored\n"));
13864 #endif
13865 fprintf (stream, _("\
13866 -n do not optimize code alignment\n\
13867 -O{012s} attempt some code optimizations\n\
13868 -q quieten some warnings\n"));
13869 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13870 fprintf (stream, _("\
13871 -s ignored\n"));
13872 #endif
13873 #ifdef BFD64
13874 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13875 fprintf (stream, _("\
13876 --32/--64/--x32 generate 32bit/64bit/x32 object\n"));
13877 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
13878 fprintf (stream, _("\
13879 --32/--64 generate 32bit/64bit object\n"));
13880 # endif
13881 #endif
13882 #ifdef SVR4_COMMENT_CHARS
13883 fprintf (stream, _("\
13884 --divide do not treat `/' as a comment character\n"));
13885 #else
13886 fprintf (stream, _("\
13887 --divide ignored\n"));
13888 #endif
13889 fprintf (stream, _("\
13890 -march=CPU[,+EXTENSION...]\n\
13891 generate code for CPU and EXTENSION; CPU is one of:\n"));
13892 show_arch (stream, 0, 1);
13893 fprintf (stream, _("\
13894 EXTENSION is a combination of (possibly \"no\"-prefixed):\n"));
13895 show_arch (stream, 1, 0);
13896 fprintf (stream, _("\
13897 -mtune=CPU optimize for CPU, CPU is one of:\n"));
13898 show_arch (stream, 0, 0);
13899 fprintf (stream, _("\
13900 -msse2avx encode SSE instructions with VEX prefix\n"));
13901 fprintf (stream, _("\
13902 -muse-unaligned-vector-move\n\
13903 encode aligned vector move as unaligned vector move\n"));
13904 fprintf (stream, _("\
13905 -msse-check=[none|error|warning] (default: warning)\n\
13906 check SSE instructions\n"));
13907 fprintf (stream, _("\
13908 -moperand-check=[none|error|warning] (default: warning)\n\
13909 check operand combinations for validity\n"));
13910 fprintf (stream, _("\
13911 -mavxscalar=[128|256] (default: 128)\n\
13912 encode scalar AVX instructions with specific vector\n\
13913 length\n"));
13914 fprintf (stream, _("\
13915 -mvexwig=[0|1] (default: 0)\n\
13916 encode VEX instructions with specific VEX.W value\n\
13917 for VEX.W bit ignored instructions\n"));
13918 fprintf (stream, _("\
13919 -mevexlig=[128|256|512] (default: 128)\n\
13920 encode scalar EVEX instructions with specific vector\n\
13921 length\n"));
13922 fprintf (stream, _("\
13923 -mevexwig=[0|1] (default: 0)\n\
13924 encode EVEX instructions with specific EVEX.W value\n\
13925 for EVEX.W bit ignored instructions\n"));
13926 fprintf (stream, _("\
13927 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
13928 encode EVEX instructions with specific EVEX.RC value\n\
13929 for SAE-only ignored instructions\n"));
13930 fprintf (stream, _("\
13931 -mmnemonic=[att|intel] "));
13932 if (SYSV386_COMPAT)
13933 fprintf (stream, _("(default: att)\n"));
13934 else
13935 fprintf (stream, _("(default: intel)\n"));
13936 fprintf (stream, _("\
13937 use AT&T/Intel mnemonic\n"));
13938 fprintf (stream, _("\
13939 -msyntax=[att|intel] (default: att)\n\
13940 use AT&T/Intel syntax\n"));
13941 fprintf (stream, _("\
13942 -mindex-reg support pseudo index registers\n"));
13943 fprintf (stream, _("\
13944 -mnaked-reg don't require `%%' prefix for registers\n"));
13945 fprintf (stream, _("\
13946 -madd-bnd-prefix add BND prefix for all valid branches\n"));
13947 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13948 fprintf (stream, _("\
13949 -mshared disable branch optimization for shared code\n"));
13950 fprintf (stream, _("\
13951 -mx86-used-note=[no|yes] "));
13952 if (DEFAULT_X86_USED_NOTE)
13953 fprintf (stream, _("(default: yes)\n"));
13954 else
13955 fprintf (stream, _("(default: no)\n"));
13956 fprintf (stream, _("\
13957 generate x86 used ISA and feature properties\n"));
13958 #endif
13959 #if defined (TE_PE) || defined (TE_PEP)
13960 fprintf (stream, _("\
13961 -mbig-obj generate big object files\n"));
13962 #endif
13963 fprintf (stream, _("\
13964 -momit-lock-prefix=[no|yes] (default: no)\n\
13965 strip all lock prefixes\n"));
13966 fprintf (stream, _("\
13967 -mfence-as-lock-add=[no|yes] (default: no)\n\
13968 encode lfence, mfence and sfence as\n\
13969 lock addl $0x0, (%%{re}sp)\n"));
13970 fprintf (stream, _("\
13971 -mrelax-relocations=[no|yes] "));
13972 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
13973 fprintf (stream, _("(default: yes)\n"));
13974 else
13975 fprintf (stream, _("(default: no)\n"));
13976 fprintf (stream, _("\
13977 generate relax relocations\n"));
13978 fprintf (stream, _("\
13979 -malign-branch-boundary=NUM (default: 0)\n\
13980 align branches within NUM byte boundary\n"));
13981 fprintf (stream, _("\
13982 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
13983 TYPE is combination of jcc, fused, jmp, call, ret,\n\
13984 indirect\n\
13985 specify types of branches to align\n"));
13986 fprintf (stream, _("\
13987 -malign-branch-prefix-size=NUM (default: 5)\n\
13988 align branches with NUM prefixes per instruction\n"));
13989 fprintf (stream, _("\
13990 -mbranches-within-32B-boundaries\n\
13991 align branches within 32 byte boundary\n"));
13992 fprintf (stream, _("\
13993 -mlfence-after-load=[no|yes] (default: no)\n\
13994 generate lfence after load\n"));
13995 fprintf (stream, _("\
13996 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
13997 generate lfence before indirect near branch\n"));
13998 fprintf (stream, _("\
13999 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
14000 generate lfence before ret\n"));
14001 fprintf (stream, _("\
14002 -mamd64 accept only AMD64 ISA [default]\n"));
14003 fprintf (stream, _("\
14004 -mintel64 accept only Intel64 ISA\n"));
14005 }
14006
14007 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
14008 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14009 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14010
14011 /* Pick the target format to use. */
14012
14013 const char *
14014 i386_target_format (void)
14015 {
14016 if (startswith (default_arch, "x86_64"))
14017 {
14018 update_code_flag (CODE_64BIT, 1);
14019 if (default_arch[6] == '\0')
14020 x86_elf_abi = X86_64_ABI;
14021 else
14022 x86_elf_abi = X86_64_X32_ABI;
14023 }
14024 else if (!strcmp (default_arch, "i386"))
14025 update_code_flag (CODE_32BIT, 1);
14026 else if (!strcmp (default_arch, "iamcu"))
14027 {
14028 update_code_flag (CODE_32BIT, 1);
14029 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
14030 {
14031 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
14032 cpu_arch_name = "iamcu";
14033 free (cpu_sub_arch_name);
14034 cpu_sub_arch_name = NULL;
14035 cpu_arch_flags = iamcu_flags;
14036 cpu_arch_isa = PROCESSOR_IAMCU;
14037 cpu_arch_isa_flags = iamcu_flags;
14038 if (!cpu_arch_tune_set)
14039 {
14040 cpu_arch_tune = cpu_arch_isa;
14041 cpu_arch_tune_flags = cpu_arch_isa_flags;
14042 }
14043 }
14044 else if (cpu_arch_isa != PROCESSOR_IAMCU)
14045 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
14046 cpu_arch_name);
14047 }
14048 else
14049 as_fatal (_("unknown architecture"));
14050
14051 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
14052 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14053 if (cpu_flags_all_zero (&cpu_arch_tune_flags))
14054 cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14055
14056 switch (OUTPUT_FLAVOR)
14057 {
14058 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
14059 case bfd_target_aout_flavour:
14060 return AOUT_TARGET_FORMAT;
14061 #endif
14062 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
14063 # if defined (TE_PE) || defined (TE_PEP)
14064 case bfd_target_coff_flavour:
14065 if (flag_code == CODE_64BIT)
14066 {
14067 object_64bit = 1;
14068 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
14069 }
14070 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
14071 # elif defined (TE_GO32)
14072 case bfd_target_coff_flavour:
14073 return "coff-go32";
14074 # else
14075 case bfd_target_coff_flavour:
14076 return "coff-i386";
14077 # endif
14078 #endif
14079 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14080 case bfd_target_elf_flavour:
14081 {
14082 const char *format;
14083
14084 switch (x86_elf_abi)
14085 {
14086 default:
14087 format = ELF_TARGET_FORMAT;
14088 #ifndef TE_SOLARIS
14089 tls_get_addr = "___tls_get_addr";
14090 #endif
14091 break;
14092 case X86_64_ABI:
14093 use_rela_relocations = 1;
14094 object_64bit = 1;
14095 #ifndef TE_SOLARIS
14096 tls_get_addr = "__tls_get_addr";
14097 #endif
14098 format = ELF_TARGET_FORMAT64;
14099 break;
14100 case X86_64_X32_ABI:
14101 use_rela_relocations = 1;
14102 object_64bit = 1;
14103 #ifndef TE_SOLARIS
14104 tls_get_addr = "__tls_get_addr";
14105 #endif
14106 disallow_64bit_reloc = 1;
14107 format = ELF_TARGET_FORMAT32;
14108 break;
14109 }
14110 if (cpu_arch_isa == PROCESSOR_IAMCU)
14111 {
14112 if (x86_elf_abi != I386_ABI)
14113 as_fatal (_("Intel MCU is 32bit only"));
14114 return ELF_TARGET_IAMCU_FORMAT;
14115 }
14116 else
14117 return format;
14118 }
14119 #endif
14120 #if defined (OBJ_MACH_O)
14121 case bfd_target_mach_o_flavour:
14122 if (flag_code == CODE_64BIT)
14123 {
14124 use_rela_relocations = 1;
14125 object_64bit = 1;
14126 return "mach-o-x86-64";
14127 }
14128 else
14129 return "mach-o-i386";
14130 #endif
14131 default:
14132 abort ();
14133 return NULL;
14134 }
14135 }
14136
14137 #endif /* OBJ_MAYBE_ more than one */
14138 \f
14139 symbolS *
14140 md_undefined_symbol (char *name)
14141 {
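/* Cheaply filter on the first three characters before paying for the
   full strcmp.  */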
14142 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
14143 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
14144 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
14145 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
14146 {
14147 if (!GOT_symbol)
14148 {
14149 if (symbol_find (name))
14150 as_bad (_("GOT already in symbol table"));
14151 GOT_symbol = symbol_new (name, undefined_section,
14152 &zero_address_frag, 0);
14153 }
14154 return GOT_symbol;
14155 }
14156 return 0;
14157 }
14158
14159 /* Round up a section size to the appropriate boundary. */
14160
14161 valueT
14162 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
14163 {
14164 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14165 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
14166 {
14167 /* For a.out, force the section size to be aligned. If we don't do
14168 this, BFD will align it for us, but it will not write out the
14169 final bytes of the section. This may be a bug in BFD, but it is
14170 easier to fix it here since that is how the other a.out targets
14171 work. */
14172 int align;
14173
14174 align = bfd_section_alignment (segment);
14175 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
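/* e.g. with align == 2 (4-byte alignment), a size of 5 becomes
   (5 + 3) & ~3 == 8.  */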
14176 }
14177 #endif
14178
14179 return size;
14180 }
14181
14182 /* On the i386, PC-relative offsets are relative to the start of the
14183 next instruction. That is, the address of the offset, plus its
14184 size, since the offset is always the last part of the insn. */
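/* For instance, a 4-byte displacement at offset 1 into a frag at
   address 0x10 resolves against 0x10 + 1 + 4 == 0x15, the address of
   the next instruction.  */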
14185
14186 long
14187 md_pcrel_from (fixS *fixP)
14188 {
14189 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
14190 }
14191
14192 #ifndef I386COFF
14193
14194 static void
14195 s_bss (int ignore ATTRIBUTE_UNUSED)
14196 {
14197 int temp;
14198
14199 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14200 if (IS_ELF)
14201 obj_elf_section_change_hook ();
14202 #endif
14203 temp = get_absolute_expression ();
14204 subseg_set (bss_section, (subsegT) temp);
14205 demand_empty_rest_of_line ();
14206 }
14207
14208 #endif
14209
14210 /* Remember constant directive. */
14211
14212 void
14213 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
14214 {
14215 if (last_insn.kind != last_insn_directive
14216 && (bfd_section_flags (now_seg) & SEC_CODE))
14217 {
14218 last_insn.seg = now_seg;
14219 last_insn.kind = last_insn_directive;
14220 last_insn.name = "constant directive";
14221 last_insn.file = as_where (&last_insn.line);
14222 if (lfence_before_ret != lfence_before_ret_none)
14223 {
14224 if (lfence_before_indirect_branch != lfence_branch_none)
14225 as_warn (_("constant directive skips -mlfence-before-ret "
14226 "and -mlfence-before-indirect-branch"));
14227 else
14228 as_warn (_("constant directive skips -mlfence-before-ret"));
14229 }
14230 else if (lfence_before_indirect_branch != lfence_branch_none)
14231 as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
14232 }
14233 }
14234
14235 int
14236 i386_validate_fix (fixS *fixp)
14237 {
14238 if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
14239 {
14240 reloc_howto_type *howto;
14241
14242 howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
14243 as_bad_where (fixp->fx_file, fixp->fx_line,
14244 _("invalid %s relocation against register"),
14245 howto ? howto->name : "<unknown>");
14246 return 0;
14247 }
14248
14249 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14250 if (fixp->fx_r_type == BFD_RELOC_SIZE32
14251 || fixp->fx_r_type == BFD_RELOC_SIZE64)
14252 return IS_ELF && fixp->fx_addsy
14253 && (!S_IS_DEFINED (fixp->fx_addsy)
14254 || S_IS_EXTERNAL (fixp->fx_addsy));
14255 #endif
14256
14257 if (fixp->fx_subsy)
14258 {
14259 if (fixp->fx_subsy == GOT_symbol)
14260 {
14261 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
14262 {
14263 if (!object_64bit)
14264 abort ();
14265 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14266 if (fixp->fx_tcbit2)
14267 fixp->fx_r_type = (fixp->fx_tcbit
14268 ? BFD_RELOC_X86_64_REX_GOTPCRELX
14269 : BFD_RELOC_X86_64_GOTPCRELX);
14270 else
14271 #endif
14272 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
14273 }
14274 else
14275 {
14276 if (!object_64bit)
14277 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
14278 else
14279 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
14280 }
14281 fixp->fx_subsy = 0;
14282 }
14283 }
14284 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14285 else
14286 {
14287 /* NB: Commit 292676c1 resolved PLT32 relocs against local symbols
14288 to their sections.  Since PLT32 relocations must be against
14289 symbols, turn such a PLT32 relocation into a PC32 relocation. */
14290 if (fixp->fx_addsy
14291 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
14292 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
14293 && symbol_section_p (fixp->fx_addsy))
14294 fixp->fx_r_type = BFD_RELOC_32_PCREL;
14295 if (!object_64bit)
14296 {
14297 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
14298 && fixp->fx_tcbit2)
14299 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
14300 }
14301 }
14302 #endif
14303
14304 return 1;
14305 }
14306
14307 arelent *
14308 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
14309 {
14310 arelent *rel;
14311 bfd_reloc_code_real_type code;
14312
14313 switch (fixp->fx_r_type)
14314 {
14315 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14316 symbolS *sym;
14317
14318 case BFD_RELOC_SIZE32:
14319 case BFD_RELOC_SIZE64:
14320 if (fixp->fx_addsy
14321 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
14322 && (!fixp->fx_subsy
14323 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
14324 sym = fixp->fx_addsy;
14325 else if (fixp->fx_subsy
14326 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
14327 && (!fixp->fx_addsy
14328 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
14329 sym = fixp->fx_subsy;
14330 else
14331 sym = NULL;
14332 if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
14333 {
14334 /* Resolve size relocation against local symbol to size of
14335 the symbol plus addend. */
14336 valueT value = S_GET_SIZE (sym);
14337
14338 if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
14339 value = bfd_section_size (S_GET_SEGMENT (sym));
14340 if (sym == fixp->fx_subsy)
14341 {
14342 value = -value;
14343 if (fixp->fx_addsy)
14344 value += S_GET_VALUE (fixp->fx_addsy);
14345 }
14346 else if (fixp->fx_subsy)
14347 value -= S_GET_VALUE (fixp->fx_subsy);
14348 value += fixp->fx_offset;
14349 if (fixp->fx_r_type == BFD_RELOC_SIZE32
14350 && object_64bit
14351 && !fits_in_unsigned_long (value))
14352 as_bad_where (fixp->fx_file, fixp->fx_line,
14353 _("symbol size computation overflow"));
14354 fixp->fx_addsy = NULL;
14355 fixp->fx_subsy = NULL;
14356 md_apply_fix (fixp, (valueT *) &value, NULL);
14357 return NULL;
14358 }
14359 if (!fixp->fx_addsy || fixp->fx_subsy)
14360 {
14361 as_bad_where (fixp->fx_file, fixp->fx_line,
14362 "unsupported expression involving @size");
14363 return NULL;
14364 }
14365 #endif
14366 /* Fall through. */
14367
14368 case BFD_RELOC_X86_64_PLT32:
14369 case BFD_RELOC_X86_64_GOT32:
14370 case BFD_RELOC_X86_64_GOTPCREL:
14371 case BFD_RELOC_X86_64_GOTPCRELX:
14372 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14373 case BFD_RELOC_386_PLT32:
14374 case BFD_RELOC_386_GOT32:
14375 case BFD_RELOC_386_GOT32X:
14376 case BFD_RELOC_386_GOTOFF:
14377 case BFD_RELOC_386_GOTPC:
14378 case BFD_RELOC_386_TLS_GD:
14379 case BFD_RELOC_386_TLS_LDM:
14380 case BFD_RELOC_386_TLS_LDO_32:
14381 case BFD_RELOC_386_TLS_IE_32:
14382 case BFD_RELOC_386_TLS_IE:
14383 case BFD_RELOC_386_TLS_GOTIE:
14384 case BFD_RELOC_386_TLS_LE_32:
14385 case BFD_RELOC_386_TLS_LE:
14386 case BFD_RELOC_386_TLS_GOTDESC:
14387 case BFD_RELOC_386_TLS_DESC_CALL:
14388 case BFD_RELOC_X86_64_TLSGD:
14389 case BFD_RELOC_X86_64_TLSLD:
14390 case BFD_RELOC_X86_64_DTPOFF32:
14391 case BFD_RELOC_X86_64_DTPOFF64:
14392 case BFD_RELOC_X86_64_GOTTPOFF:
14393 case BFD_RELOC_X86_64_TPOFF32:
14394 case BFD_RELOC_X86_64_TPOFF64:
14395 case BFD_RELOC_X86_64_GOTOFF64:
14396 case BFD_RELOC_X86_64_GOTPC32:
14397 case BFD_RELOC_X86_64_GOT64:
14398 case BFD_RELOC_X86_64_GOTPCREL64:
14399 case BFD_RELOC_X86_64_GOTPC64:
14400 case BFD_RELOC_X86_64_GOTPLT64:
14401 case BFD_RELOC_X86_64_PLTOFF64:
14402 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14403 case BFD_RELOC_X86_64_TLSDESC_CALL:
14404 case BFD_RELOC_RVA:
14405 case BFD_RELOC_VTABLE_ENTRY:
14406 case BFD_RELOC_VTABLE_INHERIT:
14407 #ifdef TE_PE
14408 case BFD_RELOC_32_SECREL:
14409 case BFD_RELOC_16_SECIDX:
14410 #endif
14411 code = fixp->fx_r_type;
14412 break;
14413 case BFD_RELOC_X86_64_32S:
14414 if (!fixp->fx_pcrel)
14415 {
14416 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
14417 code = fixp->fx_r_type;
14418 break;
14419 }
14420 /* Fall through. */
14421 default:
14422 if (fixp->fx_pcrel)
14423 {
14424 switch (fixp->fx_size)
14425 {
14426 default:
14427 as_bad_where (fixp->fx_file, fixp->fx_line,
14428 _("can not do %d byte pc-relative relocation"),
14429 fixp->fx_size);
14430 code = BFD_RELOC_32_PCREL;
14431 break;
14432 case 1: code = BFD_RELOC_8_PCREL; break;
14433 case 2: code = BFD_RELOC_16_PCREL; break;
14434 case 4: code = BFD_RELOC_32_PCREL; break;
14435 #ifdef BFD64
14436 case 8: code = BFD_RELOC_64_PCREL; break;
14437 #endif
14438 }
14439 }
14440 else
14441 {
14442 switch (fixp->fx_size)
14443 {
14444 default:
14445 as_bad_where (fixp->fx_file, fixp->fx_line,
14446 _("can not do %d byte relocation"),
14447 fixp->fx_size);
14448 code = BFD_RELOC_32;
14449 break;
14450 case 1: code = BFD_RELOC_8; break;
14451 case 2: code = BFD_RELOC_16; break;
14452 case 4: code = BFD_RELOC_32; break;
14453 #ifdef BFD64
14454 case 8: code = BFD_RELOC_64; break;
14455 #endif
14456 }
14457 }
14458 break;
14459 }
14460
14461 if ((code == BFD_RELOC_32
14462 || code == BFD_RELOC_32_PCREL
14463 || code == BFD_RELOC_X86_64_32S)
14464 && GOT_symbol
14465 && fixp->fx_addsy == GOT_symbol)
14466 {
14467 if (!object_64bit)
14468 code = BFD_RELOC_386_GOTPC;
14469 else
14470 code = BFD_RELOC_X86_64_GOTPC32;
14471 }
14472 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14473 && GOT_symbol
14474 && fixp->fx_addsy == GOT_symbol)
14475 {
14476 code = BFD_RELOC_X86_64_GOTPC64;
14477 }
14478
14479 rel = XNEW (arelent);
14480 rel->sym_ptr_ptr = XNEW (asymbol *);
14481 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14482
14483 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14484
14485 if (!use_rela_relocations)
14486 {
14487 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14488 vtable entry to be used in the relocation's section offset. */
14489 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14490 rel->address = fixp->fx_offset;
14491 #if defined (OBJ_COFF) && defined (TE_PE)
14492 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14493 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14494 else
14495 #endif
14496 rel->addend = 0;
14497 }
14498 /* Use Rela relocations in 64-bit mode.  */
14499 else
14500 {
14501 if (disallow_64bit_reloc)
14502 switch (code)
14503 {
14504 case BFD_RELOC_X86_64_DTPOFF64:
14505 case BFD_RELOC_X86_64_TPOFF64:
14506 case BFD_RELOC_64_PCREL:
14507 case BFD_RELOC_X86_64_GOTOFF64:
14508 case BFD_RELOC_X86_64_GOT64:
14509 case BFD_RELOC_X86_64_GOTPCREL64:
14510 case BFD_RELOC_X86_64_GOTPC64:
14511 case BFD_RELOC_X86_64_GOTPLT64:
14512 case BFD_RELOC_X86_64_PLTOFF64:
14513 as_bad_where (fixp->fx_file, fixp->fx_line,
14514 _("cannot represent relocation type %s in x32 mode"),
14515 bfd_get_reloc_code_name (code));
14516 break;
14517 default:
14518 break;
14519 }
14520
14521 if (!fixp->fx_pcrel)
14522 rel->addend = fixp->fx_offset;
14523 else
14524 switch (code)
14525 {
14526 case BFD_RELOC_X86_64_PLT32:
14527 case BFD_RELOC_X86_64_GOT32:
14528 case BFD_RELOC_X86_64_GOTPCREL:
14529 case BFD_RELOC_X86_64_GOTPCRELX:
14530 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14531 case BFD_RELOC_X86_64_TLSGD:
14532 case BFD_RELOC_X86_64_TLSLD:
14533 case BFD_RELOC_X86_64_GOTTPOFF:
14534 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14535 case BFD_RELOC_X86_64_TLSDESC_CALL:
14536 rel->addend = fixp->fx_offset - fixp->fx_size;
14537 break;
14538 default:
14539 rel->addend = (section->vma
14540 - fixp->fx_size
14541 + fixp->fx_addnumber
14542 + md_pcrel_from (fixp));
14543 break;
14544 }
14545 }
14546
14547 rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14548 if (rel->howto == NULL)
14549 {
14550 as_bad_where (fixp->fx_file, fixp->fx_line,
14551 _("cannot represent relocation type %s"),
14552 bfd_get_reloc_code_name (code));
14553 /* Set howto to a garbage value so that we can keep going. */
14554 rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14555 gas_assert (rel->howto != NULL);
14556 }
14557
14558 return rel;
14559 }
14560
14561 #include "tc-i386-intel.c"
14562
14563 void
14564 tc_x86_parse_to_dw2regnum (expressionS *exp)
14565 {
14566 int saved_naked_reg;
14567 char saved_register_dot;
14568
14569 saved_naked_reg = allow_naked_reg;
14570 allow_naked_reg = 1;
14571 saved_register_dot = register_chars['.'];
14572 register_chars['.'] = '.';
14573 allow_pseudo_reg = 1;
14574 expression_and_evaluate (exp);
14575 allow_pseudo_reg = 0;
14576 register_chars['.'] = saved_register_dot;
14577 allow_naked_reg = saved_naked_reg;
14578
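/* dw2_regnum holds one register number for 16-/32-bit mode and one
   for 64-bit mode; flag_code >> 1 maps CODE_32BIT and CODE_16BIT to 0
   and CODE_64BIT to 1 to pick between them.  */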
14579 if (exp->X_op == O_register && exp->X_add_number >= 0)
14580 {
14581 if ((addressT) exp->X_add_number < i386_regtab_size)
14582 {
14583 exp->X_op = O_constant;
14584 exp->X_add_number = i386_regtab[exp->X_add_number]
14585 .dw2_regnum[flag_code >> 1];
14586 }
14587 else
14588 exp->X_op = O_illegal;
14589 }
14590 }
14591
14592 void
14593 tc_x86_frame_initial_instructions (void)
14594 {
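/* Cached DWARF register number of the stack pointer, one entry per
   32-/64-bit mode, computed on first use by running "esp"/"rsp"
   through the register parser.  */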
14595 static unsigned int sp_regno[2];
14596
14597 if (!sp_regno[flag_code >> 1])
14598 {
14599 char *saved_input = input_line_pointer;
14600 char sp[][4] = {"esp", "rsp"};
14601 expressionS exp;
14602
14603 input_line_pointer = sp[flag_code >> 1];
14604 tc_x86_parse_to_dw2regnum (&exp);
14605 gas_assert (exp.X_op == O_constant);
14606 sp_regno[flag_code >> 1] = exp.X_add_number;
14607 input_line_pointer = saved_input;
14608 }
14609
14610 cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14611 cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14612 }
14613
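/* DWARF address size for the target: X32 is ILP32, so it uses 4-byte
   addresses even though BFD reports a 64-bit architecture.  */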
14614 int
14615 x86_dwarf2_addr_size (void)
14616 {
14617 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14618 if (x86_elf_abi == X86_64_X32_ABI)
14619 return 4;
14620 #endif
14621 return bfd_arch_bits_per_address (stdoutput) / 8;
14622 }
14623
14624 int
14625 i386_elf_section_type (const char *str, size_t len)
14626 {
14627 if (flag_code == CODE_64BIT
14628 && len == sizeof ("unwind") - 1
14629 && startswith (str, "unwind"))
14630 return SHT_X86_64_UNWIND;
14631
14632 return -1;
14633 }
14634
14635 #ifdef TE_SOLARIS
14636 void
14637 i386_solaris_fix_up_eh_frame (segT sec)
14638 {
14639 if (flag_code == CODE_64BIT)
14640 elf_section_type (sec) = SHT_X86_64_UNWIND;
14641 }
14642 #endif
14643
14644 #ifdef TE_PE
14645 void
14646 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14647 {
14648 expressionS exp;
14649
14650 exp.X_op = O_secrel;
14651 exp.X_add_symbol = symbol;
14652 exp.X_add_number = 0;
14653 emit_expr (&exp, size);
14654 }
14655 #endif
14656
14657 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14658 /* For ELF on x86-64, add support for SHF_X86_64_LARGE. */
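/* Both the section letter 'l' (e.g. `.section .ldata,"awl"') and the
   section word "large" (handled below) map to SHF_X86_64_LARGE.  */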
14659
14660 bfd_vma
14661 x86_64_section_letter (int letter, const char **ptr_msg)
14662 {
14663 if (flag_code == CODE_64BIT)
14664 {
14665 if (letter == 'l')
14666 return SHF_X86_64_LARGE;
14667
14668 *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14669 }
14670 else
14671 *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14672 return -1;
14673 }
14674
14675 bfd_vma
14676 x86_64_section_word (char *str, size_t len)
14677 {
14678 if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
14679 return SHF_X86_64_LARGE;
14680
14681 return -1;
14682 }
14683
14684 static void
14685 handle_large_common (int small ATTRIBUTE_UNUSED)
14686 {
14687 if (flag_code != CODE_64BIT)
14688 {
14689 s_comm_internal (0, elf_common_parse);
14690 as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14691 }
14692 else
14693 {
14694 static segT lbss_section;
14695 asection *saved_com_section_ptr = elf_com_section_ptr;
14696 asection *saved_bss_section = bss_section;
14697
14698 if (lbss_section == NULL)
14699 {
14700 flagword applicable;
14701 segT seg = now_seg;
14702 subsegT subseg = now_subseg;
14703
14704 /* The .lbss section is for local .largecomm symbols. */
14705 lbss_section = subseg_new (".lbss", 0);
14706 applicable = bfd_applicable_section_flags (stdoutput);
14707 bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14708 seg_info (lbss_section)->bss = 1;
14709
14710 subseg_set (seg, subseg);
14711 }
14712
14713 elf_com_section_ptr = &_bfd_elf_large_com_section;
14714 bss_section = lbss_section;
14715
14716 s_comm_internal (0, elf_common_parse);
14717
14718 elf_com_section_ptr = saved_com_section_ptr;
14719 bss_section = saved_bss_section;
14720 }
14721 }
14722 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */