/* tc-i386.c -- Assemble code for the Intel 80386
   Copyright (C) 1989-2022 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Intel 80386 machine specific gas.
   Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
   x86_64 support by Jan Hubicka (jh@suse.cz)
   VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
   Bugs & suggestions are completely welcome.  This is free software.
   Please help us make it better.  */

#include "as.h"
#include "safe-ctype.h"
#include "subsegs.h"
#include "dwarf2dbg.h"
#include "dw2gencfi.h"
#include "gen-sframe.h"
#include "sframe.h"
#include "elf/x86-64.h"
#include "opcodes/i386-init.h"
#include <limits.h>

#ifndef INFER_ADDR_PREFIX
#define INFER_ADDR_PREFIX 1
#endif

#ifndef DEFAULT_ARCH
#define DEFAULT_ARCH "i386"
#endif

#ifndef INLINE
#if __GNUC__ >= 2
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

/* Prefixes will be emitted in the order defined below.
   WAIT_PREFIX must be the first prefix since FWAIT really is an
   instruction, and so must come before any prefixes.
   The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
   REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
#define WAIT_PREFIX	0
#define SEG_PREFIX	1
#define ADDR_PREFIX	2
#define DATA_PREFIX	3
#define REP_PREFIX	4
#define HLE_PREFIX	REP_PREFIX
#define BND_PREFIX	REP_PREFIX
#define LOCK_PREFIX	5
#define REX_PREFIX	6	/* must come last.  */
#define MAX_PREFIXES	7	/* max prefixes per opcode */
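
/* As a hedged illustration (the instruction below is chosen for the
   example, not taken from elsewhere in this file): assembling
   "lock addl $1, %fs:(%edx)" in 32-bit mode stores 0x64 (the FS
   override) in prefix[SEG_PREFIX] and 0xf0 in prefix[LOCK_PREFIX];
   emitting the array in index order then places the segment override
   before LOCK, matching the preferred order described above.  */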

/* we define the syntax here (modulo base,index,scale syntax) */
#define REGISTER_PREFIX '%'
#define IMMEDIATE_PREFIX '$'
#define ABSOLUTE_PREFIX '*'

/* these are the instruction mnemonic suffixes in AT&T syntax or
   memory operand size in Intel syntax.  */
#define WORD_MNEM_SUFFIX  'w'
#define BYTE_MNEM_SUFFIX  'b'
#define SHORT_MNEM_SUFFIX 's'
#define LONG_MNEM_SUFFIX  'l'
#define QWORD_MNEM_SUFFIX 'q'

#define END_OF_INSN '\0'

#define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }

/* This matches the C -> StaticRounding alias in the opcode table.  */
#define commutative staticrounding

/*
  'templates' is for grouping together 'template' structures for opcodes
  of the same name.  This is only used for storing the insns in the grand
  ole hash table of insns.
  The templates themselves start at START and range up to (but not including)
  END.
  */
typedef struct
{
  const insn_template *start;
  const insn_template *end;
}
templates;

/* 386 operand encoding bytes:  see 386 book for details of this.  */
typedef struct
{
  unsigned int regmem;	/* codes register or memory operand */
  unsigned int reg;	/* codes register operand (or extended opcode) */
  unsigned int mode;	/* how to interpret regmem & reg */
}
modrm_byte;

/* x86-64 extension prefix.  */
typedef int rex_byte;

/* 386 opcode byte to code indirect addressing.  */
typedef struct
{
  unsigned base;
  unsigned index;
  unsigned scale;
}
sib_byte;
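
/* Worked example (illustrative only): the AT&T operand "(%eax,%ebx,4)"
   decomposes into base = %eax (reg 0), index = %ebx (reg 3) and
   scale = log2 (4) = 2, which pack into a SIB byte of
   (2 << 6) | (3 << 3) | 0 = 0x98.  */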

/* x86 arch names, types and features */
typedef struct
{
  const char *name;		/* arch name */
  unsigned int len:8;		/* arch string length */
  bool skip:1;			/* show_arch should skip this. */
  enum processor_type type;	/* arch type */
  i386_cpu_flags enable;	/* cpu feature enable flags */
  i386_cpu_flags disable;	/* cpu feature disable flags */
}
arch_entry;

static void update_code_flag (int, int);
static void set_code_flag (int);
static void set_16bit_gcc_code_flag (int);
static void set_intel_syntax (int);
static void set_intel_mnemonic (int);
static void set_allow_index_reg (int);
static void set_check (int);
static void set_cpu_arch (int);
#ifdef TE_PE
static void pe_directive_secrel (int);
static void pe_directive_secidx (int);
#endif
static void signed_cons (int);
static char *output_invalid (int c);
static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
				    const char *);
static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
				       const char *);
static int i386_att_operand (char *);
static int i386_intel_operand (char *, int);
static int i386_intel_simplify (expressionS *);
static int i386_intel_parse_name (const char *, expressionS *);
static const reg_entry *parse_register (char *, char **);
static const char *parse_insn (const char *, char *);
static char *parse_operands (char *, const char *);
static void swap_operands (void);
static void swap_2_operands (unsigned int, unsigned int);
static enum flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static void optimize_disp (void);
static const insn_template *match_template (char);
static int check_string (void);
static int process_suffix (void);
static int check_byte_reg (void);
static int check_long_reg (void);
static int check_qword_reg (void);
static int check_word_reg (void);
static int finalize_imm (void);
static int process_operands (void);
static const reg_entry *build_modrm_byte (void);
static void output_insn (void);
static void output_imm (fragS *, offsetT);
static void output_disp (fragS *, offsetT);
#ifndef I386COFF
static void s_bss (int);
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
static void handle_large_common (int small ATTRIBUTE_UNUSED);

/* GNU_PROPERTY_X86_ISA_1_USED.  */
static unsigned int x86_isa_1_used;
/* GNU_PROPERTY_X86_FEATURE_2_USED.  */
static unsigned int x86_feature_2_used;
/* Generate x86 used ISA and feature properties.  */
static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
#endif

static const char *default_arch = DEFAULT_ARCH;

/* parse_register() returns this when a register alias cannot be used.  */
static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
				   { Dw2Inval, Dw2Inval } };

static const reg_entry *reg_eax;
static const reg_entry *reg_ds;
static const reg_entry *reg_es;
static const reg_entry *reg_ss;
static const reg_entry *reg_st0;
static const reg_entry *reg_k0;

/* VEX prefix.  */
typedef struct
{
  /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
  unsigned char bytes[4];
  unsigned int length;
  /* Destination or source register specifier.  */
  const reg_entry *register_specifier;
} vex_prefix;

/* 'md_assemble ()' gathers together information and puts it into an
   i386_insn.  */

union i386_op
{
  expressionS *disps;
  expressionS *imms;
  const reg_entry *regs;
};

enum i386_error
{
  no_error, /* Must be first.  */
  operand_size_mismatch,
  operand_type_mismatch,
  register_type_mismatch,
  number_of_operands_mismatch,
  invalid_instruction_suffix,
  bad_imm4,
  unsupported_with_intel_mnemonic,
  unsupported_syntax,
  unsupported,
  unsupported_on_arch,
  unsupported_64bit,
  invalid_sib_address,
  invalid_vsib_address,
  invalid_vector_register_set,
  invalid_tmm_register_set,
  invalid_dest_and_src_register_set,
  unsupported_vector_index_register,
  unsupported_broadcast,
  broadcast_needed,
  unsupported_masking,
  mask_not_on_destination,
  no_default_mask,
  unsupported_rc_sae,
  invalid_register_operand,
};

struct _i386_insn
{
  /* TM holds the template for the insn we're currently assembling.  */
  insn_template tm;

  /* SUFFIX holds the instruction size suffix for byte, word, dword
     or qword, if given.  */
  char suffix;

  /* OPCODE_LENGTH holds the number of base opcode bytes.  */
  unsigned char opcode_length;

  /* OPERANDS gives the number of given operands.  */
  unsigned int operands;

  /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
     of given register, displacement, memory operands and immediate
     operands.  */
  unsigned int reg_operands, disp_operands, mem_operands, imm_operands;

  /* TYPES [i] is the type (see above #defines) which tells us how to
     use OP[i] for the corresponding operand.  */
  i386_operand_type types[MAX_OPERANDS];

  /* Displacement expression, immediate expression, or register for each
     operand.  */
  union i386_op op[MAX_OPERANDS];

  /* Flags for operands.  */
  unsigned int flags[MAX_OPERANDS];
#define Operand_PCrel 1
#define Operand_Mem   2

  /* Relocation type for operand */
  enum bfd_reloc_code_real reloc[MAX_OPERANDS];

  /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
     the base index byte below.  */
  const reg_entry *base_reg;
  const reg_entry *index_reg;
  unsigned int log2_scale_factor;

  /* SEG gives the seg_entries of this insn.  They are zero unless
     explicit segment overrides are given.  */
  const reg_entry *seg[2];

  /* PREFIX holds all the given prefix opcodes (usually null).
     PREFIXES is the number of prefix opcodes.  */
  unsigned int prefixes;
  unsigned char prefix[MAX_PREFIXES];

  /* Register is in low 3 bits of opcode.  */
  bool short_form;

  /* The operand to a branch insn indicates an absolute branch.  */
  bool jumpabsolute;

  /* The operand to a branch insn indicates a far branch.  */
  bool far_branch;

  /* There is a memory operand of (%dx) which should be only used
     with input/output instructions.  */
  bool input_output_operand;

  /* Extended states.  */
  enum
    {
      /* Use MMX state.  */
      xstate_mmx = 1 << 0,
      /* Use XMM state.  */
      xstate_xmm = 1 << 1,
      /* Use YMM state.  */
      xstate_ymm = 1 << 2 | xstate_xmm,
      /* Use ZMM state.  */
      xstate_zmm = 1 << 3 | xstate_ymm,
      /* Use TMM state.  */
      xstate_tmm = 1 << 4,
      /* Use MASK state.  */
      xstate_mask = 1 << 5
    } xstate;

  /* Has GOTPC or TLS relocation.  */
  bool has_gotpc_tls_reloc;

  /* RM and SIB are the modrm byte and the sib byte where the
     addressing modes of this insn are encoded.  */
  modrm_byte rm;
  rex_byte rex;
  rex_byte vrex;
  sib_byte sib;
  vex_prefix vex;

  /* Masking attributes.

     The struct describes masking, applied to OPERAND in the instruction.
     REG is a pointer to the corresponding mask register.  ZEROING tells
     whether merging or zeroing mask is used.  */
  struct Mask_Operation
  {
    const reg_entry *reg;
    unsigned int zeroing;
    /* The operand where this operation is associated.  */
    unsigned int operand;
  } mask;
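
  /* For example (a sketch, not lifted from the parser itself): for
     "vaddps %zmm0, %zmm1, %zmm2{%k1}{z}" REG would point at %k1,
     ZEROING would be nonzero, and OPERAND would be the index of the
     destination %zmm2.  */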

  /* Rounding control and SAE attributes.  */
  struct RC_Operation
  {
    enum rc_type
      {
	rc_none = -1,
	rne,
	rd,
	ru,
	rz,
	saeonly
      } type;
    /* In Intel syntax the operand modifier form is supposed to be used, but
       we continue to accept the immediate forms as well.  */
    bool modifier;
  } rounding;

  /* Broadcasting attributes.

     The struct describes broadcasting, applied to OPERAND.  TYPE
     expresses the broadcast factor.  */
  struct Broadcast_Operation
  {
    /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
    unsigned int type;

    /* Index of broadcasted operand.  */
    unsigned int operand;

    /* Number of bytes to broadcast.  */
    unsigned int bytes;
  } broadcast;
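
  /* For example (an illustrative reading of these fields): for
     "vaddps (%rax){1to16}, %zmm1, %zmm2" TYPE would record the {1to16}
     form, OPERAND the index of the memory operand, and BYTES the
     4-byte element size being broadcast.  */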

  /* Compressed disp8*N attribute.  */
  unsigned int memshift;
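
  /* Worked example (illustrative): with 64-byte vectors MEMSHIFT is 6,
     so a displacement of 0x100 can be encoded as the compressed disp8
     value 0x100 >> 6 = 4 instead of requiring a full disp32.  */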

  /* Prefer load or store in encoding.  */
  enum
    {
      dir_encoding_default = 0,
      dir_encoding_load,
      dir_encoding_store,
      dir_encoding_swap
    } dir_encoding;

  /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
  enum
    {
      disp_encoding_default = 0,
      disp_encoding_8bit,
      disp_encoding_16bit,
      disp_encoding_32bit
    } disp_encoding;

  /* Prefer the REX byte in encoding.  */
  bool rex_encoding;

  /* Disable instruction size optimization.  */
  bool no_optimize;

  /* How to encode vector instructions.  */
  enum
    {
      vex_encoding_default = 0,
      vex_encoding_vex,
      vex_encoding_vex3,
      vex_encoding_evex,
      vex_encoding_error
    } vec_encoding;

  /* REP prefix.  */
  const char *rep_prefix;

  /* HLE prefix.  */
  const char *hle_prefix;

  /* Have BND prefix.  */
  const char *bnd_prefix;

  /* Have NOTRACK prefix.  */
  const char *notrack_prefix;

  /* Error message.  */
  enum i386_error error;
};

typedef struct _i386_insn i386_insn;

/* Link each RC type with its corresponding string, which is looked for
   in the asm source.  */
struct RC_name
{
  enum rc_type type;
  const char *name;
  unsigned int len;
};

static const struct RC_name RC_NamesTable[] =
{
  { rne, STRING_COMMA_LEN ("rn-sae") },
  { rd, STRING_COMMA_LEN ("rd-sae") },
  { ru, STRING_COMMA_LEN ("ru-sae") },
  { rz, STRING_COMMA_LEN ("rz-sae") },
  { saeonly, STRING_COMMA_LEN ("sae") },
};
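
/* These strings appear as operand modifiers in the input, e.g. the
   (illustrative) AT&T insn "vaddpd {rz-sae}, %zmm4, %zmm5, %zmm6"
   selects round-toward-zero; parsing the modifier yields type == rz.  */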

/* To be indexed by segment register number.  */
static const unsigned char i386_seg_prefixes[] = {
  ES_PREFIX_OPCODE,
  CS_PREFIX_OPCODE,
  SS_PREFIX_OPCODE,
  DS_PREFIX_OPCODE,
  FS_PREFIX_OPCODE,
  GS_PREFIX_OPCODE
};

/* List of chars besides those in app.c:symbol_chars that can start an
   operand.  Used to prevent the scrubber eating vital white-space.  */
const char extra_symbol_chars[] = "*%-([{}"
#ifdef LEX_AT
	"@"
#endif
#ifdef LEX_QM
	"?"
#endif
	;

#if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
     && !defined (TE_GNU) \
     && !defined (TE_LINUX) \
     && !defined (TE_Haiku) \
     && !defined (TE_FreeBSD) \
     && !defined (TE_DragonFly) \
     && !defined (TE_NetBSD))
/* This array holds the chars that always start a comment.  If the
   pre-processor is disabled, these aren't very useful.  The option
   --divide will remove '/' from this list.  */
const char *i386_comment_chars = "#/";
#define SVR4_COMMENT_CHARS 1
#define PREFIX_SEPARATOR '\\'

#else
const char *i386_comment_chars = "#";
#define PREFIX_SEPARATOR '/'
#endif

/* This array holds the chars that only start a comment at the beginning of
   a line.  If the line seems to have the form '# 123 filename'
   .line and .file directives will appear in the pre-processed output.
   Note that input_file.c hand checks for '#' at the beginning of the
   first line of the input file.  This is because the compiler outputs
   #NO_APP at the beginning of its output.
   Also note that comments started like this one will always work if
   '/' isn't otherwise defined.  */
const char line_comment_chars[] = "#/";

const char line_separator_chars[] = ";";

/* Chars that can be used to separate mant from exp in floating point
   nums.  */
const char EXP_CHARS[] = "eE";

/* Chars that mean this number is a floating point constant
   As in 0f12.456
   or 0d1.2345e12.  */
const char FLT_CHARS[] = "fFdDxXhHbB";

/* Tables for lexical analysis.  */
static char mnemonic_chars[256];
static char register_chars[256];
static char operand_chars[256];
static char identifier_chars[256];

/* Lexical macros.  */
#define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
#define is_operand_char(x) (operand_chars[(unsigned char) x])
#define is_register_char(x) (register_chars[(unsigned char) x])
#define is_space_char(x) ((x) == ' ')
#define is_identifier_char(x) (identifier_chars[(unsigned char) x])

/* All non-digit non-letter characters that may occur in an operand.  */
static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";

/* md_assemble() always leaves the strings it's passed unaltered.  To
   effect this we maintain a stack of saved characters that we've smashed
   with '\0's (indicating end of strings for various sub-fields of the
   assembler instruction).  */
static char save_stack[32];
static char *save_stack_p;
#define END_STRING_AND_SAVE(s) \
	do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
#define RESTORE_END_STRING(s) \
	do { *(s) = *--save_stack_p; } while (0)
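
/* Typical (sketched) use while carving up an instruction string; the
   pointer name below is hypothetical, chosen for illustration:

     END_STRING_AND_SAVE (op_end);	temporarily NUL-terminate
     ... parse the sub-field ending at op_end ...
     RESTORE_END_STRING (op_end);	put the smashed char back  */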

/* The instruction we're assembling.  */
static i386_insn i;

/* Possible templates for current insn.  */
static const templates *current_templates;

/* Per instruction expressionS buffers: max displacements & immediates.  */
static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];

/* Current operand we are working on.  */
static int this_operand = -1;

/* We support three different modes.  FLAG_CODE variable is used to
   distinguish these.  */

enum flag_code {
	CODE_32BIT,
	CODE_16BIT,
	CODE_64BIT };

static enum flag_code flag_code;
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
/* __tls_get_addr/___tls_get_addr symbol for TLS.  */
static const char *tls_get_addr;

#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
     || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))

/* The ELF ABI to use.  */
enum x86_elf_abi
{
  I386_ABI,
  X86_64_ABI,
  X86_64_X32_ABI
};

static enum x86_elf_abi x86_elf_abi = I386_ABI;
#endif

#if defined (TE_PE) || defined (TE_PEP)
/* Use big object file format.  */
static int use_big_obj = 0;
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* 1 if generating code for a shared library.  */
static int shared = 0;

unsigned int x86_sframe_cfa_sp_reg;
/* The other CFA base register for SFrame unwind info.  */
unsigned int x86_sframe_cfa_fp_reg;
unsigned int x86_sframe_cfa_ra_reg;

#endif

/* 1 for intel syntax,
   0 if att syntax.  */
static int intel_syntax = 0;

static enum x86_64_isa
{
  amd64 = 1,	/* AMD64 ISA.  */
  intel64	/* Intel64 ISA.  */
} isa64;

/* 1 for intel mnemonic,
   0 if att mnemonic.  */
static int intel_mnemonic = !SYSV386_COMPAT;

/* 1 if pseudo registers are permitted.  */
static int allow_pseudo_reg = 0;

/* 1 if register prefix % not required.  */
static int allow_naked_reg = 0;

/* 1 if the assembler should add BND prefix for all control-transferring
   instructions supporting it, even if this prefix wasn't specified
   explicitly.  */
static int add_bnd_prefix = 0;

/* 1 if pseudo index register, eiz/riz, is allowed.  */
static int allow_index_reg = 0;

/* 1 if the assembler should ignore LOCK prefix, even if it was
   specified explicitly.  */
static int omit_lock_prefix = 0;

/* 1 if the assembler should encode lfence, mfence, and sfence as
   "lock addl $0, (%{re}sp)".  */
static int avoid_fence = 0;

/* 1 if lfence should be inserted after every load.  */
static int lfence_after_load = 0;

/* Non-zero if lfence should be inserted before indirect branch.  */
static enum lfence_before_indirect_branch_kind
  {
    lfence_branch_none = 0,
    lfence_branch_register,
    lfence_branch_memory,
    lfence_branch_all
  }
lfence_before_indirect_branch;

/* Non-zero if lfence should be inserted before ret.  */
static enum lfence_before_ret_kind
  {
    lfence_before_ret_none = 0,
    lfence_before_ret_not,
    lfence_before_ret_or,
    lfence_before_ret_shl
  }
lfence_before_ret;

/* Record of the previous instruction, noting whether it came from a
   directive (e.g. .byte) or ended in a prefix.  */
static struct
  {
    segT seg;
    const char *file;
    const char *name;
    unsigned int line;
    enum last_insn_kind
      {
	last_insn_other = 0,
	last_insn_directive,
	last_insn_prefix
      } kind;
  } last_insn;

/* 1 if the assembler should generate relax relocations.  */

static int generate_relax_relocations
  = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;

static enum check_kind
  {
    check_none = 0,
    check_warning,
    check_error
  }
sse_check, operand_check = check_warning;

/* Non-zero if branches should be aligned within power of 2 boundary.  */
static int align_branch_power = 0;

/* Types of branches to align.  */
enum align_branch_kind
  {
    align_branch_none = 0,
    align_branch_jcc = 1,
    align_branch_fused = 2,
    align_branch_jmp = 3,
    align_branch_call = 4,
    align_branch_indirect = 5,
    align_branch_ret = 6
  };

/* Type bits of branches to align.  */
enum align_branch_bit
  {
    align_branch_jcc_bit = 1 << align_branch_jcc,
    align_branch_fused_bit = 1 << align_branch_fused,
    align_branch_jmp_bit = 1 << align_branch_jmp,
    align_branch_call_bit = 1 << align_branch_call,
    align_branch_indirect_bit = 1 << align_branch_indirect,
    align_branch_ret_bit = 1 << align_branch_ret
  };

static unsigned int align_branch = (align_branch_jcc_bit
				    | align_branch_fused_bit
				    | align_branch_jmp_bit);

/* Types of condition jump used by macro-fusion.  */
enum mf_jcc_kind
  {
    mf_jcc_jo = 0,  /* base opcode 0x70  */
    mf_jcc_jc,      /* base opcode 0x72  */
    mf_jcc_je,      /* base opcode 0x74  */
    mf_jcc_jna,     /* base opcode 0x76  */
    mf_jcc_js,      /* base opcode 0x78  */
    mf_jcc_jp,      /* base opcode 0x7a  */
    mf_jcc_jl,      /* base opcode 0x7c  */
    mf_jcc_jle,     /* base opcode 0x7e  */
  };

/* Types of compare flag-modifying instructions used by macro-fusion.  */
enum mf_cmp_kind
  {
    mf_cmp_test_and, /* test/cmp */
    mf_cmp_alu_cmp,  /* add/sub/cmp */
    mf_cmp_incdec    /* inc/dec */
  };

/* The maximum padding size for fused jcc.  A CMP-like instruction can
   be 9 bytes and a jcc can be 6 bytes.  Leave room just in case for
   prefixes.  */
#define MAX_FUSED_JCC_PADDING_SIZE 20

/* The maximum number of prefixes added for an instruction.  */
static unsigned int align_branch_prefix_size = 5;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
 */
static int optimize = 0;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
   3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
      "testb $imm7,%r8".
 */
static int optimize_for_space = 0;

/* Register prefix used for error message.  */
static const char *register_prefix = "%";

/* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
   leave, push, and pop instructions so that gcc has the same stack
   frame as in 32 bit mode.  */
static char stackop_size = '\0';

/* Non-zero to optimize code alignment.  */
int optimize_align_code = 1;

/* Non-zero to quieten some warnings.  */
static int quiet_warnings = 0;

/* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
static bool pre_386_16bit_warned;

/* CPU name.  */
static const char *cpu_arch_name = NULL;
static char *cpu_sub_arch_name = NULL;

/* CPU feature flags.  */
static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;

/* If we have selected a cpu we are generating instructions for.  */
static int cpu_arch_tune_set = 0;

/* Cpu we are generating instructions for.  */
enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;

/* CPU feature flags of cpu we are generating instructions for.  */
static i386_cpu_flags cpu_arch_tune_flags;

/* CPU instruction set architecture used.  */
enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;

/* CPU feature flags of instruction set architecture used.  */
i386_cpu_flags cpu_arch_isa_flags;

/* If set, conditional jumps are not automatically promoted to handle
   larger than a byte offset.  */
static bool no_cond_jump_promotion = false;

/* Encode SSE instructions with VEX prefix.  */
static unsigned int sse2avx;

/* Encode aligned vector move as unaligned vector move.  */
static unsigned int use_unaligned_vector_move;

/* Encode scalar AVX instructions with specific vector length.  */
static enum
  {
    vex128 = 0,
    vex256
  } avxscalar;

/* Encode VEX WIG instructions with specific vex.w.  */
static enum
  {
    vexw0 = 0,
    vexw1
  } vexwig;

/* Encode scalar EVEX LIG instructions with specific vector length.  */
static enum
  {
    evexl128 = 0,
    evexl256,
    evexl512
  } evexlig;

/* Encode EVEX WIG instructions with specific evex.w.  */
static enum
  {
    evexw0 = 0,
    evexw1
  } evexwig;

/* Value to encode in EVEX RC bits, for SAE-only instructions.  */
static enum rc_type evexrcig = rne;

/* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
static symbolS *GOT_symbol;

/* The dwarf2 return column, adjusted for 32 or 64 bit.  */
unsigned int x86_dwarf2_return_column;

/* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
int x86_cie_data_alignment;

/* Interface to relax_segment.
   There are 3 major relax states for 386 jump insns because the
   different types of jumps add different sizes to frags when we're
   figuring out what sort of jump to choose to reach a given label.

   BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
   branches which are handled by md_estimate_size_before_relax() and
   i386_generic_table_relax_frag().  */

/* Types.  */
#define UNCOND_JUMP 0
#define COND_JUMP 1
#define COND_JUMP86 2
#define BRANCH_PADDING 3
#define BRANCH_PREFIX 4
#define FUSED_JCC_PADDING 5

/* Sizes.  */
#define CODE16	1
#define SMALL	0
#define SMALL16 (SMALL | CODE16)
#define BIG	2
#define BIG16	(BIG | CODE16)

#ifndef INLINE
#ifdef __GNUC__
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

#define ENCODE_RELAX_STATE(type, size) \
  ((relax_substateT) (((type) << 2) | (size)))
#define TYPE_FROM_RELAX_STATE(s) \
  ((s) >> 2)
#define DISP_SIZE_FROM_RELAX_STATE(s) \
    ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
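
/* Worked example: ENCODE_RELAX_STATE (COND_JUMP, BIG) is
   (1 << 2) | 2 == 6; TYPE_FROM_RELAX_STATE (6) recovers COND_JUMP and
   DISP_SIZE_FROM_RELAX_STATE (6) yields 4 displacement bytes.  */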

/* This table is used by relax_frag to promote short jumps to long
   ones where necessary.  SMALL (short) jumps may be promoted to BIG
   (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
   don't allow a short jump in a 32 bit code segment to be promoted to
   a 16 bit offset jump because it's slower (requires data size
   prefix), and doesn't work, unless the destination is in the bottom
   64k of the code segment (The top 16 bits of eip are zeroed).  */

const relax_typeS md_relax_table[] =
{
  /* The fields are:
     1) most positive reach of this state,
     2) most negative reach of this state,
     3) how many bytes this mode will have in the variable part of the frag
     4) which index into the table to try if we can't fit into this one.  */

  /* UNCOND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
  /* dword jmp adds 4 bytes to frag:
     0 extra opcode bytes, 4 displacement bytes.  */
  {0, 0, 4, 0},
  /* word jmp adds 2 bytes to frag:
     0 extra opcode bytes, 2 displacement bytes.  */
  {0, 0, 2, 0},

  /* COND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 3 bytes to frag:
     1 extra opcode byte, 2 displacement bytes.  */
  {0, 0, 3, 0},

  /* COND_JUMP86 states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 4 bytes to frag:
     1 displacement byte and a 3 byte long branch insn.  */
  {0, 0, 4, 0}
};

#define ARCH(n, t, f, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
    CPU_NONE_FLAGS }
#define SUBARCH(n, e, d, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
    CPU_ ## d ## _FLAGS }

static const arch_entry cpu_arch[] =
{
  /* Do not replace the first two entries - i386_target_format() and
     set_cpu_arch() rely on them being there in this order.  */
  ARCH (generic32, GENERIC32, GENERIC32, false),
  ARCH (generic64, GENERIC64, GENERIC64, false),
  ARCH (i8086, UNKNOWN, NONE, false),
  ARCH (i186, UNKNOWN, I186, false),
  ARCH (i286, UNKNOWN, I286, false),
  ARCH (i386, I386, I386, false),
  ARCH (i486, I486, I486, false),
  ARCH (i586, PENTIUM, I586, false),
  ARCH (i686, PENTIUMPRO, I686, false),
  ARCH (pentium, PENTIUM, I586, false),
  ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
  ARCH (pentiumii, PENTIUMPRO, P2, false),
  ARCH (pentiumiii, PENTIUMPRO, P3, false),
  ARCH (pentium4, PENTIUM4, P4, false),
  ARCH (prescott, NOCONA, CORE, false),
  ARCH (nocona, NOCONA, NOCONA, false),
  ARCH (yonah, CORE, CORE, true),
  ARCH (core, CORE, CORE, false),
  ARCH (merom, CORE2, CORE2, true),
  ARCH (core2, CORE2, CORE2, false),
  ARCH (corei7, COREI7, COREI7, false),
  ARCH (iamcu, IAMCU, IAMCU, false),
  ARCH (k6, K6, K6, false),
  ARCH (k6_2, K6, K6_2, false),
  ARCH (athlon, ATHLON, ATHLON, false),
  ARCH (sledgehammer, K8, K8, true),
  ARCH (opteron, K8, K8, false),
  ARCH (k8, K8, K8, false),
  ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
  ARCH (bdver1, BD, BDVER1, false),
  ARCH (bdver2, BD, BDVER2, false),
  ARCH (bdver3, BD, BDVER3, false),
  ARCH (bdver4, BD, BDVER4, false),
  ARCH (znver1, ZNVER, ZNVER1, false),
  ARCH (znver2, ZNVER, ZNVER2, false),
  ARCH (znver3, ZNVER, ZNVER3, false),
  ARCH (znver4, ZNVER, ZNVER4, false),
  ARCH (btver1, BT, BTVER1, false),
  ARCH (btver2, BT, BTVER2, false),

  SUBARCH (8087, 8087, ANY_X87, false),
  SUBARCH (87, NONE, ANY_X87, false), /* Disable only!  */
  SUBARCH (287, 287, ANY_287, false),
  SUBARCH (387, 387, ANY_387, false),
  SUBARCH (687, 687, ANY_687, false),
  SUBARCH (cmov, CMOV, ANY_CMOV, false),
  SUBARCH (fxsr, FXSR, ANY_FXSR, false),
  SUBARCH (mmx, MMX, ANY_MMX, false),
  SUBARCH (sse, SSE, ANY_SSE, false),
  SUBARCH (sse2, SSE2, ANY_SSE2, false),
  SUBARCH (sse3, SSE3, ANY_SSE3, false),
  SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
  SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
  SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
  SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
  SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
  SUBARCH (avx, AVX, ANY_AVX, false),
  SUBARCH (avx2, AVX2, ANY_AVX2, false),
  SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
  SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
  SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
  SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
  SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
  SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
  SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
  SUBARCH (vmx, VMX, VMX, false),
  SUBARCH (vmfunc, VMFUNC, VMFUNC, false),
  SUBARCH (smx, SMX, SMX, false),
  SUBARCH (xsave, XSAVE, XSAVE, false),
  SUBARCH (xsaveopt, XSAVEOPT, XSAVEOPT, false),
  SUBARCH (xsavec, XSAVEC, XSAVEC, false),
  SUBARCH (xsaves, XSAVES, XSAVES, false),
  SUBARCH (aes, AES, AES, false),
  SUBARCH (pclmul, PCLMUL, PCLMUL, false),
  SUBARCH (clmul, PCLMUL, PCLMUL, true),
  SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
  SUBARCH (rdrnd, RDRND, RDRND, false),
  SUBARCH (f16c, F16C, F16C, false),
  SUBARCH (bmi2, BMI2, BMI2, false),
  SUBARCH (fma, FMA, FMA, false),
  SUBARCH (fma4, FMA4, FMA4, false),
  SUBARCH (xop, XOP, XOP, false),
  SUBARCH (lwp, LWP, LWP, false),
  SUBARCH (movbe, MOVBE, MOVBE, false),
  SUBARCH (cx16, CX16, CX16, false),
  SUBARCH (ept, EPT, EPT, false),
  SUBARCH (lzcnt, LZCNT, LZCNT, false),
  SUBARCH (popcnt, POPCNT, POPCNT, false),
  SUBARCH (hle, HLE, HLE, false),
  SUBARCH (rtm, RTM, RTM, false),
  SUBARCH (invpcid, INVPCID, INVPCID, false),
  SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
  SUBARCH (nop, NOP, NOP, false),
  SUBARCH (syscall, SYSCALL, SYSCALL, false),
  SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
  SUBARCH (3dnow, 3DNOW, 3DNOW, false),
  SUBARCH (3dnowa, 3DNOWA, 3DNOWA, false),
  SUBARCH (padlock, PADLOCK, PADLOCK, false),
  SUBARCH (pacifica, SVME, SVME, true),
  SUBARCH (svme, SVME, SVME, false),
  SUBARCH (abm, ABM, ABM, false),
  SUBARCH (bmi, BMI, BMI, false),
  SUBARCH (tbm, TBM, TBM, false),
  SUBARCH (adx, ADX, ADX, false),
  SUBARCH (rdseed, RDSEED, RDSEED, false),
  SUBARCH (prfchw, PRFCHW, PRFCHW, false),
  SUBARCH (smap, SMAP, SMAP, false),
  SUBARCH (mpx, MPX, MPX, false),
  SUBARCH (sha, SHA, SHA, false),
  SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
  SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
  SUBARCH (se1, SE1, SE1, false),
  SUBARCH (clwb, CLWB, CLWB, false),
  SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
  SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
  SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
  SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
  SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
  SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
  SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
  SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
  SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
  SUBARCH (clzero, CLZERO, CLZERO, false),
  SUBARCH (mwaitx, MWAITX, MWAITX, false),
  SUBARCH (ospke, OSPKE, OSPKE, false),
  SUBARCH (rdpid, RDPID, RDPID, false),
  SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
  SUBARCH (ibt, IBT, ANY_IBT, false),
  SUBARCH (shstk, SHSTK, ANY_SHSTK, false),
  SUBARCH (gfni, GFNI, GFNI, false),
  SUBARCH (vaes, VAES, VAES, false),
  SUBARCH (vpclmulqdq, VPCLMULQDQ, VPCLMULQDQ, false),
  SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
  SUBARCH (pconfig, PCONFIG, PCONFIG, false),
  SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
  SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
  SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
  SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
  SUBARCH (amx_fp16, AMX_FP16, AMX_FP16, false),
  SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
  SUBARCH (movdiri, MOVDIRI, ANY_MOVDIRI, false),
  SUBARCH (movdir64b, MOVDIR64B, ANY_MOVDIR64B, false),
  SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
  SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
	   ANY_AVX512_VP2INTERSECT, false),
  SUBARCH (tdx, TDX, ANY_TDX, false),
  SUBARCH (enqcmd, ENQCMD, ANY_ENQCMD, false),
  SUBARCH (serialize, SERIALIZE, ANY_SERIALIZE, false),
  SUBARCH (rdpru, RDPRU, RDPRU, false),
  SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
  SUBARCH (sev_es, SEV_ES, SEV_ES, false),
  SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
  SUBARCH (kl, KL, ANY_KL, false),
  SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
  SUBARCH (uintr, UINTR, ANY_UINTR, false),
  SUBARCH (hreset, HRESET, ANY_HRESET, false),
  SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
  SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
  SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
  SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
  SUBARCH (cmpccxadd, CMPCCXADD, ANY_CMPCCXADD, false),
  SUBARCH (wrmsrns, WRMSRNS, ANY_WRMSRNS, false),
  SUBARCH (msrlist, MSRLIST, ANY_MSRLIST, false),
  SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
  SUBARCH (rao_int, RAO_INT, ANY_RAO_INT, false),
  SUBARCH (rmpquery, RMPQUERY, RMPQUERY, false),
};

#undef SUBARCH
#undef ARCH

#ifdef I386COFF
/* Like s_lcomm_internal in gas/read.c but the alignment string
   is allowed to be optional.  */

static symbolS *
pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
{
  addressT align = 0;

  SKIP_WHITESPACE ();

  if (needs_align
      && *input_line_pointer == ',')
    {
      align = parse_align (needs_align - 1);

      if (align == (addressT) -1)
	return NULL;
    }
  else
    {
      if (size >= 8)
	align = 3;
      else if (size >= 4)
	align = 2;
      else if (size >= 2)
	align = 1;
      else
	align = 0;
    }

  bss_alloc (symbolP, size, align);
  return symbolP;
}

static void
pe_lcomm (int needs_align)
{
  s_comm_internal (needs_align * 2, pe_lcomm_internal);
}
#endif

const pseudo_typeS md_pseudo_table[] =
{
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
#ifndef I386COFF
  {"bss", s_bss, 0},
#else
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"hfloat", float_cons, 'h'},
  {"bfloat16", float_cons, 'b'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  {"noopt", s_ignore, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
  {"secidx", pe_directive_secidx, 0},
#endif
  {0, 0, 0}
};

/* For interface with expression ().  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static htab_t op_hash;

/* Hash table for register lookup.  */
static htab_t reg_hash;

/* Various efficient no-op patterns for aligning code labels.
   Note: Don't try to assemble the instructions in the comments.
   0L and 0w are not legal.  */
static const unsigned char f32_1[] =
  {0x90};				/* nop			*/
static const unsigned char f32_2[] =
  {0x66,0x90};				/* xchg %ax,%ax		*/
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};			/* leal 0(%esi),%esi	*/
static const unsigned char f32_4[] =
  {0x8d,0x74,0x26,0x00};		/* leal 0(%esi,1),%esi	*/
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};	/* leal 0L(%esi),%esi	*/
static const unsigned char f32_7[] =
  {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};	/* leal 0L(%esi,1),%esi */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};			/* lea 0(%si),%si	*/
static const unsigned char f16_4[] =
  {0x8d,0xb4,0x00,0x00};		/* lea 0W(%si),%si	*/
static const unsigned char jump_disp8[] =
  {0xeb};				/* jmp disp8	       */
static const unsigned char jump32_disp32[] =
  {0xe9};				/* jmp disp32	       */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};				/* jmp disp32	       */
/* 32-bit NOPs patterns.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};
/* 16-bit NOPs patterns.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f32_2, f16_3, f16_4
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] =
  {0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};

/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
		  int count, int max_single_nop_size)

{
  /* Place the longer NOP first.  */
  int last;
  int offset;
  const unsigned char *nops;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
		max_single_nop_size);
      return;
    }

  nops = patt[max_single_nop_size - 1];

  /* Use the smaller one if the requested one isn't available.  */
  if (nops == NULL)
    {
      max_single_nop_size--;
      nops = patt[max_single_nop_size - 1];
    }

  last = count % max_single_nop_size;

  count -= last;
  for (offset = 0; offset < count; offset += max_single_nop_size)
    memcpy (where + offset, nops, max_single_nop_size);

  if (last)
    {
      nops = patt[last - 1];
      if (nops == NULL)
	{
	  /* Use the smaller one plus one-byte NOP if the needed one
	     isn't available.  */
	  last--;
	  nops = patt[last - 1];
	  memcpy (where + offset, nops, last);
	  where[offset + last] = *patt[0];
	}
      else
	memcpy (where + offset, nops, last);
    }
}

static INLINE int
fits_in_imm7 (offsetT num)
{
  return (num & 0x7f) == num;
}

static INLINE int
fits_in_imm31 (offsetT num)
{
  return (num & 0x7fffffff) == num;
}

/* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
   single NOP instruction LIMIT.  */

void
i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
{
  const unsigned char *const *patt = NULL;
  int max_single_nop_size;
  /* Maximum number of NOPs before switching to jump over NOPs.  */
  int max_number_of_nops;

  switch (fragP->fr_type)
    {
    case rs_fill_nop:
    case rs_align_code:
      break;
    case rs_machine_dependent:
      /* Allow NOP padding for jumps and calls.  */
      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
	  || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
	break;
      /* Fall through.  */
    default:
      return;
    }

  /* We need to decide which NOP sequence to use for 32bit and
     64bit.  When -mtune= is used:

     1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
     PROCESSOR_GENERIC32, f32_patt will be used.
     2. For the rest, alt_patt will be used.

     When -mtune= isn't used, alt_patt will be used if
     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.  */

  if (flag_code == CODE_16BIT)
    {
      patt = f16_patt;
      max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
      /* Limit number of NOPs to 2 in 16-bit mode.  */
      max_number_of_nops = 2;
    }
  else
    {
      if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
	{
	  /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
	  switch (cpu_arch_tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* We use cpu_arch_isa_flags to check if we SHOULD
		 optimize with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	    case PROCESSOR_GENERIC64:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	      patt = alt_patt;
	      break;
	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_GENERIC32:
	      patt = f32_patt;
	      break;
	    case PROCESSOR_NONE:
	      abort ();
	    }
	}
      else
	{
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
		 PROCESSOR_UNKNOWN.  */
	      abort ();
	      break;

	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	    case PROCESSOR_GENERIC32:
	      /* We use cpu_arch_isa_flags to check if we CAN optimize
		 with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_GENERIC64:
	      patt = alt_patt;
	      break;
	    case PROCESSOR_NONE:
	      abort ();
	    }
	}

      if (patt == f32_patt)
	{
	  max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
	  /* Limit number of NOPs to 2 for older processors.  */
	  max_number_of_nops = 2;
	}
      else
	{
	  max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
	  /* Limit number of NOPs to 7 for newer processors.  */
	  max_number_of_nops = 7;
	}
    }

  if (limit == 0)
    limit = max_single_nop_size;

  if (fragP->fr_type == rs_fill_nop)
    {
      /* Output NOPs for .nop directive.  */
      if (limit > max_single_nop_size)
	{
	  as_bad_where (fragP->fr_file, fragP->fr_line,
			_("invalid single nop size: %d "
			  "(expect within [0, %d])"),
			limit, max_single_nop_size);
	  return;
	}
    }
  else if (fragP->fr_type != rs_machine_dependent)
    fragP->fr_var = count;

  if ((count / max_single_nop_size) > max_number_of_nops)
    {
      /* Generate jump over NOPs.  */
      offsetT disp = count - 2;
      if (fits_in_imm7 (disp))
	{
	  /* Use "jmp disp8" if possible.  */
	  count = disp;
	  where[0] = jump_disp8[0];
	  where[1] = count;
	  where += 2;
	}
      else
	{
	  unsigned int size_of_jump;

	  if (flag_code == CODE_16BIT)
	    {
	      where[0] = jump16_disp32[0];
	      where[1] = jump16_disp32[1];
	      size_of_jump = 2;
	    }
	  else
	    {
	      where[0] = jump32_disp32[0];
	      size_of_jump = 1;
	    }

	  count -= size_of_jump + 4;
	  if (!fits_in_imm31 (count))
	    {
	      as_bad_where (fragP->fr_file, fragP->fr_line,
			    _("jump over nop padding out of range"));
	      return;
	    }

	  md_number_to_chars (where + size_of_jump, count, 4);
	  where += size_of_jump + 4;
	}
    }

  /* Generate multiple NOPs.  */
  i386_output_nops (where, patt, count, limit);
}

static INLINE int
operand_type_all_zero (const union i386_operand_type *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE void
operand_type_set (union i386_operand_type *x, unsigned int v)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      x->array[2] = v;
      /* Fall through.  */
    case 2:
      x->array[1] = v;
      /* Fall through.  */
    case 1:
      x->array[0] = v;
      /* Fall through.  */
      break;
    default:
      abort ();
    }

  x->bitfield.class = ClassNone;
  x->bitfield.instance = InstanceNone;
}

static INLINE int
operand_type_equal (const union i386_operand_type *x,
		    const union i386_operand_type *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_all_zero (const union i386_cpu_flags *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_equal (const union i386_cpu_flags *x,
		 const union i386_cpu_flags *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4] != y->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3] != y->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_check_cpu64 (i386_cpu_flags f)
{
  return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
	   || (flag_code != CODE_64BIT && f.bitfield.cpu64));
}

static INLINE i386_cpu_flags
cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] &= y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] &= y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] &= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_cpu_flags
cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] |= y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] |= y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] |= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] |= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] |= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_cpu_flags
cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] &= ~y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] &= ~y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] &= ~y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= ~y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= ~y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

#define CPU_FLAGS_ARCH_MATCH		0x1
#define CPU_FLAGS_64BIT_MATCH		0x2

#define CPU_FLAGS_PERFECT_MATCH \
  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)

/* Return CPU flags match bits. */

static int
cpu_flags_match (const insn_template *t)
{
  i386_cpu_flags x = t->cpu_flags;
  int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;

  x.bitfield.cpu64 = 0;
  x.bitfield.cpuno64 = 0;

  if (cpu_flags_all_zero (&x))
    {
      /* This instruction is available on all archs.  */
      match |= CPU_FLAGS_ARCH_MATCH;
    }
  else
    {
      /* This instruction is available only on some archs.  */
      i386_cpu_flags cpu = cpu_arch_flags;

      /* AVX512VL is no standalone feature - match it and then strip it.  */
      if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
	return match;
      x.bitfield.cpuavx512vl = 0;

      /* AVX and AVX2 present at the same time express an operand size
	 dependency - strip AVX2 for the purposes here.  The operand size
	 dependent check occurs in check_vecOperands().  */
      if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
	x.bitfield.cpuavx2 = 0;

      cpu = cpu_flags_and (x, cpu);
      if (!cpu_flags_all_zero (&cpu))
	{
	  if (x.bitfield.cpuavx)
	    {
	      /* We need to check a few extra flags with AVX.  */
	      if (cpu.bitfield.cpuavx
		  && (!t->opcode_modifier.sse2avx
		      || (sse2avx && !i.prefix[DATA_PREFIX]))
		  && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
		  && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
		  && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
		match |= CPU_FLAGS_ARCH_MATCH;
	    }
	  else if (x.bitfield.cpuavx512f)
	    {
	      /* We need to check a few extra flags with AVX512F.  */
	      if (cpu.bitfield.cpuavx512f
		  && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
		  && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
		  && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
		match |= CPU_FLAGS_ARCH_MATCH;
	    }
	  else
	    match |= CPU_FLAGS_ARCH_MATCH;
	}
    }
  return match;
}

static INLINE i386_operand_type
operand_type_and (i386_operand_type x, i386_operand_type y)
{
  if (x.bitfield.class != y.bitfield.class)
    x.bitfield.class = ClassNone;
  if (x.bitfield.instance != y.bitfield.instance)
    x.bitfield.instance = InstanceNone;

  switch (ARRAY_SIZE (x.array))
    {
    case 3:
      x.array [2] &= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_operand_type
operand_type_and_not (i386_operand_type x, i386_operand_type y)
{
  gas_assert (y.bitfield.class == ClassNone);
  gas_assert (y.bitfield.instance == InstanceNone);

  switch (ARRAY_SIZE (x.array))
    {
    case 3:
      x.array [2] &= ~y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= ~y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= ~y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_operand_type
operand_type_or (i386_operand_type x, i386_operand_type y)
{
  gas_assert (x.bitfield.class == ClassNone ||
	      y.bitfield.class == ClassNone ||
	      x.bitfield.class == y.bitfield.class);
  gas_assert (x.bitfield.instance == InstanceNone ||
	      y.bitfield.instance == InstanceNone ||
	      x.bitfield.instance == y.bitfield.instance);

  switch (ARRAY_SIZE (x.array))
    {
    case 3:
      x.array [2] |= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] |= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] |= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_operand_type
operand_type_xor (i386_operand_type x, i386_operand_type y)
{
  gas_assert (y.bitfield.class == ClassNone);
  gas_assert (y.bitfield.instance == InstanceNone);

  switch (ARRAY_SIZE (x.array))
    {
    case 3:
      x.array [2] ^= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] ^= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] ^= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static const i386_operand_type anydisp = {
  .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
};

enum operand_type
{
  reg,
  imm,
  disp,
  anymem
};

static INLINE int
operand_type_check (i386_operand_type t, enum operand_type c)
{
  switch (c)
    {
    case reg:
      return t.bitfield.class == Reg;

    case imm:
      return (t.bitfield.imm8
	      || t.bitfield.imm8s
	      || t.bitfield.imm16
	      || t.bitfield.imm32
	      || t.bitfield.imm32s
	      || t.bitfield.imm64);

    case disp:
      return (t.bitfield.disp8
	      || t.bitfield.disp16
	      || t.bitfield.disp32
	      || t.bitfield.disp64);

    case anymem:
      return (t.bitfield.disp8
	      || t.bitfield.disp16
	      || t.bitfield.disp32
	      || t.bitfield.disp64
	      || t.bitfield.baseindex);

    default:
      abort ();
    }

  return 0;
}

2004 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2005    between operand GIVEN and operand WANTED for instruction template T. */
2006
2007 static INLINE int
2008 match_operand_size (const insn_template *t, unsigned int wanted,
2009 unsigned int given)
2010 {
2011 return !((i.types[given].bitfield.byte
2012 && !t->operand_types[wanted].bitfield.byte)
2013 || (i.types[given].bitfield.word
2014 && !t->operand_types[wanted].bitfield.word)
2015 || (i.types[given].bitfield.dword
2016 && !t->operand_types[wanted].bitfield.dword)
2017 || (i.types[given].bitfield.qword
2018 && (!t->operand_types[wanted].bitfield.qword
2019 /* Don't allow 64-bit (memory) operands outside of 64-bit
2020 mode, when they're used where a 64-bit GPR could also
2021 be used. Checking is needed for Intel Syntax only. */
2022 || (intel_syntax
2023 && flag_code != CODE_64BIT
2024 && (t->operand_types[wanted].bitfield.class == Reg
2025 || t->operand_types[wanted].bitfield.class == Accum
2026 || t->opcode_modifier.isstring))))
2027 || (i.types[given].bitfield.tbyte
2028 && !t->operand_types[wanted].bitfield.tbyte));
2029 }
2030
2031 /* Return 1 if there is no conflict in SIMD register between operand
2032    GIVEN and operand WANTED for instruction template T. */
2033
2034 static INLINE int
2035 match_simd_size (const insn_template *t, unsigned int wanted,
2036 unsigned int given)
2037 {
2038 return !((i.types[given].bitfield.xmmword
2039 && !t->operand_types[wanted].bitfield.xmmword)
2040 || (i.types[given].bitfield.ymmword
2041 && !t->operand_types[wanted].bitfield.ymmword)
2042 || (i.types[given].bitfield.zmmword
2043 && !t->operand_types[wanted].bitfield.zmmword)
2044 || (i.types[given].bitfield.tmmword
2045 && !t->operand_types[wanted].bitfield.tmmword));
2046 }
2047
2048 /* Return 1 if there is no conflict in any size between operand GIVEN
2049    and operand WANTED for instruction template T. */
2050
2051 static INLINE int
2052 match_mem_size (const insn_template *t, unsigned int wanted,
2053 unsigned int given)
2054 {
2055 return (match_operand_size (t, wanted, given)
2056 && !((i.types[given].bitfield.unspecified
2057 && !i.broadcast.type
2058 && !i.broadcast.bytes
2059 && !t->operand_types[wanted].bitfield.unspecified)
2060 || (i.types[given].bitfield.fword
2061 && !t->operand_types[wanted].bitfield.fword)
2062 /* For scalar opcode templates to allow register and memory
2063 operands at the same time, some special casing is needed
2064 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2065 down-conversion vpmov*. */
2066 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2067 && t->operand_types[wanted].bitfield.byte
2068 + t->operand_types[wanted].bitfield.word
2069 + t->operand_types[wanted].bitfield.dword
2070 + t->operand_types[wanted].bitfield.qword
2071 > !!t->opcode_modifier.broadcast)
2072 ? (i.types[given].bitfield.xmmword
2073 || i.types[given].bitfield.ymmword
2074 || i.types[given].bitfield.zmmword)
2075 		 : !match_simd_size (t, wanted, given))));
2076 }
2077
2078 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2079 operands for instruction template T, and it has MATCH_REVERSE set if there
2080 is no size conflict on any operands for the template with operands reversed
2081 (and the template allows for reversing in the first place). */
2082
2083 #define MATCH_STRAIGHT 1
2084 #define MATCH_REVERSE 2
2085
2086 static INLINE unsigned int
2087 operand_size_match (const insn_template *t)
2088 {
2089 unsigned int j, match = MATCH_STRAIGHT;
2090
2091 /* Don't check non-absolute jump instructions. */
2092 if (t->opcode_modifier.jump
2093 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2094 return match;
2095
2096 /* Check memory and accumulator operand size. */
2097 for (j = 0; j < i.operands; j++)
2098 {
2099 if (i.types[j].bitfield.class != Reg
2100 && i.types[j].bitfield.class != RegSIMD
2101 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2102 continue;
2103
2104 if (t->operand_types[j].bitfield.class == Reg
2105 && !match_operand_size (t, j, j))
2106 {
2107 match = 0;
2108 break;
2109 }
2110
2111 if (t->operand_types[j].bitfield.class == RegSIMD
2112 && !match_simd_size (t, j, j))
2113 {
2114 match = 0;
2115 break;
2116 }
2117
2118 if (t->operand_types[j].bitfield.instance == Accum
2119 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2120 {
2121 match = 0;
2122 break;
2123 }
2124
2125 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2126 {
2127 match = 0;
2128 break;
2129 }
2130 }
2131
2132 if (!t->opcode_modifier.d)
2133 return match;
2134
2135 /* Check reverse. */
2136 gas_assert ((i.operands >= 2 && i.operands <= 3)
2137 || t->opcode_modifier.vexsources);
2138
2139 for (j = 0; j < i.operands; j++)
2140 {
2141 unsigned int given = i.operands - j - 1;
2142
2143 /* For 4- and 5-operand insns VEX.W controls just the first two
2144 register operands. */
2145 if (t->opcode_modifier.vexsources)
2146 given = j < 2 ? 1 - j : j;
2147
2148 if (t->operand_types[j].bitfield.class == Reg
2149 && !match_operand_size (t, j, given))
2150 return match;
2151
2152 if (t->operand_types[j].bitfield.class == RegSIMD
2153 && !match_simd_size (t, j, given))
2154 return match;
2155
2156 if (t->operand_types[j].bitfield.instance == Accum
2157 && (!match_operand_size (t, j, given)
2158 || !match_simd_size (t, j, given)))
2159 return match;
2160
2161 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2162 return match;
2163 }
2164
2165 return match | MATCH_REVERSE;
2166 }
2167
2168 static INLINE int
2169 operand_type_match (i386_operand_type overlap,
2170 i386_operand_type given)
2171 {
2172 i386_operand_type temp = overlap;
2173
2174 temp.bitfield.unspecified = 0;
2175 temp.bitfield.byte = 0;
2176 temp.bitfield.word = 0;
2177 temp.bitfield.dword = 0;
2178 temp.bitfield.fword = 0;
2179 temp.bitfield.qword = 0;
2180 temp.bitfield.tbyte = 0;
2181 temp.bitfield.xmmword = 0;
2182 temp.bitfield.ymmword = 0;
2183 temp.bitfield.zmmword = 0;
2184 temp.bitfield.tmmword = 0;
2185 if (operand_type_all_zero (&temp))
2186 goto mismatch;
2187
2188 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2189 return 1;
2190
2191 mismatch:
2192 i.error = operand_type_mismatch;
2193 return 0;
2194 }
2195
2196 /* If the given types g0 and g1 are registers, they must be of the same
2197    type unless the expected operand type register overlap is null.
2198    Intel syntax sized memory operands are also checked here. */
2199
2200 static INLINE int
2201 operand_type_register_match (i386_operand_type g0,
2202 i386_operand_type t0,
2203 i386_operand_type g1,
2204 i386_operand_type t1)
2205 {
2206 if (g0.bitfield.class != Reg
2207 && g0.bitfield.class != RegSIMD
2208 && (g0.bitfield.unspecified
2209 || !operand_type_check (g0, anymem)))
2210 return 1;
2211
2212 if (g1.bitfield.class != Reg
2213 && g1.bitfield.class != RegSIMD
2214 && (g1.bitfield.unspecified
2215 || !operand_type_check (g1, anymem)))
2216 return 1;
2217
2218 if (g0.bitfield.byte == g1.bitfield.byte
2219 && g0.bitfield.word == g1.bitfield.word
2220 && g0.bitfield.dword == g1.bitfield.dword
2221 && g0.bitfield.qword == g1.bitfield.qword
2222 && g0.bitfield.xmmword == g1.bitfield.xmmword
2223 && g0.bitfield.ymmword == g1.bitfield.ymmword
2224 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2225 return 1;
2226
2227 /* If expectations overlap in no more than a single size, all is fine. */
2228 g0 = operand_type_and (t0, t1);
2229 if (g0.bitfield.byte
2230 + g0.bitfield.word
2231 + g0.bitfield.dword
2232 + g0.bitfield.qword
2233 + g0.bitfield.xmmword
2234 + g0.bitfield.ymmword
2235 + g0.bitfield.zmmword <= 1)
2236 return 1;
2237
2238 i.error = register_type_mismatch;
2239
2240 return 0;
2241 }
2242
2243 static INLINE unsigned int
2244 register_number (const reg_entry *r)
2245 {
2246 unsigned int nr = r->reg_num;
2247
2248 if (r->reg_flags & RegRex)
2249 nr += 8;
2250
2251 if (r->reg_flags & RegVRex)
2252 nr += 16;
2253
2254 return nr;
2255 }
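/* Illustrative values (added comment): %eax has reg_num 0 and no flags,
   yielding 0; %r9d has reg_num 1 with RegRex, yielding 9; and %xmm25 has
   reg_num 1 with both RegRex and RegVRex, yielding 1 + 8 + 16 = 25 - the
   5-bit register numbering EVEX encodings expect.  */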
2256
2257 static INLINE unsigned int
2258 mode_from_disp_size (i386_operand_type t)
2259 {
2260 if (t.bitfield.disp8)
2261 return 1;
2262 else if (t.bitfield.disp16
2263 || t.bitfield.disp32)
2264 return 2;
2265 else
2266 return 0;
2267 }
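/* Added note: the value returned is the ModRM.mod field to use for a
   displacement-carrying memory operand - mod 1 selects an 8-bit
   displacement, mod 2 a 16- or 32-bit one (depending on address size),
   and mod 0 means no displacement bytes at all.  */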
2268
2269 static INLINE int
2270 fits_in_signed_byte (addressT num)
2271 {
2272 return num + 0x80 <= 0xff;
2273 }
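/* Added note: the addition happens in the unsigned addressT type, so the
   check relies on wrap-around - num = -1 becomes 0x7f after adding 0x80
   and passes, while num = 0x80 becomes 0x100 and fails, giving exactly
   the signed 8-bit range [-0x80, 0x7f].  */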
2274
2275 static INLINE int
2276 fits_in_unsigned_byte (addressT num)
2277 {
2278 return num <= 0xff;
2279 }
2280
2281 static INLINE int
2282 fits_in_unsigned_word (addressT num)
2283 {
2284 return num <= 0xffff;
2285 }
2286
2287 static INLINE int
2288 fits_in_signed_word (addressT num)
2289 {
2290 return num + 0x8000 <= 0xffff;
2291 }
2292
2293 static INLINE int
2294 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2295 {
2296 #ifndef BFD64
2297 return 1;
2298 #else
2299 return num + 0x80000000 <= 0xffffffff;
2300 #endif
2301 } /* fits_in_signed_long() */
2302
2303 static INLINE int
2304 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2305 {
2306 #ifndef BFD64
2307 return 1;
2308 #else
2309 return num <= 0xffffffff;
2310 #endif
2311 } /* fits_in_unsigned_long() */
2312
2313 static INLINE valueT extend_to_32bit_address (addressT num)
2314 {
2315 #ifdef BFD64
2316   if (fits_in_unsigned_long (num))
2317 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2318
2319 if (!fits_in_signed_long (num))
2320 return num & 0xffffffff;
2321 #endif
2322
2323 return num;
2324 }
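/* Hedged examples (added): on BFD64 hosts 0x80000000 sign-extends to
   0xffffffff80000000 via the XOR/subtract trick, an out-of-range value
   such as 0x123456789 is truncated to 0x23456789, and anything already
   representable as a signed 32-bit quantity passes through unchanged.  */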
2325
2326 static INLINE int
2327 fits_in_disp8 (offsetT num)
2328 {
2329 int shift = i.memshift;
2330 unsigned int mask;
2331
2332 if (shift == -1)
2333 abort ();
2334
2335 mask = (1 << shift) - 1;
2336
2337 /* Return 0 if NUM isn't properly aligned. */
2338 if ((num & mask))
2339 return 0;
2340
2341 /* Check if NUM will fit in 8bit after shift. */
2342 return fits_in_signed_byte (num >> shift);
2343 }
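/* Worked example (added), assuming EVEX disp8*N scaling with i.memshift
   == 6 for 64-byte ZMM memory operands: a displacement of 0x80 is 64-byte
   aligned and 0x80 >> 6 == 2 fits in a signed byte, so it can be emitted
   as the compressed disp8 value 2; 0x44 is rejected as misaligned, and
   0x4000 because 0x4000 >> 6 == 0x100 overflows a signed byte.  */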
2344
2345 static INLINE int
2346 fits_in_imm4 (offsetT num)
2347 {
2348 return (num & 0xf) == num;
2349 }
2350
2351 static i386_operand_type
2352 smallest_imm_type (offsetT num)
2353 {
2354 i386_operand_type t;
2355
2356 operand_type_set (&t, 0);
2357 t.bitfield.imm64 = 1;
2358
2359 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2360 {
2361 /* This code is disabled on the 486 because all the Imm1 forms
2362 in the opcode table are slower on the i486. They're the
2363 versions with the implicitly specified single-position
2364 displacement, which has another syntax if you really want to
2365 use that form. */
2366 t.bitfield.imm1 = 1;
2367 t.bitfield.imm8 = 1;
2368 t.bitfield.imm8s = 1;
2369 t.bitfield.imm16 = 1;
2370 t.bitfield.imm32 = 1;
2371 t.bitfield.imm32s = 1;
2372 }
2373 else if (fits_in_signed_byte (num))
2374 {
2375 t.bitfield.imm8 = 1;
2376 t.bitfield.imm8s = 1;
2377 t.bitfield.imm16 = 1;
2378 t.bitfield.imm32 = 1;
2379 t.bitfield.imm32s = 1;
2380 }
2381 else if (fits_in_unsigned_byte (num))
2382 {
2383 t.bitfield.imm8 = 1;
2384 t.bitfield.imm16 = 1;
2385 t.bitfield.imm32 = 1;
2386 t.bitfield.imm32s = 1;
2387 }
2388 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2389 {
2390 t.bitfield.imm16 = 1;
2391 t.bitfield.imm32 = 1;
2392 t.bitfield.imm32s = 1;
2393 }
2394 else if (fits_in_signed_long (num))
2395 {
2396 t.bitfield.imm32 = 1;
2397 t.bitfield.imm32s = 1;
2398 }
2399 else if (fits_in_unsigned_long (num))
2400 t.bitfield.imm32 = 1;
2401
2402 return t;
2403 }
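/* Hedged examples (added): num = -5 sets imm8/imm8s/imm16/imm32/imm32s
   (imm64 is always set up front); num = 200 sets the same minus imm8s,
   since 200 fits an unsigned but not a signed byte; and num = 0x89abcdef
   sets only imm32 besides imm64, fitting an unsigned but not a signed
   32-bit immediate.  */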
2404
2405 static offsetT
2406 offset_in_range (offsetT val, int size)
2407 {
2408 addressT mask;
2409
2410 switch (size)
2411 {
2412 case 1: mask = ((addressT) 1 << 8) - 1; break;
2413 case 2: mask = ((addressT) 1 << 16) - 1; break;
2414 #ifdef BFD64
2415 case 4: mask = ((addressT) 1 << 32) - 1; break;
2416 #endif
2417 case sizeof (val): return val;
2418 default: abort ();
2419 }
2420
2421 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2422 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2423 (uint64_t) val, (uint64_t) (val & mask));
2424
2425 return val & mask;
2426 }
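/* Illustrative use (added): offset_in_range (0x1234, 1) warns
   "0x1234 shortened to 0x34" and returns 0x34, while
   offset_in_range (-1, 2) silently returns 0xffff because every
   discarded bit matches the sign bit.  */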
2427
2428 enum PREFIX_GROUP
2429 {
2430 PREFIX_EXIST = 0,
2431 PREFIX_LOCK,
2432 PREFIX_REP,
2433 PREFIX_DS,
2434 PREFIX_OTHER
2435 };
2436
2437 /* Returns
2438 a. PREFIX_EXIST if attempting to add a prefix where one from the
2439 same class already exists.
2440 b. PREFIX_LOCK if lock prefix is added.
2441 c. PREFIX_REP if rep/repne prefix is added.
2442 d. PREFIX_DS if ds prefix is added.
2443 e. PREFIX_OTHER if other prefix is added.
2444 */
2445
2446 static enum PREFIX_GROUP
2447 add_prefix (unsigned int prefix)
2448 {
2449 enum PREFIX_GROUP ret = PREFIX_OTHER;
2450 unsigned int q;
2451
2452 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2453 && flag_code == CODE_64BIT)
2454 {
2455 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2456 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2457 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2458 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2459 ret = PREFIX_EXIST;
2460 q = REX_PREFIX;
2461 }
2462 else
2463 {
2464 switch (prefix)
2465 {
2466 default:
2467 abort ();
2468
2469 case DS_PREFIX_OPCODE:
2470 ret = PREFIX_DS;
2471 /* Fall through. */
2472 case CS_PREFIX_OPCODE:
2473 case ES_PREFIX_OPCODE:
2474 case FS_PREFIX_OPCODE:
2475 case GS_PREFIX_OPCODE:
2476 case SS_PREFIX_OPCODE:
2477 q = SEG_PREFIX;
2478 break;
2479
2480 case REPNE_PREFIX_OPCODE:
2481 case REPE_PREFIX_OPCODE:
2482 q = REP_PREFIX;
2483 ret = PREFIX_REP;
2484 break;
2485
2486 case LOCK_PREFIX_OPCODE:
2487 q = LOCK_PREFIX;
2488 ret = PREFIX_LOCK;
2489 break;
2490
2491 case FWAIT_OPCODE:
2492 q = WAIT_PREFIX;
2493 break;
2494
2495 case ADDR_PREFIX_OPCODE:
2496 q = ADDR_PREFIX;
2497 break;
2498
2499 case DATA_PREFIX_OPCODE:
2500 q = DATA_PREFIX;
2501 break;
2502 }
2503 if (i.prefix[q] != 0)
2504 ret = PREFIX_EXIST;
2505 }
2506
2507 if (ret)
2508 {
2509 if (!i.prefix[q])
2510 ++i.prefixes;
2511 i.prefix[q] |= prefix;
2512 }
2513 else
2514 as_bad (_("same type of prefix used twice"));
2515
2516 return ret;
2517 }
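/* Usage sketch (added): a "lock" parsed ahead of an instruction arrives
   here as add_prefix (LOCK_PREFIX_OPCODE) and returns PREFIX_LOCK the
   first time; a second "lock" on the same insn finds i.prefix[LOCK_PREFIX]
   already set, reports "same type of prefix used twice", and returns
   PREFIX_EXIST (i.e. 0).  */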
2518
2519 static void
2520 update_code_flag (int value, int check)
2521 {
2522 PRINTF_LIKE ((*as_error));
2523
2524 flag_code = (enum flag_code) value;
2525 if (flag_code == CODE_64BIT)
2526 {
2527 cpu_arch_flags.bitfield.cpu64 = 1;
2528 cpu_arch_flags.bitfield.cpuno64 = 0;
2529 }
2530 else
2531 {
2532 cpu_arch_flags.bitfield.cpu64 = 0;
2533 cpu_arch_flags.bitfield.cpuno64 = 1;
2534 }
2535   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm)
2536 {
2537 if (check)
2538 as_error = as_fatal;
2539 else
2540 as_error = as_bad;
2541 (*as_error) (_("64bit mode not supported on `%s'."),
2542 cpu_arch_name ? cpu_arch_name : default_arch);
2543 }
2544 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2545 {
2546 if (check)
2547 as_error = as_fatal;
2548 else
2549 as_error = as_bad;
2550 (*as_error) (_("32bit mode not supported on `%s'."),
2551 cpu_arch_name ? cpu_arch_name : default_arch);
2552 }
2553 stackop_size = '\0';
2554 }
2555
2556 static void
2557 set_code_flag (int value)
2558 {
2559 update_code_flag (value, 0);
2560 }
2561
2562 static void
2563 set_16bit_gcc_code_flag (int new_code_flag)
2564 {
2565 flag_code = (enum flag_code) new_code_flag;
2566 if (flag_code != CODE_16BIT)
2567 abort ();
2568 cpu_arch_flags.bitfield.cpu64 = 0;
2569 cpu_arch_flags.bitfield.cpuno64 = 1;
2570 stackop_size = LONG_MNEM_SUFFIX;
2571 }
2572
2573 static void
2574 set_intel_syntax (int syntax_flag)
2575 {
2576 /* Find out if register prefixing is specified. */
2577 int ask_naked_reg = 0;
2578
2579 SKIP_WHITESPACE ();
2580 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2581 {
2582 char *string;
2583 int e = get_symbol_name (&string);
2584
2585 if (strcmp (string, "prefix") == 0)
2586 ask_naked_reg = 1;
2587 else if (strcmp (string, "noprefix") == 0)
2588 ask_naked_reg = -1;
2589 else
2590 as_bad (_("bad argument to syntax directive."));
2591 (void) restore_line_pointer (e);
2592 }
2593 demand_empty_rest_of_line ();
2594
2595 intel_syntax = syntax_flag;
2596
2597 if (ask_naked_reg == 0)
2598 allow_naked_reg = (intel_syntax
2599 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2600 else
2601 allow_naked_reg = (ask_naked_reg < 0);
2602
2603 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2604
2605 identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2606 identifier_chars['$'] = intel_syntax ? '$' : 0;
2607 register_prefix = allow_naked_reg ? "" : "%";
2608 }
2609
2610 static void
2611 set_intel_mnemonic (int mnemonic_flag)
2612 {
2613 intel_mnemonic = mnemonic_flag;
2614 }
2615
2616 static void
2617 set_allow_index_reg (int flag)
2618 {
2619 allow_index_reg = flag;
2620 }
2621
2622 static void
2623 set_check (int what)
2624 {
2625 enum check_kind *kind;
2626 const char *str;
2627
2628 if (what)
2629 {
2630 kind = &operand_check;
2631 str = "operand";
2632 }
2633 else
2634 {
2635 kind = &sse_check;
2636 str = "sse";
2637 }
2638
2639 SKIP_WHITESPACE ();
2640
2641 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2642 {
2643 char *string;
2644 int e = get_symbol_name (&string);
2645
2646 if (strcmp (string, "none") == 0)
2647 *kind = check_none;
2648 else if (strcmp (string, "warning") == 0)
2649 *kind = check_warning;
2650 else if (strcmp (string, "error") == 0)
2651 *kind = check_error;
2652 else
2653 as_bad (_("bad argument to %s_check directive."), str);
2654 (void) restore_line_pointer (e);
2655 }
2656 else
2657 as_bad (_("missing argument for %s_check directive"), str);
2658
2659 demand_empty_rest_of_line ();
2660 }
2661
2662 static void
2663 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2664 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2665 {
2666 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2667 static const char *arch;
2668
2669 /* Intel MCU is only supported on ELF. */
2670 if (!IS_ELF)
2671 return;
2672
2673 if (!arch)
2674 {
2675 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2676 use default_arch. */
2677 arch = cpu_arch_name;
2678 if (!arch)
2679 arch = default_arch;
2680 }
2681
2682 /* If we are targeting Intel MCU, we must enable it. */
2683 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2684 == new_flag.bitfield.cpuiamcu)
2685 return;
2686
2687 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2688 #endif
2689 }
2690
2691 static void
2692 extend_cpu_sub_arch_name (const char *name)
2693 {
2694 if (cpu_sub_arch_name)
2695 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2696 ".", name, (const char *) NULL);
2697 else
2698 cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2699 }
2700
2701 static void
2702 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2703 {
2704 typedef struct arch_stack_entry
2705 {
2706 const struct arch_stack_entry *prev;
2707 const char *name;
2708 char *sub_name;
2709 i386_cpu_flags flags;
2710 i386_cpu_flags isa_flags;
2711 enum processor_type isa;
2712 enum flag_code flag_code;
2713 char stackop_size;
2714 bool no_cond_jump_promotion;
2715 } arch_stack_entry;
2716 static const arch_stack_entry *arch_stack_top;
2717
2718 SKIP_WHITESPACE ();
2719
2720 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2721 {
2722 char *s;
2723 int e = get_symbol_name (&s);
2724 const char *string = s;
2725 unsigned int j = 0;
2726 i386_cpu_flags flags;
2727
2728 if (strcmp (string, "default") == 0)
2729 {
2730 if (strcmp (default_arch, "iamcu") == 0)
2731 string = default_arch;
2732 else
2733 {
2734 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2735
2736 cpu_arch_name = NULL;
2737 free (cpu_sub_arch_name);
2738 cpu_sub_arch_name = NULL;
2739 cpu_arch_flags = cpu_unknown_flags;
2740 if (flag_code == CODE_64BIT)
2741 {
2742 cpu_arch_flags.bitfield.cpu64 = 1;
2743 cpu_arch_flags.bitfield.cpuno64 = 0;
2744 }
2745 else
2746 {
2747 cpu_arch_flags.bitfield.cpu64 = 0;
2748 cpu_arch_flags.bitfield.cpuno64 = 1;
2749 }
2750 cpu_arch_isa = PROCESSOR_UNKNOWN;
2751 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2752 if (!cpu_arch_tune_set)
2753 {
2754 cpu_arch_tune = cpu_arch_isa;
2755 cpu_arch_tune_flags = cpu_arch_isa_flags;
2756 }
2757
2758 j = ARRAY_SIZE (cpu_arch) + 1;
2759 }
2760 }
2761 else if (strcmp (string, "push") == 0)
2762 {
2763 arch_stack_entry *top = XNEW (arch_stack_entry);
2764
2765 top->name = cpu_arch_name;
2766 if (cpu_sub_arch_name)
2767 top->sub_name = xstrdup (cpu_sub_arch_name);
2768 else
2769 top->sub_name = NULL;
2770 top->flags = cpu_arch_flags;
2771 top->isa = cpu_arch_isa;
2772 top->isa_flags = cpu_arch_isa_flags;
2773 top->flag_code = flag_code;
2774 top->stackop_size = stackop_size;
2775 top->no_cond_jump_promotion = no_cond_jump_promotion;
2776
2777 top->prev = arch_stack_top;
2778 arch_stack_top = top;
2779
2780 (void) restore_line_pointer (e);
2781 demand_empty_rest_of_line ();
2782 return;
2783 }
2784 else if (strcmp (string, "pop") == 0)
2785 {
2786 const arch_stack_entry *top = arch_stack_top;
2787
2788 if (!top)
2789 as_bad (_(".arch stack is empty"));
2790 else if (top->flag_code != flag_code
2791 || top->stackop_size != stackop_size)
2792 {
2793 static const unsigned int bits[] = {
2794 [CODE_16BIT] = 16,
2795 [CODE_32BIT] = 32,
2796 [CODE_64BIT] = 64,
2797 };
2798
2799 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2800 bits[top->flag_code],
2801 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2802 }
2803 else
2804 {
2805 arch_stack_top = top->prev;
2806
2807 cpu_arch_name = top->name;
2808 free (cpu_sub_arch_name);
2809 cpu_sub_arch_name = top->sub_name;
2810 cpu_arch_flags = top->flags;
2811 cpu_arch_isa = top->isa;
2812 cpu_arch_isa_flags = top->isa_flags;
2813 no_cond_jump_promotion = top->no_cond_jump_promotion;
2814
2815 XDELETE (top);
2816 }
2817
2818 (void) restore_line_pointer (e);
2819 demand_empty_rest_of_line ();
2820 return;
2821 }
2822
2823 for (; j < ARRAY_SIZE (cpu_arch); j++)
2824 {
2825 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2826 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2827 {
2828 if (*string != '.')
2829 {
2830 check_cpu_arch_compatible (string, cpu_arch[j].enable);
2831
2832 cpu_arch_name = cpu_arch[j].name;
2833 free (cpu_sub_arch_name);
2834 cpu_sub_arch_name = NULL;
2835 cpu_arch_flags = cpu_arch[j].enable;
2836 if (flag_code == CODE_64BIT)
2837 {
2838 cpu_arch_flags.bitfield.cpu64 = 1;
2839 cpu_arch_flags.bitfield.cpuno64 = 0;
2840 }
2841 else
2842 {
2843 cpu_arch_flags.bitfield.cpu64 = 0;
2844 cpu_arch_flags.bitfield.cpuno64 = 1;
2845 }
2846 cpu_arch_isa = cpu_arch[j].type;
2847 cpu_arch_isa_flags = cpu_arch[j].enable;
2848 if (!cpu_arch_tune_set)
2849 {
2850 cpu_arch_tune = cpu_arch_isa;
2851 cpu_arch_tune_flags = cpu_arch_isa_flags;
2852 }
2853 pre_386_16bit_warned = false;
2854 break;
2855 }
2856
2857 if (cpu_flags_all_zero (&cpu_arch[j].enable))
2858 continue;
2859
2860 flags = cpu_flags_or (cpu_arch_flags,
2861 cpu_arch[j].enable);
2862
2863 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2864 {
2865 extend_cpu_sub_arch_name (string + 1);
2866 cpu_arch_flags = flags;
2867 cpu_arch_isa_flags = flags;
2868 }
2869 else
2870 cpu_arch_isa_flags
2871 = cpu_flags_or (cpu_arch_isa_flags,
2872 cpu_arch[j].enable);
2873 (void) restore_line_pointer (e);
2874 demand_empty_rest_of_line ();
2875 return;
2876 }
2877 }
2878
2879 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
2880 {
2881 /* Disable an ISA extension. */
2882 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2883 if (cpu_arch[j].type == PROCESSOR_NONE
2884 && strcmp (string + 3, cpu_arch[j].name) == 0)
2885 {
2886 flags = cpu_flags_and_not (cpu_arch_flags,
2887 cpu_arch[j].disable);
2888 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2889 {
2890 extend_cpu_sub_arch_name (string + 1);
2891 cpu_arch_flags = flags;
2892 cpu_arch_isa_flags = flags;
2893 }
2894 (void) restore_line_pointer (e);
2895 demand_empty_rest_of_line ();
2896 return;
2897 }
2898 }
2899
2900 if (j == ARRAY_SIZE (cpu_arch))
2901 as_bad (_("no such architecture: `%s'"), string);
2902
2903 *input_line_pointer = e;
2904 }
2905 else
2906 as_bad (_("missing cpu architecture"));
2907
2908 no_cond_jump_promotion = 0;
2909 if (*input_line_pointer == ','
2910 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2911 {
2912 char *string;
2913 char e;
2914
2915 ++input_line_pointer;
2916 e = get_symbol_name (&string);
2917
2918 if (strcmp (string, "nojumps") == 0)
2919 no_cond_jump_promotion = 1;
2920 else if (strcmp (string, "jumps") == 0)
2921 ;
2922 else
2923 as_bad (_("no such architecture modifier: `%s'"), string);
2924
2925 (void) restore_line_pointer (e);
2926 }
2927
2928 demand_empty_rest_of_line ();
2929 }
2930
2931 enum bfd_architecture
2932 i386_arch (void)
2933 {
2934 if (cpu_arch_isa == PROCESSOR_IAMCU)
2935 {
2936 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2937 || flag_code == CODE_64BIT)
2938 as_fatal (_("Intel MCU is 32bit ELF only"));
2939 return bfd_arch_iamcu;
2940 }
2941 else
2942 return bfd_arch_i386;
2943 }
2944
2945 unsigned long
2946 i386_mach (void)
2947 {
2948 if (startswith (default_arch, "x86_64"))
2949 {
2950 if (default_arch[6] == '\0')
2951 return bfd_mach_x86_64;
2952 else
2953 return bfd_mach_x64_32;
2954 }
2955 else if (!strcmp (default_arch, "i386")
2956 || !strcmp (default_arch, "iamcu"))
2957 {
2958 if (cpu_arch_isa == PROCESSOR_IAMCU)
2959 {
2960 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2961 as_fatal (_("Intel MCU is 32bit ELF only"));
2962 return bfd_mach_i386_iamcu;
2963 }
2964 else
2965 return bfd_mach_i386_i386;
2966 }
2967 else
2968 as_fatal (_("unknown architecture"));
2969 }
2970 \f
2971 #include "opcodes/i386-tbl.h"
2972
2973 void
2974 md_begin (void)
2975 {
2976 /* Support pseudo prefixes like {disp32}. */
2977 lex_type ['{'] = LEX_BEGIN_NAME;
2978
2979 /* Initialize op_hash hash table. */
2980 op_hash = str_htab_create ();
2981
2982 {
2983 const insn_template *const *sets = i386_op_sets;
2984 const insn_template *const *end = sets + ARRAY_SIZE (i386_op_sets) - 1;
2985
2986 /* Type checks to compensate for the conversion through void * which
2987 occurs during hash table insertion / lookup. */
2988 (void) sizeof (sets == &current_templates->start);
2989 (void) sizeof (end == &current_templates->end);
2990 for (; sets < end; ++sets)
2991 if (str_hash_insert (op_hash, (*sets)->name, sets, 0))
2992 as_fatal (_("duplicate %s"), (*sets)->name);
2993 }
2994
2995 /* Initialize reg_hash hash table. */
2996 reg_hash = str_htab_create ();
2997 {
2998 const reg_entry *regtab;
2999 unsigned int regtab_size = i386_regtab_size;
3000
3001 for (regtab = i386_regtab; regtab_size--; regtab++)
3002 {
3003 switch (regtab->reg_type.bitfield.class)
3004 {
3005 case Reg:
3006 if (regtab->reg_type.bitfield.dword)
3007 {
3008 if (regtab->reg_type.bitfield.instance == Accum)
3009 reg_eax = regtab;
3010 }
3011 else if (regtab->reg_type.bitfield.tbyte)
3012 {
3013 /* There's no point inserting st(<N>) in the hash table, as
3014 parentheses aren't included in register_chars[] anyway. */
3015 if (regtab->reg_type.bitfield.instance != Accum)
3016 continue;
3017 reg_st0 = regtab;
3018 }
3019 break;
3020
3021 case SReg:
3022 switch (regtab->reg_num)
3023 {
3024 case 0: reg_es = regtab; break;
3025 case 2: reg_ss = regtab; break;
3026 case 3: reg_ds = regtab; break;
3027 }
3028 break;
3029
3030 case RegMask:
3031 if (!regtab->reg_num)
3032 reg_k0 = regtab;
3033 break;
3034 }
3035
3036 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3037 as_fatal (_("duplicate %s"), regtab->reg_name);
3038 }
3039 }
3040
3041 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3042 {
3043 int c;
3044 char *p;
3045
3046 for (c = 0; c < 256; c++)
3047 {
3048 if (ISDIGIT (c) || ISLOWER (c))
3049 {
3050 mnemonic_chars[c] = c;
3051 register_chars[c] = c;
3052 operand_chars[c] = c;
3053 }
3054 else if (ISUPPER (c))
3055 {
3056 mnemonic_chars[c] = TOLOWER (c);
3057 register_chars[c] = mnemonic_chars[c];
3058 operand_chars[c] = c;
3059 }
3060 else if (c == '{' || c == '}')
3061 {
3062 mnemonic_chars[c] = c;
3063 operand_chars[c] = c;
3064 }
3065 #ifdef SVR4_COMMENT_CHARS
3066 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3067 operand_chars[c] = c;
3068 #endif
3069
3070 if (ISALPHA (c) || ISDIGIT (c))
3071 identifier_chars[c] = c;
3072 else if (c >= 128)
3073 {
3074 identifier_chars[c] = c;
3075 operand_chars[c] = c;
3076 }
3077 }
3078
3079 #ifdef LEX_AT
3080 identifier_chars['@'] = '@';
3081 #endif
3082 #ifdef LEX_QM
3083 identifier_chars['?'] = '?';
3084 operand_chars['?'] = '?';
3085 #endif
3086 mnemonic_chars['_'] = '_';
3087 mnemonic_chars['-'] = '-';
3088 mnemonic_chars['.'] = '.';
3089 identifier_chars['_'] = '_';
3090 identifier_chars['.'] = '.';
3091
3092 for (p = operand_special_chars; *p != '\0'; p++)
3093 operand_chars[(unsigned char) *p] = *p;
3094 }
3095
3096 if (flag_code == CODE_64BIT)
3097 {
3098 #if defined (OBJ_COFF) && defined (TE_PE)
3099 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3100 ? 32 : 16);
3101 #else
3102 x86_dwarf2_return_column = 16;
3103 #endif
3104 x86_cie_data_alignment = -8;
3105 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3106 x86_sframe_cfa_sp_reg = 7;
3107 x86_sframe_cfa_fp_reg = 6;
3108 #endif
3109 }
3110 else
3111 {
3112 x86_dwarf2_return_column = 8;
3113 x86_cie_data_alignment = -4;
3114 }
3115
3116 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3117 can be turned into BRANCH_PREFIX frag. */
3118 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3119 abort ();
3120 }
3121
3122 void
3123 i386_print_statistics (FILE *file)
3124 {
3125 htab_print_statistics (file, "i386 opcode", op_hash);
3126 htab_print_statistics (file, "i386 register", reg_hash);
3127 }
3128
3129 void
3130 i386_md_end (void)
3131 {
3132 htab_delete (op_hash);
3133 htab_delete (reg_hash);
3134 }
3135 \f
3136 #ifdef DEBUG386
3137
3138 /* Debugging routines for md_assemble. */
3139 static void pte (insn_template *);
3140 static void pt (i386_operand_type);
3141 static void pe (expressionS *);
3142 static void ps (symbolS *);
3143
3144 static void
3145 pi (const char *line, i386_insn *x)
3146 {
3147 unsigned int j;
3148
3149 fprintf (stdout, "%s: template ", line);
3150 pte (&x->tm);
3151 fprintf (stdout, " address: base %s index %s scale %x\n",
3152 x->base_reg ? x->base_reg->reg_name : "none",
3153 x->index_reg ? x->index_reg->reg_name : "none",
3154 x->log2_scale_factor);
3155 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3156 x->rm.mode, x->rm.reg, x->rm.regmem);
3157 fprintf (stdout, " sib: base %x index %x scale %x\n",
3158 x->sib.base, x->sib.index, x->sib.scale);
3159 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3160 (x->rex & REX_W) != 0,
3161 (x->rex & REX_R) != 0,
3162 (x->rex & REX_X) != 0,
3163 (x->rex & REX_B) != 0);
3164 for (j = 0; j < x->operands; j++)
3165 {
3166 fprintf (stdout, " #%d: ", j + 1);
3167 pt (x->types[j]);
3168 fprintf (stdout, "\n");
3169 if (x->types[j].bitfield.class == Reg
3170 || x->types[j].bitfield.class == RegMMX
3171 || x->types[j].bitfield.class == RegSIMD
3172 || x->types[j].bitfield.class == RegMask
3173 || x->types[j].bitfield.class == SReg
3174 || x->types[j].bitfield.class == RegCR
3175 || x->types[j].bitfield.class == RegDR
3176 || x->types[j].bitfield.class == RegTR
3177 || x->types[j].bitfield.class == RegBND)
3178 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3179 if (operand_type_check (x->types[j], imm))
3180 pe (x->op[j].imms);
3181 if (operand_type_check (x->types[j], disp))
3182 pe (x->op[j].disps);
3183 }
3184 }
3185
3186 static void
3187 pte (insn_template *t)
3188 {
3189 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3190 static const char *const opc_spc[] = {
3191 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3192 "XOP08", "XOP09", "XOP0A",
3193 };
3194 unsigned int j;
3195
3196 fprintf (stdout, " %d operands ", t->operands);
3197 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3198 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3199 if (opc_spc[t->opcode_modifier.opcodespace])
3200 fprintf (stdout, "space %s ", opc_spc[t->opcode_modifier.opcodespace]);
3201 fprintf (stdout, "opcode %x ", t->base_opcode);
3202 if (t->extension_opcode != None)
3203 fprintf (stdout, "ext %x ", t->extension_opcode);
3204 if (t->opcode_modifier.d)
3205 fprintf (stdout, "D");
3206 if (t->opcode_modifier.w)
3207 fprintf (stdout, "W");
3208 fprintf (stdout, "\n");
3209 for (j = 0; j < t->operands; j++)
3210 {
3211 fprintf (stdout, " #%d type ", j + 1);
3212 pt (t->operand_types[j]);
3213 fprintf (stdout, "\n");
3214 }
3215 }
3216
3217 static void
3218 pe (expressionS *e)
3219 {
3220 fprintf (stdout, " operation %d\n", e->X_op);
3221 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3222 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3223 if (e->X_add_symbol)
3224 {
3225 fprintf (stdout, " add_symbol ");
3226 ps (e->X_add_symbol);
3227 fprintf (stdout, "\n");
3228 }
3229 if (e->X_op_symbol)
3230 {
3231 fprintf (stdout, " op_symbol ");
3232 ps (e->X_op_symbol);
3233 fprintf (stdout, "\n");
3234 }
3235 }
3236
3237 static void
3238 ps (symbolS *s)
3239 {
3240 fprintf (stdout, "%s type %s%s",
3241 S_GET_NAME (s),
3242 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3243 segment_name (S_GET_SEGMENT (s)));
3244 }
3245
3246 static struct type_name
3247 {
3248 i386_operand_type mask;
3249 const char *name;
3250 }
3251 const type_names[] =
3252 {
3253 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3254 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3255 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3256 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3257 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3258 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3259 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3260 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3261 { { .bitfield = { .imm8 = 1 } }, "i8" },
3262 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3263 { { .bitfield = { .imm16 = 1 } }, "i16" },
3264 { { .bitfield = { .imm32 = 1 } }, "i32" },
3265 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3266 { { .bitfield = { .imm64 = 1 } }, "i64" },
3267 { { .bitfield = { .imm1 = 1 } }, "i1" },
3268 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3269 { { .bitfield = { .disp8 = 1 } }, "d8" },
3270 { { .bitfield = { .disp16 = 1 } }, "d16" },
3271 { { .bitfield = { .disp32 = 1 } }, "d32" },
3272 { { .bitfield = { .disp64 = 1 } }, "d64" },
3273 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3274 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3275 { { .bitfield = { .class = RegCR } }, "control reg" },
3276 { { .bitfield = { .class = RegTR } }, "test reg" },
3277 { { .bitfield = { .class = RegDR } }, "debug reg" },
3278 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3279 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3280 { { .bitfield = { .class = SReg } }, "SReg" },
3281 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3282 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3283 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3284 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3285 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3286 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3287 };
3288
3289 static void
3290 pt (i386_operand_type t)
3291 {
3292 unsigned int j;
3293 i386_operand_type a;
3294
3295 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3296 {
3297 a = operand_type_and (t, type_names[j].mask);
3298 if (operand_type_equal (&a, &type_names[j].mask))
3299 fprintf (stdout, "%s, ", type_names[j].name);
3300 }
3301 fflush (stdout);
3302 }
3303
3304 #endif /* DEBUG386 */
3305 \f
3306 static bfd_reloc_code_real_type
3307 reloc (unsigned int size,
3308 int pcrel,
3309 int sign,
3310 bfd_reloc_code_real_type other)
3311 {
3312 if (other != NO_RELOC)
3313 {
3314 reloc_howto_type *rel;
3315
3316 if (size == 8)
3317 switch (other)
3318 {
3319 	  case BFD_RELOC_X86_64_GOT32:
3320 	    return BFD_RELOC_X86_64_GOT64;
3322 	  case BFD_RELOC_X86_64_GOTPLT64:
3323 	    return BFD_RELOC_X86_64_GOTPLT64;
3325 	  case BFD_RELOC_X86_64_PLTOFF64:
3326 	    return BFD_RELOC_X86_64_PLTOFF64;
3328 case BFD_RELOC_X86_64_GOTPC32:
3329 other = BFD_RELOC_X86_64_GOTPC64;
3330 break;
3331 case BFD_RELOC_X86_64_GOTPCREL:
3332 other = BFD_RELOC_X86_64_GOTPCREL64;
3333 break;
3334 case BFD_RELOC_X86_64_TPOFF32:
3335 other = BFD_RELOC_X86_64_TPOFF64;
3336 break;
3337 case BFD_RELOC_X86_64_DTPOFF32:
3338 other = BFD_RELOC_X86_64_DTPOFF64;
3339 break;
3340 default:
3341 break;
3342 }
3343
3344 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3345 if (other == BFD_RELOC_SIZE32)
3346 {
3347 if (size == 8)
3348 other = BFD_RELOC_SIZE64;
3349 if (pcrel)
3350 {
3351 as_bad (_("there are no pc-relative size relocations"));
3352 return NO_RELOC;
3353 }
3354 }
3355 #endif
3356
3357 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3358 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3359 sign = -1;
3360
3361 rel = bfd_reloc_type_lookup (stdoutput, other);
3362 if (!rel)
3363 as_bad (_("unknown relocation (%u)"), other);
3364 else if (size != bfd_get_reloc_size (rel))
3365 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3366 bfd_get_reloc_size (rel),
3367 size);
3368 else if (pcrel && !rel->pc_relative)
3369 as_bad (_("non-pc-relative relocation for pc-relative field"));
3370 else if ((rel->complain_on_overflow == complain_overflow_signed
3371 && !sign)
3372 || (rel->complain_on_overflow == complain_overflow_unsigned
3373 && sign > 0))
3374 as_bad (_("relocated field and relocation type differ in signedness"));
3375 else
3376 return other;
3377 return NO_RELOC;
3378 }
3379
3380 if (pcrel)
3381 {
3382 if (!sign)
3383 as_bad (_("there are no unsigned pc-relative relocations"));
3384 switch (size)
3385 {
3386 case 1: return BFD_RELOC_8_PCREL;
3387 case 2: return BFD_RELOC_16_PCREL;
3388 case 4: return BFD_RELOC_32_PCREL;
3389 case 8: return BFD_RELOC_64_PCREL;
3390 }
3391 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3392 }
3393 else
3394 {
3395 if (sign > 0)
3396 switch (size)
3397 {
3398 case 4: return BFD_RELOC_X86_64_32S;
3399 }
3400 else
3401 switch (size)
3402 {
3403 case 1: return BFD_RELOC_8;
3404 case 2: return BFD_RELOC_16;
3405 case 4: return BFD_RELOC_32;
3406 case 8: return BFD_RELOC_64;
3407 }
3408 as_bad (_("cannot do %s %u byte relocation"),
3409 sign > 0 ? "signed" : "unsigned", size);
3410 }
3411
3412 return NO_RELOC;
3413 }
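/* Hedged examples (added): reloc (4, 1, 1, NO_RELOC) yields
   BFD_RELOC_32_PCREL, reloc (4, 0, 1, NO_RELOC) yields the signed
   BFD_RELOC_X86_64_32S, and passing OTHER = BFD_RELOC_X86_64_GOTPCREL
   with SIZE = 8 widens it to BFD_RELOC_X86_64_GOTPCREL64 before the
   usual size and signedness validation runs.  */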
3414
3415 /* Here we decide which fixups can be adjusted to make them relative to
3416 the beginning of the section instead of the symbol. Basically we need
3417 to make sure that the dynamic relocations are done correctly, so in
3418 some cases we force the original symbol to be used. */
3419
3420 int
3421 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3422 {
3423 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3424 if (!IS_ELF)
3425 return 1;
3426
3427 /* Don't adjust pc-relative references to merge sections in 64-bit
3428 mode. */
3429 if (use_rela_relocations
3430 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3431 && fixP->fx_pcrel)
3432 return 0;
3433
3434 /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3435 and changed later by validate_fix. */
3436 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3437 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3438 return 0;
3439
3440 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3441 for size relocations. */
3442 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3443 || fixP->fx_r_type == BFD_RELOC_SIZE64
3444 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3445 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3446 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3447 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3448 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3449 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3450 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3451 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3452 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3453 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3454 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3455 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3456 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3457 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3458 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3459 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3460 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3461 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3462 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3463 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3464 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3465 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3466 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3467 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3468 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3469 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3470 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3471 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3472 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3473 return 0;
3474 #endif
3475 return 1;
3476 }
3477
3478 static INLINE bool
3479 want_disp32 (const insn_template *t)
3480 {
3481 return flag_code != CODE_64BIT
3482 || i.prefix[ADDR_PREFIX]
3483 || (t->base_opcode == 0x8d
3484 && t->opcode_modifier.opcodespace == SPACE_BASE
3485 && (!i.types[1].bitfield.qword
3486 || t->opcode_modifier.size == SIZE32));
3487 }
3488
3489 static int
3490 intel_float_operand (const char *mnemonic)
3491 {
3492 /* Note that the value returned is meaningful only for opcodes with (memory)
3493 operands, hence the code here is free to improperly handle opcodes that
3494 have no operands (for better performance and smaller code). */
3495
3496 if (mnemonic[0] != 'f')
3497 return 0; /* non-math */
3498
3499 switch (mnemonic[1])
3500 {
3501     /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3502        the fs segment override prefix are not currently handled, because
3503        no call path can make opcodes without operands get here.  */
3504 case 'i':
3505 return 2 /* integer op */;
3506 case 'l':
3507 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3508 return 3; /* fldcw/fldenv */
3509 break;
3510 case 'n':
3511 if (mnemonic[2] != 'o' /* fnop */)
3512 return 3; /* non-waiting control op */
3513 break;
3514 case 'r':
3515 if (mnemonic[2] == 's')
3516 return 3; /* frstor/frstpm */
3517 break;
3518 case 's':
3519 if (mnemonic[2] == 'a')
3520 return 3; /* fsave */
3521 if (mnemonic[2] == 't')
3522 {
3523 switch (mnemonic[3])
3524 {
3525 case 'c': /* fstcw */
3526 case 'd': /* fstdw */
3527 case 'e': /* fstenv */
3528 case 's': /* fsts[gw] */
3529 return 3;
3530 }
3531 }
3532 break;
3533 case 'x':
3534 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3535 return 0; /* fxsave/fxrstor are not really math ops */
3536 break;
3537 }
3538
3539 return 1;
3540 }
3541
3542 static INLINE void
3543 install_template (const insn_template *t)
3544 {
3545 unsigned int l;
3546
3547 i.tm = *t;
3548
3549 /* Note that for pseudo prefixes this produces a length of 1. But for them
3550 the length isn't interesting at all. */
3551 for (l = 1; l < 4; ++l)
3552 if (!(t->base_opcode >> (8 * l)))
3553 break;
3554
3555 i.opcode_length = l;
3556 }
3557
3558 /* Build the VEX prefix. */
3559
3560 static void
3561 build_vex_prefix (const insn_template *t)
3562 {
3563 unsigned int register_specifier;
3564 unsigned int vector_length;
3565 unsigned int w;
3566
3567 /* Check register specifier. */
3568 if (i.vex.register_specifier)
3569 {
3570 register_specifier =
3571 ~register_number (i.vex.register_specifier) & 0xf;
3572 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3573 }
3574 else
3575 register_specifier = 0xf;
3576
3577   /* Use the 2-byte VEX prefix by swapping destination and source
3578      operands if there is more than one register operand.  */
3579 if (i.reg_operands > 1
3580 && i.vec_encoding != vex_encoding_vex3
3581 && i.dir_encoding == dir_encoding_default
3582 && i.operands == i.reg_operands
3583 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3584 && i.tm.opcode_modifier.opcodespace == SPACE_0F
3585 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3586 && i.rex == REX_B)
3587 {
3588 unsigned int xchg = i.operands - 1;
3589 union i386_op temp_op;
3590 i386_operand_type temp_type;
3591
3592 temp_type = i.types[xchg];
3593 i.types[xchg] = i.types[0];
3594 i.types[0] = temp_type;
3595 temp_op = i.op[xchg];
3596 i.op[xchg] = i.op[0];
3597 i.op[0] = temp_op;
3598
3599 gas_assert (i.rm.mode == 3);
3600
3601 i.rex = REX_R;
3602 xchg = i.rm.regmem;
3603 i.rm.regmem = i.rm.reg;
3604 i.rm.reg = xchg;
3605
3606 if (i.tm.opcode_modifier.d)
3607 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3608 ? Opcode_ExtD : Opcode_SIMD_IntD;
3609 else /* Use the next insn. */
3610 install_template (&t[1]);
3611 }
3612
3613 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3614 are no memory operands and at least 3 register ones. */
3615 if (i.reg_operands >= 3
3616 && i.vec_encoding != vex_encoding_vex3
3617 && i.reg_operands == i.operands - i.imm_operands
3618 && i.tm.opcode_modifier.vex
3619 && i.tm.opcode_modifier.commutative
3620 && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3621 && i.rex == REX_B
3622 && i.vex.register_specifier
3623 && !(i.vex.register_specifier->reg_flags & RegRex))
3624 {
3625 unsigned int xchg = i.operands - i.reg_operands;
3626 union i386_op temp_op;
3627 i386_operand_type temp_type;
3628
3629 gas_assert (i.tm.opcode_modifier.opcodespace == SPACE_0F);
3630 gas_assert (!i.tm.opcode_modifier.sae);
3631 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3632 &i.types[i.operands - 3]));
3633 gas_assert (i.rm.mode == 3);
3634
3635 temp_type = i.types[xchg];
3636 i.types[xchg] = i.types[xchg + 1];
3637 i.types[xchg + 1] = temp_type;
3638 temp_op = i.op[xchg];
3639 i.op[xchg] = i.op[xchg + 1];
3640 i.op[xchg + 1] = temp_op;
3641
3642 i.rex = 0;
3643 xchg = i.rm.regmem | 8;
3644 i.rm.regmem = ~register_specifier & 0xf;
3645 gas_assert (!(i.rm.regmem & 8));
3646 i.vex.register_specifier += xchg - i.rm.regmem;
3647 register_specifier = ~xchg & 0xf;
3648 }
3649
3650 if (i.tm.opcode_modifier.vex == VEXScalar)
3651 vector_length = avxscalar;
3652 else if (i.tm.opcode_modifier.vex == VEX256)
3653 vector_length = 1;
3654 else
3655 {
3656 unsigned int op;
3657
3658 /* Determine vector length from the last multi-length vector
3659 operand. */
3660 vector_length = 0;
3661 for (op = t->operands; op--;)
3662 if (t->operand_types[op].bitfield.xmmword
3663 && t->operand_types[op].bitfield.ymmword
3664 && i.types[op].bitfield.ymmword)
3665 {
3666 vector_length = 1;
3667 break;
3668 }
3669 }
3670
3671 /* Check the REX.W bit and VEXW. */
3672 if (i.tm.opcode_modifier.vexw == VEXWIG)
3673 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3674 else if (i.tm.opcode_modifier.vexw)
3675 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3676 else
3677 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3678
3679 /* Use 2-byte VEX prefix if possible. */
3680 if (w == 0
3681 && i.vec_encoding != vex_encoding_vex3
3682 && i.tm.opcode_modifier.opcodespace == SPACE_0F
3683 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3684 {
3685 /* 2-byte VEX prefix. */
3686 unsigned int r;
3687
3688 i.vex.length = 2;
3689 i.vex.bytes[0] = 0xc5;
3690
3691 /* Check the REX.R bit. */
3692 r = (i.rex & REX_R) ? 0 : 1;
3693 i.vex.bytes[1] = (r << 7
3694 | register_specifier << 3
3695 | vector_length << 2
3696 | i.tm.opcode_modifier.opcodeprefix);
3697 }
3698 else
3699 {
3700 /* 3-byte VEX prefix. */
3701 i.vex.length = 3;
3702
3703 switch (i.tm.opcode_modifier.opcodespace)
3704 {
3705 case SPACE_0F:
3706 case SPACE_0F38:
3707 case SPACE_0F3A:
3708 i.vex.bytes[0] = 0xc4;
3709 break;
3710 case SPACE_XOP08:
3711 case SPACE_XOP09:
3712 case SPACE_XOP0A:
3713 i.vex.bytes[0] = 0x8f;
3714 break;
3715 default:
3716 abort ();
3717 }
3718
3719       /* The high 3 bits of the second VEX byte are 1's complement
3720 	 of the RXB bits from REX.  */
3721 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3722
3723 i.vex.bytes[2] = (w << 7
3724 | register_specifier << 3
3725 | vector_length << 2
3726 | i.tm.opcode_modifier.opcodeprefix);
3727 }
3728 }
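/* Worked encoding example (added, not from the original source): for
   "vaddps %ymm1, %ymm2, %ymm3" the 2-byte form applies (no REX.W/X/B,
   0F opcode space): byte 0 is 0xc5 and byte 1 packs R=1 (ymm3 < 8,
   inverted), vvvv = ~2 & 0xf = 0xd, L=1 for 256 bits, and pp=0, i.e.
   0xec; with opcode 0x58 and ModRM 0xd9 the full insn is c5 ec 58 d9.  */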
3729
3730 static INLINE bool
3731 is_evex_encoding (const insn_template *t)
3732 {
3733 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3734 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3735 || t->opcode_modifier.sae;
3736 }
3737
3738 static INLINE bool
3739 is_any_vex_encoding (const insn_template *t)
3740 {
3741 return t->opcode_modifier.vex || is_evex_encoding (t);
3742 }
3743
3744 static unsigned int
3745 get_broadcast_bytes (const insn_template *t, bool diag)
3746 {
3747 unsigned int op, bytes;
3748 const i386_operand_type *types;
3749
3750 if (i.broadcast.type)
3751 return i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
3752 * i.broadcast.type);
3753
3754 gas_assert (intel_syntax);
3755
3756 for (op = 0; op < t->operands; ++op)
3757 if (t->operand_types[op].bitfield.baseindex)
3758 break;
3759
3760 gas_assert (op < t->operands);
3761
3762 if (t->opcode_modifier.evex
3763 && t->opcode_modifier.evex != EVEXDYN)
3764 switch (i.broadcast.bytes)
3765 {
3766 case 1:
3767 if (t->operand_types[op].bitfield.word)
3768 return 2;
3769 /* Fall through. */
3770 case 2:
3771 if (t->operand_types[op].bitfield.dword)
3772 return 4;
3773 /* Fall through. */
3774 case 4:
3775 if (t->operand_types[op].bitfield.qword)
3776 return 8;
3777 /* Fall through. */
3778 case 8:
3779 if (t->operand_types[op].bitfield.xmmword)
3780 return 16;
3781 if (t->operand_types[op].bitfield.ymmword)
3782 return 32;
3783 if (t->operand_types[op].bitfield.zmmword)
3784 return 64;
3785 /* Fall through. */
3786 default:
3787 abort ();
3788 }
3789
3790 gas_assert (op + 1 < t->operands);
3791
3792 if (t->operand_types[op + 1].bitfield.xmmword
3793 + t->operand_types[op + 1].bitfield.ymmword
3794 + t->operand_types[op + 1].bitfield.zmmword > 1)
3795 {
3796 types = &i.types[op + 1];
3797 diag = false;
3798 }
3799 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
3800 types = &t->operand_types[op];
3801
3802 if (types->bitfield.zmmword)
3803 bytes = 64;
3804 else if (types->bitfield.ymmword)
3805 bytes = 32;
3806 else
3807 bytes = 16;
3808
3809 if (diag)
3810 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3811 t->name, bytes * 8);
3812
3813 return bytes;
3814 }
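/* Hedged example (added, assuming DWORD_BROADCAST == 3 as defined in
   opcodes/i386-opc.h): for AT&T "vaddps (%rax){1to16}, %zmm1, %zmm2"
   the parser records i.broadcast.type = 16, the Dword broadcast modifier
   gives an element size of 1 << (3 - 1) = 4 bytes, and this function
   returns 4 * 16 = 64 - the full ZMM width.  */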
3815
3816 /* Build the EVEX prefix. */
3817
3818 static void
3819 build_evex_prefix (void)
3820 {
3821 unsigned int register_specifier, w;
3822 rex_byte vrex_used = 0;
3823
3824 /* Check register specifier. */
3825 if (i.vex.register_specifier)
3826 {
3827 gas_assert ((i.vrex & REX_X) == 0);
3828
3829 register_specifier = i.vex.register_specifier->reg_num;
3830 if ((i.vex.register_specifier->reg_flags & RegRex))
3831 register_specifier += 8;
3832 /* The upper 16 registers are encoded in the fourth byte of the
3833 EVEX prefix. */
3834 if (!(i.vex.register_specifier->reg_flags & RegVRex))
3835 i.vex.bytes[3] = 0x8;
3836 register_specifier = ~register_specifier & 0xf;
3837 }
3838 else
3839 {
3840 register_specifier = 0xf;
3841
3842 /* Encode upper 16 vector index register in the fourth byte of
3843 the EVEX prefix. */
3844 if (!(i.vrex & REX_X))
3845 i.vex.bytes[3] = 0x8;
3846 else
3847 vrex_used |= REX_X;
3848 }
3849
3850 /* 4 byte EVEX prefix. */
3851 i.vex.length = 4;
3852 i.vex.bytes[0] = 0x62;
3853
3854   /* The high 3 bits of the second EVEX byte are 1's complement of the
3855      RXB bits from REX.  */
3856 gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
3857 gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_EVEXMAP6);
3858 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3859
3860   /* The fifth bit of the second EVEX byte is 1's complement of the
3861 REX_R bit in VREX. */
3862 if (!(i.vrex & REX_R))
3863 i.vex.bytes[1] |= 0x10;
3864 else
3865 vrex_used |= REX_R;
3866
3867 if ((i.reg_operands + i.imm_operands) == i.operands)
3868 {
3869 /* When all operands are registers, the REX_X bit in REX is not
3870 used. We reuse it to encode the upper 16 registers, which is
3871 indicated by the REX_B bit in VREX. The REX_X bit is encoded
3872 	 as 1's complement.  */
3873 if ((i.vrex & REX_B))
3874 {
3875 vrex_used |= REX_B;
3876 i.vex.bytes[1] &= ~0x40;
3877 }
3878 }
3879
3880 /* EVEX instructions shouldn't need the REX prefix. */
3881 i.vrex &= ~vrex_used;
3882 gas_assert (i.vrex == 0);
3883
3884 /* Check the REX.W bit and VEXW. */
3885 if (i.tm.opcode_modifier.vexw == VEXWIG)
3886 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3887 else if (i.tm.opcode_modifier.vexw)
3888 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3889 else
3890 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3891
3892 /* The third byte of the EVEX prefix. */
3893 i.vex.bytes[2] = ((w << 7)
3894 | (register_specifier << 3)
3895 | 4 /* Encode the U bit. */
3896 | i.tm.opcode_modifier.opcodeprefix);
3897
3898 /* The fourth byte of the EVEX prefix. */
3899 /* The zeroing-masking bit. */
3900 if (i.mask.reg && i.mask.zeroing)
3901 i.vex.bytes[3] |= 0x80;
3902
3903   /* Without rounding control, encode the vector length and set the broadcast (EVEX.b) bit only for an actual embedded broadcast.  */
3904 if (i.rounding.type == rc_none)
3905 {
3906 /* Encode the vector length. */
3907 unsigned int vec_length;
3908
3909 if (!i.tm.opcode_modifier.evex
3910 || i.tm.opcode_modifier.evex == EVEXDYN)
3911 {
3912 unsigned int op;
3913
3914 /* Determine vector length from the last multi-length vector
3915 operand. */
3916 for (op = i.operands; op--;)
3917 if (i.tm.operand_types[op].bitfield.xmmword
3918 + i.tm.operand_types[op].bitfield.ymmword
3919 + i.tm.operand_types[op].bitfield.zmmword > 1)
3920 {
3921 if (i.types[op].bitfield.zmmword)
3922 {
3923 i.tm.opcode_modifier.evex = EVEX512;
3924 break;
3925 }
3926 else if (i.types[op].bitfield.ymmword)
3927 {
3928 i.tm.opcode_modifier.evex = EVEX256;
3929 break;
3930 }
3931 else if (i.types[op].bitfield.xmmword)
3932 {
3933 i.tm.opcode_modifier.evex = EVEX128;
3934 break;
3935 }
3936 else if (i.broadcast.bytes && op == i.broadcast.operand)
3937 {
3938 switch (get_broadcast_bytes (&i.tm, true))
3939 {
3940 case 64:
3941 i.tm.opcode_modifier.evex = EVEX512;
3942 break;
3943 case 32:
3944 i.tm.opcode_modifier.evex = EVEX256;
3945 break;
3946 case 16:
3947 i.tm.opcode_modifier.evex = EVEX128;
3948 break;
3949 default:
3950 abort ();
3951 }
3952 break;
3953 }
3954 }
3955
3956 if (op >= MAX_OPERANDS)
3957 abort ();
3958 }
3959
3960 switch (i.tm.opcode_modifier.evex)
3961 {
3962 case EVEXLIG: /* LL' is ignored */
3963 vec_length = evexlig << 5;
3964 break;
3965 case EVEX128:
3966 vec_length = 0 << 5;
3967 break;
3968 case EVEX256:
3969 vec_length = 1 << 5;
3970 break;
3971 case EVEX512:
3972 vec_length = 2 << 5;
3973 break;
3974 default:
3975 abort ();
3976 break;
3977 }
3978 i.vex.bytes[3] |= vec_length;
3979 /* Encode the broadcast bit. */
3980 if (i.broadcast.bytes)
3981 i.vex.bytes[3] |= 0x10;
3982 }
3983 else if (i.rounding.type != saeonly)
3984 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3985 else
3986 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
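/* Worked example (illustrative): for `vaddps {ru-sae}, %zmm1, %zmm2, %zmm3'
   the rounding type is ru (2), so the b bit is set and L'L carries the RC
   value 10b rather than a vector length.  */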
3987
3988 if (i.mask.reg)
3989 i.vex.bytes[3] |= i.mask.reg->reg_num;
3990 }
3991
3992 static void
3993 process_immext (void)
3994 {
3995 expressionS *exp;
3996
3997 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
3998 which is coded in the same place as an 8-bit immediate field
3999 would be. Here we fake an 8-bit immediate operand from the
4000 opcode suffix stored in tm.extension_opcode.
4001
4002 AVX instructions also use this encoding for some
4003 3-argument instructions. */
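/* Concretely (an illustration, not from the opcode table): 3DNow!
   `pfadd %mm1, %mm0' is 0f 0f /r with opcode suffix 0x9e, which the code
   below emits through the imm8 machinery.  */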
4004
4005 gas_assert (i.imm_operands <= 1
4006 && (i.operands <= 2
4007 || (is_any_vex_encoding (&i.tm)
4008 && i.operands <= 4)));
4009
4010 exp = &im_expressions[i.imm_operands++];
4011 i.op[i.operands].imms = exp;
4012 i.types[i.operands].bitfield.imm8 = 1;
4013 i.operands++;
4014 exp->X_op = O_constant;
4015 exp->X_add_number = i.tm.extension_opcode;
4016 i.tm.extension_opcode = None;
4017 }
4018
4019
4020 static int
4021 check_hle (void)
4022 {
4023 switch (i.tm.opcode_modifier.prefixok)
4024 {
4025 default:
4026 abort ();
4027 case PrefixLock:
4028 case PrefixNone:
4029 case PrefixNoTrack:
4030 case PrefixRep:
4031 as_bad (_("invalid instruction `%s' after `%s'"),
4032 i.tm.name, i.hle_prefix);
4033 return 0;
4034 case PrefixHLELock:
4035 if (i.prefix[LOCK_PREFIX])
4036 return 1;
4037 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4038 return 0;
4039 case PrefixHLEAny:
4040 return 1;
4041 case PrefixHLERelease:
4042 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4043 {
4044 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4045 i.tm.name);
4046 return 0;
4047 }
4048 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4049 {
4050 as_bad (_("memory destination needed for instruction `%s'"
4051 " after `xrelease'"), i.tm.name);
4052 return 0;
4053 }
4054 return 1;
4055 }
4056 }
4057
4058 /* Encode aligned vector move as unaligned vector move. */
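/* E.g. (with -muse-unaligned-vector-move): movaps/movapd (0f 28/29) become
   movups/movupd (0f 10/11), and vmovdqa32/vmovdqa64 (66 prefix) become
   vmovdqu32/vmovdqu64 (f3 prefix).  */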
4059
4060 static void
4061 encode_with_unaligned_vector_move (void)
4062 {
4063 switch (i.tm.base_opcode)
4064 {
4065 case 0x28: /* Load instructions. */
4066 case 0x29: /* Store instructions. */
4067 /* movaps/movapd/vmovaps/vmovapd. */
4068 if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4069 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4070 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4071 break;
4072 case 0x6f: /* Load instructions. */
4073 case 0x7f: /* Store instructions. */
4074 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4075 if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4076 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4077 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4078 break;
4079 default:
4080 break;
4081 }
4082 }
4083
4084 /* Try the shortest encoding by shortening operand size. */
4085
4086 static void
4087 optimize_encoding (void)
4088 {
4089 unsigned int j;
4090
4091 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4092 && i.tm.base_opcode == 0x8d)
4093 {
4094 /* Optimize: -O:
4095 lea symbol, %rN -> mov $symbol, %rN
4096 lea (%rM), %rN -> mov %rM, %rN
4097 lea (,%rM,1), %rN -> mov %rM, %rN
4098
4099 and in 32-bit mode for 16-bit addressing
4100
4101 lea (%rM), %rN -> movzx %rM, %rN
4102
4103 and in 64-bit mode zap 32-bit addressing in favor of using a
4104 32-bit (or less) destination.
4105 */
4106 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4107 {
4108 if (!i.op[1].regs->reg_type.bitfield.word)
4109 i.tm.opcode_modifier.size = SIZE32;
4110 i.prefix[ADDR_PREFIX] = 0;
4111 }
4112
4113 if (!i.index_reg && !i.base_reg)
4114 {
4115 /* Handle:
4116 lea symbol, %rN -> mov $symbol, %rN
4117 */
4118 if (flag_code == CODE_64BIT)
4119 {
4120 /* Don't transform a relocation to a 16-bit one. */
4121 if (i.op[0].disps
4122 && i.op[0].disps->X_op != O_constant
4123 && i.op[1].regs->reg_type.bitfield.word)
4124 return;
4125
4126 if (!i.op[1].regs->reg_type.bitfield.qword
4127 || i.tm.opcode_modifier.size == SIZE32)
4128 {
4129 i.tm.base_opcode = 0xb8;
4130 i.tm.opcode_modifier.modrm = 0;
4131 if (!i.op[1].regs->reg_type.bitfield.word)
4132 i.types[0].bitfield.imm32 = 1;
4133 else
4134 {
4135 i.tm.opcode_modifier.size = SIZE16;
4136 i.types[0].bitfield.imm16 = 1;
4137 }
4138 }
4139 else
4140 {
4141 /* Subject to further optimization below. */
4142 i.tm.base_opcode = 0xc7;
4143 i.tm.extension_opcode = 0;
4144 i.types[0].bitfield.imm32s = 1;
4145 i.types[0].bitfield.baseindex = 0;
4146 }
4147 }
4148 /* Outside of 64-bit mode address and operand sizes have to match if
4149 a relocation is involved, as otherwise we wouldn't (currently) or
4150 even couldn't express the relocation correctly. */
4151 else if (i.op[0].disps
4152 && i.op[0].disps->X_op != O_constant
4153 && ((!i.prefix[ADDR_PREFIX])
4154 != (flag_code == CODE_32BIT
4155 ? i.op[1].regs->reg_type.bitfield.dword
4156 : i.op[1].regs->reg_type.bitfield.word)))
4157 return;
4158 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4159 destination is going to grow encoding size. */
4160 else if (flag_code == CODE_16BIT
4161 && (optimize <= 1 || optimize_for_space)
4162 && !i.prefix[ADDR_PREFIX]
4163 && i.op[1].regs->reg_type.bitfield.dword)
4164 return;
4165 else
4166 {
4167 i.tm.base_opcode = 0xb8;
4168 i.tm.opcode_modifier.modrm = 0;
4169 if (i.op[1].regs->reg_type.bitfield.dword)
4170 i.types[0].bitfield.imm32 = 1;
4171 else
4172 i.types[0].bitfield.imm16 = 1;
4173
4174 if (i.op[0].disps
4175 && i.op[0].disps->X_op == O_constant
4176 && i.op[1].regs->reg_type.bitfield.dword
4177 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4178 GCC 5. */
4179 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4180 i.op[0].disps->X_add_number &= 0xffff;
4181 }
4182
4183 i.tm.operand_types[0] = i.types[0];
4184 i.imm_operands = 1;
4185 if (!i.op[0].imms)
4186 {
4187 i.op[0].imms = &im_expressions[0];
4188 i.op[0].imms->X_op = O_absent;
4189 }
4190 }
4191 else if (i.op[0].disps
4192 && (i.op[0].disps->X_op != O_constant
4193 || i.op[0].disps->X_add_number))
4194 return;
4195 else
4196 {
4197 /* Handle:
4198 lea (%rM), %rN -> mov %rM, %rN
4199 lea (,%rM,1), %rN -> mov %rM, %rN
4200 lea (%rM), %rN -> movzx %rM, %rN
4201 */
4202 const reg_entry *addr_reg;
4203
4204 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4205 addr_reg = i.base_reg;
4206 else if (!i.base_reg
4207 && i.index_reg->reg_num != RegIZ
4208 && !i.log2_scale_factor)
4209 addr_reg = i.index_reg;
4210 else
4211 return;
4212
4213 if (addr_reg->reg_type.bitfield.word
4214 && i.op[1].regs->reg_type.bitfield.dword)
4215 {
4216 if (flag_code != CODE_32BIT)
4217 return;
4218 i.tm.opcode_modifier.opcodespace = SPACE_0F;
4219 i.tm.base_opcode = 0xb7;
4220 }
4221 else
4222 i.tm.base_opcode = 0x8b;
4223
4224 if (addr_reg->reg_type.bitfield.dword
4225 && i.op[1].regs->reg_type.bitfield.qword)
4226 i.tm.opcode_modifier.size = SIZE32;
4227
4228 i.op[0].regs = addr_reg;
4229 i.reg_operands = 2;
4230 }
4231
4232 i.mem_operands = 0;
4233 i.disp_operands = 0;
4234 i.prefix[ADDR_PREFIX] = 0;
4235 i.prefix[SEG_PREFIX] = 0;
4236 i.seg[0] = NULL;
4237 }
4238
4239 if (optimize_for_space
4240 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4241 && i.reg_operands == 1
4242 && i.imm_operands == 1
4243 && !i.types[1].bitfield.byte
4244 && i.op[0].imms->X_op == O_constant
4245 && fits_in_imm7 (i.op[0].imms->X_add_number)
4246 && (i.tm.base_opcode == 0xa8
4247 || (i.tm.base_opcode == 0xf6
4248 && i.tm.extension_opcode == 0x0)))
4249 {
4250 /* Optimize: -Os:
4251 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4252 */
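/* E.g. `testl $1, %ecx' (f7 c1 01 00 00 00) shrinks to `testb $1, %cl'
   (f6 c1 01), saving three bytes; byte sequences are illustrative.  */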
4253 unsigned int base_regnum = i.op[1].regs->reg_num;
4254 if (flag_code == CODE_64BIT || base_regnum < 4)
4255 {
4256 i.types[1].bitfield.byte = 1;
4257 /* Ignore the suffix. */
4258 i.suffix = 0;
4259 /* Convert to byte registers. */
4260 if (i.types[1].bitfield.word)
4261 j = 16;
4262 else if (i.types[1].bitfield.dword)
4263 j = 32;
4264 else
4265 j = 48;
4266 if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4267 j += 8;
4268 i.op[1].regs -= j;
4269 }
4270 }
4271 else if (flag_code == CODE_64BIT
4272 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4273 && ((i.types[1].bitfield.qword
4274 && i.reg_operands == 1
4275 && i.imm_operands == 1
4276 && i.op[0].imms->X_op == O_constant
4277 && ((i.tm.base_opcode == 0xb8
4278 && i.tm.extension_opcode == None
4279 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4280 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4281 && ((i.tm.base_opcode == 0x24
4282 || i.tm.base_opcode == 0xa8)
4283 || (i.tm.base_opcode == 0x80
4284 && i.tm.extension_opcode == 0x4)
4285 || ((i.tm.base_opcode == 0xf6
4286 || (i.tm.base_opcode | 1) == 0xc7)
4287 && i.tm.extension_opcode == 0x0)))
4288 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4289 && i.tm.base_opcode == 0x83
4290 && i.tm.extension_opcode == 0x4)))
4291 || (i.types[0].bitfield.qword
4292 && ((i.reg_operands == 2
4293 && i.op[0].regs == i.op[1].regs
4294 && (i.tm.base_opcode == 0x30
4295 || i.tm.base_opcode == 0x28))
4296 || (i.reg_operands == 1
4297 && i.operands == 1
4298 && i.tm.base_opcode == 0x30)))))
4299 {
4300 /* Optimize: -O:
4301 andq $imm31, %r64 -> andl $imm31, %r32
4302 andq $imm7, %r64 -> andl $imm7, %r32
4303 testq $imm31, %r64 -> testl $imm31, %r32
4304 xorq %r64, %r64 -> xorl %r32, %r32
4305 subq %r64, %r64 -> subl %r32, %r32
4306 movq $imm31, %r64 -> movl $imm31, %r32
4307 movq $imm32, %r64 -> movl $imm32, %r32
4308 */
4309 i.tm.opcode_modifier.size = SIZE32;
4310 if (i.imm_operands)
4311 {
4312 i.types[0].bitfield.imm32 = 1;
4313 i.types[0].bitfield.imm32s = 0;
4314 i.types[0].bitfield.imm64 = 0;
4315 }
4316 else
4317 {
4318 i.types[0].bitfield.dword = 1;
4319 i.types[0].bitfield.qword = 0;
4320 }
4321 i.types[1].bitfield.dword = 1;
4322 i.types[1].bitfield.qword = 0;
4323 if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4324 {
4325 /* Handle
4326 movq $imm31, %r64 -> movl $imm31, %r32
4327 movq $imm32, %r64 -> movl $imm32, %r32
4328 */
4329 i.tm.operand_types[0].bitfield.imm32 = 1;
4330 i.tm.operand_types[0].bitfield.imm32s = 0;
4331 i.tm.operand_types[0].bitfield.imm64 = 0;
4332 if ((i.tm.base_opcode | 1) == 0xc7)
4333 {
4334 /* Handle
4335 movq $imm31, %r64 -> movl $imm31, %r32
4336 */
4337 i.tm.base_opcode = 0xb8;
4338 i.tm.extension_opcode = None;
4339 i.tm.opcode_modifier.w = 0;
4340 i.tm.opcode_modifier.modrm = 0;
4341 }
4342 }
4343 }
4344 else if (optimize > 1
4345 && !optimize_for_space
4346 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4347 && i.reg_operands == 2
4348 && i.op[0].regs == i.op[1].regs
4349 && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4350 || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4351 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4352 {
4353 /* Optimize: -O2:
4354 andb %rN, %rN -> testb %rN, %rN
4355 andw %rN, %rN -> testw %rN, %rN
4356 andq %rN, %rN -> testq %rN, %rN
4357 orb %rN, %rN -> testb %rN, %rN
4358 orw %rN, %rN -> testw %rN, %rN
4359 orq %rN, %rN -> testq %rN, %rN
4360
4361 and outside of 64-bit mode
4362
4363 andl %rN, %rN -> testl %rN, %rN
4364 orl %rN, %rN -> testl %rN, %rN
4365 */
4366 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4367 }
4368 else if (i.reg_operands == 3
4369 && i.op[0].regs == i.op[1].regs
4370 && !i.types[2].bitfield.xmmword
4371 && (i.tm.opcode_modifier.vex
4372 || ((!i.mask.reg || i.mask.zeroing)
4373 && is_evex_encoding (&i.tm)
4374 && (i.vec_encoding != vex_encoding_evex
4375 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4376 || i.tm.cpu_flags.bitfield.cpuavx512vl
4377 || (i.tm.operand_types[2].bitfield.zmmword
4378 && i.types[2].bitfield.ymmword))))
4379 && i.tm.opcode_modifier.opcodespace == SPACE_0F
4380 && ((i.tm.base_opcode | 2) == 0x57
4381 || i.tm.base_opcode == 0xdf
4382 || i.tm.base_opcode == 0xef
4383 || (i.tm.base_opcode | 3) == 0xfb
4384 || i.tm.base_opcode == 0x42
4385 || i.tm.base_opcode == 0x47))
4386 {
4387 /* Optimize: -O1:
4388 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4389 vpsubq and vpsubw:
4390 EVEX VOP %zmmM, %zmmM, %zmmN
4391 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4392 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4393 EVEX VOP %ymmM, %ymmM, %ymmN
4394 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4395 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4396 VEX VOP %ymmM, %ymmM, %ymmN
4397 -> VEX VOP %xmmM, %xmmM, %xmmN
4398 VOP, one of vpandn and vpxor:
4399 VEX VOP %ymmM, %ymmM, %ymmN
4400 -> VEX VOP %xmmM, %xmmM, %xmmN
4401 VOP, one of vpandnd and vpandnq:
4402 EVEX VOP %zmmM, %zmmM, %zmmN
4403 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4404 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4405 EVEX VOP %ymmM, %ymmM, %ymmN
4406 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4407 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4408 VOP, one of vpxord and vpxorq:
4409 EVEX VOP %zmmM, %zmmM, %zmmN
4410 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4411 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4412 EVEX VOP %ymmM, %ymmM, %ymmN
4413 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4414 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4415 VOP, one of kxord and kxorq:
4416 VEX VOP %kM, %kM, %kN
4417 -> VEX kxorw %kM, %kM, %kN
4418 VOP, one of kandnd and kandnq:
4419 VEX VOP %kM, %kM, %kN
4420 -> VEX kandnw %kM, %kM, %kN
4421 */
4422 if (is_evex_encoding (&i.tm))
4423 {
4424 if (i.vec_encoding != vex_encoding_evex)
4425 {
4426 i.tm.opcode_modifier.vex = VEX128;
4427 i.tm.opcode_modifier.vexw = VEXW0;
4428 i.tm.opcode_modifier.evex = 0;
4429 }
4430 else if (optimize > 1)
4431 i.tm.opcode_modifier.evex = EVEX128;
4432 else
4433 return;
4434 }
4435 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4436 {
4437 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4438 i.tm.opcode_modifier.vexw = VEXW0;
4439 }
4440 else
4441 i.tm.opcode_modifier.vex = VEX128;
4442
4443 if (i.tm.opcode_modifier.vex)
4444 for (j = 0; j < 3; j++)
4445 {
4446 i.types[j].bitfield.xmmword = 1;
4447 i.types[j].bitfield.ymmword = 0;
4448 }
4449 }
4450 else if (i.vec_encoding != vex_encoding_evex
4451 && !i.types[0].bitfield.zmmword
4452 && !i.types[1].bitfield.zmmword
4453 && !i.mask.reg
4454 && !i.broadcast.bytes
4455 && is_evex_encoding (&i.tm)
4456 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4457 || (i.tm.base_opcode & ~4) == 0xdb
4458 || (i.tm.base_opcode & ~4) == 0xeb)
4459 && i.tm.extension_opcode == None)
4460 {
4461 /* Optimize: -O1:
4462 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4463 vmovdqu32 and vmovdqu64:
4464 EVEX VOP %xmmM, %xmmN
4465 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4466 EVEX VOP %ymmM, %ymmN
4467 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4468 EVEX VOP %xmmM, mem
4469 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4470 EVEX VOP %ymmM, mem
4471 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4472 EVEX VOP mem, %xmmN
4473 -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4474 EVEX VOP mem, %ymmN
4475 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4476 VOP, one of vpand, vpandn, vpor, vpxor:
4477 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4478 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4479 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4480 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4481 EVEX VOP{d,q} mem, %xmmM, %xmmN
4482 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4483 EVEX VOP{d,q} mem, %ymmM, %ymmN
4484 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4485 */
4486 for (j = 0; j < i.operands; j++)
4487 if (operand_type_check (i.types[j], disp)
4488 && i.op[j].disps->X_op == O_constant)
4489 {
4490 /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4491 has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4492 bytes, we choose EVEX Disp8 over VEX Disp32. */
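/* Worked example (a sketch): with memshift 4, a displacement of 0x100
   fits EVEX Disp8 (0x100 >> 4 = 0x10) but needs four bytes under VEX,
   so evex_disp8 != vex_disp8 and the insn is left as EVEX.  */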
4493 int evex_disp8, vex_disp8;
4494 unsigned int memshift = i.memshift;
4495 offsetT n = i.op[j].disps->X_add_number;
4496
4497 evex_disp8 = fits_in_disp8 (n);
4498 i.memshift = 0;
4499 vex_disp8 = fits_in_disp8 (n);
4500 if (evex_disp8 != vex_disp8)
4501 {
4502 i.memshift = memshift;
4503 return;
4504 }
4505
4506 i.types[j].bitfield.disp8 = vex_disp8;
4507 break;
4508 }
4509 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4510 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4511 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4512 i.tm.opcode_modifier.vex
4513 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4514 i.tm.opcode_modifier.vexw = VEXW0;
4515 /* VPAND, VPOR, and VPXOR are commutative. */
4516 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4517 i.tm.opcode_modifier.commutative = 1;
4518 i.tm.opcode_modifier.evex = 0;
4519 i.tm.opcode_modifier.masking = 0;
4520 i.tm.opcode_modifier.broadcast = 0;
4521 i.tm.opcode_modifier.disp8memshift = 0;
4522 i.memshift = 0;
4523 if (j < i.operands)
4524 i.types[j].bitfield.disp8
4525 = fits_in_disp8 (i.op[j].disps->X_add_number);
4526 }
4527 }
4528
4529 /* Return non-zero for load instruction. */
4530
4531 static int
4532 load_insn_p (void)
4533 {
4534 unsigned int dest;
4535 int any_vex_p = is_any_vex_encoding (&i.tm);
4536 unsigned int base_opcode = i.tm.base_opcode | 1;
4537
4538 if (!any_vex_p)
4539 {
4540 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4541 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
4542 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4543 return 0;
4544
4545 /* pop. */
4546 if (strcmp (i.tm.name, "pop") == 0)
4547 return 1;
4548 }
4549
4550 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4551 {
4552 /* popf, popa. */
4553 if (i.tm.base_opcode == 0x9d
4554 || i.tm.base_opcode == 0x61)
4555 return 1;
4556
4557 /* movs, cmps, lods, scas. */
4558 if ((i.tm.base_opcode | 0xb) == 0xaf)
4559 return 1;
4560
4561 /* outs, xlatb. */
4562 if (base_opcode == 0x6f
4563 || i.tm.base_opcode == 0xd7)
4564 return 1;
4565 /* NB: AMD-specific insns with implicit memory operands are
4566 intentionally not covered. */
4567 }
4568
4569 /* No memory operand. */
4570 if (!i.mem_operands)
4571 return 0;
4572
4573 if (any_vex_p)
4574 {
4575 /* vldmxcsr. */
4576 if (i.tm.base_opcode == 0xae
4577 && i.tm.opcode_modifier.vex
4578 && i.tm.opcode_modifier.opcodespace == SPACE_0F
4579 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4580 && i.tm.extension_opcode == 2)
4581 return 1;
4582 }
4583 else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4584 {
4585 /* test, not, neg, mul, imul, div, idiv. */
4586 if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4587 && i.tm.extension_opcode != 1)
4588 return 1;
4589
4590 /* inc, dec. */
4591 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4592 return 1;
4593
4594 /* add, or, adc, sbb, and, sub, xor, cmp. */
4595 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4596 return 1;
4597
4598 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4599 if ((base_opcode == 0xc1
4600 || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4601 && i.tm.extension_opcode != 6)
4602 return 1;
4603
4604 /* Check for x87 instructions. */
4605 if (base_opcode >= 0xd8 && base_opcode <= 0xdf)
4606 {
4607 /* Skip fst, fstp, fstenv, fstcw. */
4608 if (i.tm.base_opcode == 0xd9
4609 && (i.tm.extension_opcode == 2
4610 || i.tm.extension_opcode == 3
4611 || i.tm.extension_opcode == 6
4612 || i.tm.extension_opcode == 7))
4613 return 0;
4614
4615 /* Skip fisttp, fist, fistp, fstp. */
4616 if (i.tm.base_opcode == 0xdb
4617 && (i.tm.extension_opcode == 1
4618 || i.tm.extension_opcode == 2
4619 || i.tm.extension_opcode == 3
4620 || i.tm.extension_opcode == 7))
4621 return 0;
4622
4623 /* Skip fisttp, fst, fstp, fsave, fstsw. */
4624 if (i.tm.base_opcode == 0xdd
4625 && (i.tm.extension_opcode == 1
4626 || i.tm.extension_opcode == 2
4627 || i.tm.extension_opcode == 3
4628 || i.tm.extension_opcode == 6
4629 || i.tm.extension_opcode == 7))
4630 return 0;
4631
4632 /* Skip fisttp, fist, fistp, fbstp, fistp. */
4633 if (i.tm.base_opcode == 0xdf
4634 && (i.tm.extension_opcode == 1
4635 || i.tm.extension_opcode == 2
4636 || i.tm.extension_opcode == 3
4637 || i.tm.extension_opcode == 6
4638 || i.tm.extension_opcode == 7))
4639 return 0;
4640
4641 return 1;
4642 }
4643 }
4644 else if (i.tm.opcode_modifier.opcodespace == SPACE_0F)
4645 {
4646 /* bt, bts, btr, btc. */
4647 if (i.tm.base_opcode == 0xba
4648 && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4649 return 1;
4650
4651 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
4652 if (i.tm.base_opcode == 0xc7
4653 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4654 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4655 || i.tm.extension_opcode == 6))
4656 return 1;
4657
4658 /* fxrstor, ldmxcsr, xrstor. */
4659 if (i.tm.base_opcode == 0xae
4660 && (i.tm.extension_opcode == 1
4661 || i.tm.extension_opcode == 2
4662 || i.tm.extension_opcode == 5))
4663 return 1;
4664
4665 /* lgdt, lidt, lmsw. */
4666 if (i.tm.base_opcode == 0x01
4667 && (i.tm.extension_opcode == 2
4668 || i.tm.extension_opcode == 3
4669 || i.tm.extension_opcode == 6))
4670 return 1;
4671 }
4672
4673 dest = i.operands - 1;
4674
4675 /* Check fake imm8 operand and 3 source operands. */
4676 if ((i.tm.opcode_modifier.immext
4677 || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4678 && i.types[dest].bitfield.imm8)
4679 dest--;
4680
4681 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
4682 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4683 && (base_opcode == 0x1
4684 || base_opcode == 0x9
4685 || base_opcode == 0x11
4686 || base_opcode == 0x19
4687 || base_opcode == 0x21
4688 || base_opcode == 0x29
4689 || base_opcode == 0x31
4690 || base_opcode == 0x39
4691 || (base_opcode | 2) == 0x87))
4692 return 1;
4693
4694 /* xadd. */
4695 if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4696 && base_opcode == 0xc1)
4697 return 1;
4698
4699 /* Check for load instruction. */
4700 return (i.types[dest].bitfield.class != ClassNone
4701 || i.types[dest].bitfield.instance == Accum);
4702 }
4703
4704 /* Output lfence (0x0f 0xae 0xe8) after instruction. */
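/* E.g. under -mlfence-after-load=yes, `movq (%rdi), %rax' is followed by
   an lfence so the loaded value cannot steer transient execution (an LVI
   mitigation; example ours).  */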
4705
4706 static void
4707 insert_lfence_after (void)
4708 {
4709 if (lfence_after_load && load_insn_p ())
4710 {
4711 /* There are also two REP string instructions that require
4712 special treatment. Specifically, the compare string (CMPS)
4713 and scan string (SCAS) instructions set EFLAGS in a manner
4714 that depends on the data being compared/scanned. When used
4715 with a REP prefix, the number of iterations may therefore
4716 vary depending on this data. If the data is a program secret
4717 chosen by the adversary using an LVI method,
4718 then this data-dependent behavior may leak some aspect
4719 of the secret. */
4720 if (((i.tm.base_opcode | 0x1) == 0xa7
4721 || (i.tm.base_opcode | 0x1) == 0xaf)
4722 && i.prefix[REP_PREFIX])
4723 {
4724 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4725 i.tm.name);
4726 }
4727 char *p = frag_more (3);
4728 *p++ = 0xf;
4729 *p++ = 0xae;
4730 *p = 0xe8;
4731 }
4732 }
4733
4734 /* Output lfence (0x0f 0xae 0xe8) before instruction. */
4735
4736 static void
4737 insert_lfence_before (void)
4738 {
4739 char *p;
4740
4741 if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
4742 return;
4743
4744 if (i.tm.base_opcode == 0xff
4745 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4746 {
4747 /* Insert lfence before indirect branch if needed. */
4748
4749 if (lfence_before_indirect_branch == lfence_branch_none)
4750 return;
4751
4752 if (i.operands != 1)
4753 abort ();
4754
4755 if (i.reg_operands == 1)
4756 {
4757 /* Indirect branch via register. Don't insert lfence with
4758 -mlfence-after-load=yes. */
4759 if (lfence_after_load
4760 || lfence_before_indirect_branch == lfence_branch_memory)
4761 return;
4762 }
4763 else if (i.mem_operands == 1
4764 && lfence_before_indirect_branch != lfence_branch_register)
4765 {
4766 as_warn (_("indirect `%s` with memory operand should be avoided"),
4767 i.tm.name);
4768 return;
4769 }
4770 else
4771 return;
4772
4773 if (last_insn.kind != last_insn_other
4774 && last_insn.seg == now_seg)
4775 {
4776 as_warn_where (last_insn.file, last_insn.line,
4777 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4778 last_insn.name, i.tm.name);
4779 return;
4780 }
4781
4782 p = frag_more (3);
4783 *p++ = 0xf;
4784 *p++ = 0xae;
4785 *p = 0xe8;
4786 return;
4787 }
4788
4789 /* Output or/not/shl and lfence before near ret. */
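/* E.g. with -mlfence-before-ret=not in 64-bit code, `ret' is preceded by
   `notq (%rsp); notq (%rsp); lfence' - the paired NOTs leave the return
   address unchanged while the lfence fences its consumption (sketch).  */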
4790 if (lfence_before_ret != lfence_before_ret_none
4791 && (i.tm.base_opcode == 0xc2
4792 || i.tm.base_opcode == 0xc3))
4793 {
4794 if (last_insn.kind != last_insn_other
4795 && last_insn.seg == now_seg)
4796 {
4797 as_warn_where (last_insn.file, last_insn.line,
4798 _("`%s` skips -mlfence-before-ret on `%s`"),
4799 last_insn.name, i.tm.name);
4800 return;
4801 }
4802
4803 /* Near ret ignores the operand size override under CPU64. */
4804 char prefix = flag_code == CODE_64BIT
4805 ? 0x48
4806 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4807
4808 if (lfence_before_ret == lfence_before_ret_not)
4809 {
4810 /* not: 0xf71424, may add prefix
4811 for operand size override or 64-bit code. */
4812 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4813 if (prefix)
4814 *p++ = prefix;
4815 *p++ = 0xf7;
4816 *p++ = 0x14;
4817 *p++ = 0x24;
4818 if (prefix)
4819 *p++ = prefix;
4820 *p++ = 0xf7;
4821 *p++ = 0x14;
4822 *p++ = 0x24;
4823 }
4824 else
4825 {
4826 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4827 if (prefix)
4828 *p++ = prefix;
4829 if (lfence_before_ret == lfence_before_ret_or)
4830 {
4831 /* or: 0x830c2400, may add prefix
4832 for operand size override or 64-bit code. */
4833 *p++ = 0x83;
4834 *p++ = 0x0c;
4835 }
4836 else
4837 {
4838 /* shl: 0xc1242400, may add prefix
4839 for operand size override or 64-bit code. */
4840 *p++ = 0xc1;
4841 *p++ = 0x24;
4842 }
4843
4844 *p++ = 0x24;
4845 *p++ = 0x0;
4846 }
4847
4848 *p++ = 0xf;
4849 *p++ = 0xae;
4850 *p = 0xe8;
4851 }
4852 }
4853
4854 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
4855 parsing pass. Instead of introducing a rarely used new insn attribute this
4856 utilizes a common pattern between affected templates. It is deemed
4857 acceptable that this will lead to unnecessary pass 2 preparations in a
4858 limited set of cases. */
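/* (Loosely, per the checks below: SSE2AVX templates whose last operand is
   a SIMD register, movsx (0f be/bf), and movsxd/arpl (base opcode 0x63).)  */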
4859 static INLINE bool may_need_pass2 (const insn_template *t)
4860 {
4861 return t->opcode_modifier.sse2avx
4862 /* Note that all SSE2AVX templates have at least one operand. */
4863 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
4864 : (t->opcode_modifier.opcodespace == SPACE_0F
4865 && (t->base_opcode | 1) == 0xbf)
4866 || (t->opcode_modifier.opcodespace == SPACE_BASE
4867 && t->base_opcode == 0x63);
4868 }
4869
4870 /* This is the guts of the machine-dependent assembler. LINE points to a
4871 machine dependent instruction. This function is supposed to emit
4872 the frags/bytes it assembles to. */
4873
4874 void
4875 md_assemble (char *line)
4876 {
4877 unsigned int j;
4878 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
4879 const char *end, *pass1_mnem = NULL;
4880 enum i386_error pass1_err = 0;
4881 const insn_template *t;
4882
4883 /* Initialize globals. */
4884 current_templates = NULL;
4885 retry:
4886 memset (&i, '\0', sizeof (i));
4887 i.rounding.type = rc_none;
4888 for (j = 0; j < MAX_OPERANDS; j++)
4889 i.reloc[j] = NO_RELOC;
4890 memset (disp_expressions, '\0', sizeof (disp_expressions));
4891 memset (im_expressions, '\0', sizeof (im_expressions));
4892 save_stack_p = save_stack;
4893
4894 /* First parse an instruction mnemonic & call i386_operand for the operands.
4895 We assume that the scrubber has arranged it so that line[0] is the valid
4896 start of a (possibly prefixed) mnemonic. */
4897
4898 end = parse_insn (line, mnemonic);
4899 if (end == NULL)
4900 {
4901 if (pass1_mnem != NULL)
4902 goto match_error;
4903 if (i.error != no_error)
4904 {
4905 gas_assert (current_templates != NULL);
4906 if (may_need_pass2 (current_templates->start) && !i.suffix)
4907 goto no_match;
4908 /* No point in trying a 2nd pass - it'll only find the same suffix
4909 again. */
4910 mnem_suffix = i.suffix;
4911 goto match_error;
4912 }
4913 return;
4914 }
4915 if (may_need_pass2 (current_templates->start))
4916 {
4917 /* Make a copy of the full line in case we need to retry. */
4918 copy = xstrdup (line);
4919 }
4920 line += end - line;
4921 mnem_suffix = i.suffix;
4922
4923 line = parse_operands (line, mnemonic);
4924 this_operand = -1;
4925 if (line == NULL)
4926 {
4927 free (copy);
4928 return;
4929 }
4930
4931 /* Now we've parsed the mnemonic into a set of templates, and have the
4932 operands at hand. */
4933
4934 /* All Intel opcodes have reversed operands except for "bound", "enter",
4935 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4936 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
4937 intersegment "jmp" and "call" instructions with 2 immediate operands so
4938 that the immediate segment precedes the offset consistently in Intel and
4939 AT&T modes. */
4940 if (intel_syntax
4941 && i.operands > 1
4942 && (strcmp (mnemonic, "bound") != 0)
4943 && (strncmp (mnemonic, "invlpg", 6) != 0)
4944 && !startswith (mnemonic, "monitor")
4945 && !startswith (mnemonic, "mwait")
4946 && (strcmp (mnemonic, "pvalidate") != 0)
4947 && !startswith (mnemonic, "rmp")
4948 && (strcmp (mnemonic, "tpause") != 0)
4949 && (strcmp (mnemonic, "umwait") != 0)
4950 && !(i.operands == 2
4951 && operand_type_check (i.types[0], imm)
4952 && operand_type_check (i.types[1], imm)))
4953 swap_operands ();
4954
4955 /* The order of the immediates should be reversed
4956 for the two-immediate extrq and insertq instructions. */
4957 if (i.imm_operands == 2
4958 && (strcmp (mnemonic, "extrq") == 0
4959 || strcmp (mnemonic, "insertq") == 0))
4960 swap_2_operands (0, 1);
4961
4962 if (i.imm_operands)
4963 optimize_imm ();
4964
4965 if (i.disp_operands && !want_disp32 (current_templates->start)
4966 && (!current_templates->start->opcode_modifier.jump
4967 || i.jumpabsolute || i.types[0].bitfield.baseindex))
4968 {
4969 for (j = 0; j < i.operands; ++j)
4970 {
4971 const expressionS *exp = i.op[j].disps;
4972
4973 if (!operand_type_check (i.types[j], disp))
4974 continue;
4975
4976 if (exp->X_op != O_constant)
4977 continue;
4978
4979 /* Since the displacement is sign-extended to 64 bits, don't allow
4980 disp32 if it is out of range. */
4981 if (fits_in_signed_long (exp->X_add_number))
4982 continue;
4983
4984 i.types[j].bitfield.disp32 = 0;
4985 if (i.types[j].bitfield.baseindex)
4986 {
4987 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
4988 (uint64_t) exp->X_add_number);
4989 return;
4990 }
4991 }
4992 }
4993
4994 /* Don't optimize displacement for movabs since it only takes 64bit
4995 displacement. */
4996 if (i.disp_operands
4997 && i.disp_encoding <= disp_encoding_8bit
4998 && (flag_code != CODE_64BIT
4999 || strcmp (mnemonic, "movabs") != 0))
5000 optimize_disp ();
5001
5002 /* Next, we find a template that matches the given insn,
5003 making sure the overlap of the given operands types is consistent
5004 with the template operand types. */
5005
5006 if (!(t = match_template (mnem_suffix)))
5007 {
5008 const char *err_msg;
5009
5010 if (copy && !mnem_suffix)
5011 {
5012 line = copy;
5013 copy = NULL;
5014 no_match:
5015 pass1_err = i.error;
5016 pass1_mnem = current_templates->start->name;
5017 goto retry;
5018 }
5019
5020 /* If a non-/only-64bit template (group) was found in pass 1, and if
5021 _some_ template (group) was found in pass 2, squash pass 1's
5022 error. */
5023 if (pass1_err == unsupported_64bit)
5024 pass1_mnem = NULL;
5025
5026 match_error:
5027 free (copy);
5028
5029 switch (pass1_mnem ? pass1_err : i.error)
5030 {
5031 default:
5032 abort ();
5033 case operand_size_mismatch:
5034 err_msg = _("operand size mismatch");
5035 break;
5036 case operand_type_mismatch:
5037 err_msg = _("operand type mismatch");
5038 break;
5039 case register_type_mismatch:
5040 err_msg = _("register type mismatch");
5041 break;
5042 case number_of_operands_mismatch:
5043 err_msg = _("number of operands mismatch");
5044 break;
5045 case invalid_instruction_suffix:
5046 err_msg = _("invalid instruction suffix");
5047 break;
5048 case bad_imm4:
5049 err_msg = _("constant doesn't fit in 4 bits");
5050 break;
5051 case unsupported_with_intel_mnemonic:
5052 err_msg = _("unsupported with Intel mnemonic");
5053 break;
5054 case unsupported_syntax:
5055 err_msg = _("unsupported syntax");
5056 break;
5057 case unsupported:
5058 as_bad (_("unsupported instruction `%s'"),
5059 pass1_mnem ? pass1_mnem : current_templates->start->name);
5060 return;
5061 case unsupported_on_arch:
5062 as_bad (_("`%s' is not supported on `%s%s'"),
5063 pass1_mnem ? pass1_mnem : current_templates->start->name,
5064 cpu_arch_name ? cpu_arch_name : default_arch,
5065 cpu_sub_arch_name ? cpu_sub_arch_name : "");
5066 return;
5067 case unsupported_64bit:
5068 if (ISLOWER (mnem_suffix))
5069 as_bad (_("`%s%c' is %s supported in 64-bit mode"),
5070 pass1_mnem ? pass1_mnem : current_templates->start->name,
5071 mnem_suffix,
5072 flag_code == CODE_64BIT ? _("not") : _("only"));
5073 else
5074 as_bad (_("`%s' is %s supported in 64-bit mode"),
5075 pass1_mnem ? pass1_mnem : current_templates->start->name,
5076 flag_code == CODE_64BIT ? _("not") : _("only"));
5077 return;
5078 case invalid_sib_address:
5079 err_msg = _("invalid SIB address");
5080 break;
5081 case invalid_vsib_address:
5082 err_msg = _("invalid VSIB address");
5083 break;
5084 case invalid_vector_register_set:
5085 err_msg = _("mask, index, and destination registers must be distinct");
5086 break;
5087 case invalid_tmm_register_set:
5088 err_msg = _("all tmm registers must be distinct");
5089 break;
5090 case invalid_dest_and_src_register_set:
5091 err_msg = _("destination and source registers must be distinct");
5092 break;
5093 case unsupported_vector_index_register:
5094 err_msg = _("unsupported vector index register");
5095 break;
5096 case unsupported_broadcast:
5097 err_msg = _("unsupported broadcast");
5098 break;
5099 case broadcast_needed:
5100 err_msg = _("broadcast is needed for operand of such type");
5101 break;
5102 case unsupported_masking:
5103 err_msg = _("unsupported masking");
5104 break;
5105 case mask_not_on_destination:
5106 err_msg = _("mask not on destination operand");
5107 break;
5108 case no_default_mask:
5109 err_msg = _("default mask isn't allowed");
5110 break;
5111 case unsupported_rc_sae:
5112 err_msg = _("unsupported static rounding/sae");
5113 break;
5114 case invalid_register_operand:
5115 err_msg = _("invalid register operand");
5116 break;
5117 }
5118 as_bad (_("%s for `%s'"), err_msg,
5119 pass1_mnem ? pass1_mnem : current_templates->start->name);
5120 return;
5121 }
5122
5123 free (copy);
5124
5125 if (sse_check != check_none
5126 /* The opcode space check isn't strictly needed; it's there only to
5127 bypass the logic below when easily possible. */
5128 && t->opcode_modifier.opcodespace >= SPACE_0F
5129 && t->opcode_modifier.opcodespace <= SPACE_0F3A
5130 && !i.tm.cpu_flags.bitfield.cpusse4a
5131 && !is_any_vex_encoding (t))
5132 {
5133 bool simd = false;
5134
5135 for (j = 0; j < t->operands; ++j)
5136 {
5137 if (t->operand_types[j].bitfield.class == RegMMX)
5138 break;
5139 if (t->operand_types[j].bitfield.class == RegSIMD)
5140 simd = true;
5141 }
5142
5143 if (j >= t->operands && simd)
5144 (sse_check == check_warning
5145 ? as_warn
5146 : as_bad) (_("SSE instruction `%s' is used"), i.tm.name);
5147 }
5148
5149 if (i.tm.opcode_modifier.fwait)
5150 if (!add_prefix (FWAIT_OPCODE))
5151 return;
5152
5153 /* Check if REP prefix is OK. */
5154 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5155 {
5156 as_bad (_("invalid instruction `%s' after `%s'"),
5157 i.tm.name, i.rep_prefix);
5158 return;
5159 }
5160
5161 /* Check for lock without a lockable instruction. Destination operand
5162 must be memory unless it is xchg (0x86). */
5163 if (i.prefix[LOCK_PREFIX]
5164 && (i.tm.opcode_modifier.prefixok < PrefixLock
5165 || i.mem_operands == 0
5166 || (i.tm.base_opcode != 0x86
5167 && !(i.flags[i.operands - 1] & Operand_Mem))))
5168 {
5169 as_bad (_("expecting lockable instruction after `lock'"));
5170 return;
5171 }
5172
5173 if (is_any_vex_encoding (&i.tm)
5174 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5175 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
5176 {
5177 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
5178 if (i.prefix[DATA_PREFIX])
5179 {
5180 as_bad (_("data size prefix invalid with `%s'"), i.tm.name);
5181 return;
5182 }
5183
5184 /* Don't allow e.g. KMOV in TLS code sequences. */
5185 for (j = i.imm_operands; j < i.operands; ++j)
5186 switch (i.reloc[j])
5187 {
5188 case BFD_RELOC_386_TLS_GOTIE:
5189 case BFD_RELOC_386_TLS_LE_32:
5190 case BFD_RELOC_X86_64_GOTTPOFF:
5191 case BFD_RELOC_X86_64_TLSLD:
5192 as_bad (_("TLS relocation cannot be used with `%s'"), i.tm.name);
5193 return;
5194 default:
5195 break;
5196 }
5197 }
5198
5199 /* Check if HLE prefix is OK. */
5200 if (i.hle_prefix && !check_hle ())
5201 return;
5202
5203 /* Check BND prefix. */
5204 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5205 as_bad (_("expecting valid branch instruction after `bnd'"));
5206
5207 /* Check NOTRACK prefix. */
5208 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5209 as_bad (_("expecting indirect branch instruction after `notrack'"));
5210
5211 if (i.tm.cpu_flags.bitfield.cpumpx)
5212 {
5213 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5214 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5215 else if (flag_code != CODE_16BIT
5216 ? i.prefix[ADDR_PREFIX]
5217 : i.mem_operands && !i.prefix[ADDR_PREFIX])
5218 as_bad (_("16-bit address isn't allowed in MPX instructions"));
5219 }
5220
5221 /* Insert BND prefix. */
5222 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5223 {
5224 if (!i.prefix[BND_PREFIX])
5225 add_prefix (BND_PREFIX_OPCODE);
5226 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5227 {
5228 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5229 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5230 }
5231 }
5232
5233 /* Check string instruction segment overrides. */
5234 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5235 {
5236 gas_assert (i.mem_operands);
5237 if (!check_string ())
5238 return;
5239 i.disp_operands = 0;
5240 }
5241
5242 /* The (%dx) memory operand should only be used with input/output
5243 instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee). */
5244 if (i.input_output_operand
5245 && ((i.tm.base_opcode | 0x82) != 0xee
5246 || i.tm.opcode_modifier.opcodespace != SPACE_BASE))
5247 {
5248 as_bad (_("input/output port address isn't allowed with `%s'"),
5249 i.tm.name);
5250 return;
5251 }
5252
5253 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5254 optimize_encoding ();
5255
5256 if (use_unaligned_vector_move)
5257 encode_with_unaligned_vector_move ();
5258
5259 if (!process_suffix ())
5260 return;
5261
5262 /* Check if IP-relative addressing requirements can be satisfied. */
5263 if (i.tm.cpu_flags.bitfield.cpuprefetchi
5264 && !(i.base_reg && i.base_reg->reg_num == RegIP))
5265 as_warn (_("'%s' only supports RIP-relative address"), i.tm.name);
5266
5267 /* Update operand types and check extended states. */
5268 for (j = 0; j < i.operands; j++)
5269 {
5270 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5271 switch (i.tm.operand_types[j].bitfield.class)
5272 {
5273 default:
5274 break;
5275 case RegMMX:
5276 i.xstate |= xstate_mmx;
5277 break;
5278 case RegMask:
5279 i.xstate |= xstate_mask;
5280 break;
5281 case RegSIMD:
5282 if (i.tm.operand_types[j].bitfield.tmmword)
5283 i.xstate |= xstate_tmm;
5284 else if (i.tm.operand_types[j].bitfield.zmmword)
5285 i.xstate |= xstate_zmm;
5286 else if (i.tm.operand_types[j].bitfield.ymmword)
5287 i.xstate |= xstate_ymm;
5288 else if (i.tm.operand_types[j].bitfield.xmmword)
5289 i.xstate |= xstate_xmm;
5290 break;
5291 }
5292 }
5293
5294 /* Make still unresolved immediate matches conform to size of immediate
5295 given in i.suffix. */
5296 if (!finalize_imm ())
5297 return;
5298
5299 if (i.types[0].bitfield.imm1)
5300 i.imm_operands = 0; /* kludge for shift insns. */
5301
5302 /* We only need to check those implicit registers for instructions
5303 with 3 operands or less. */
5304 if (i.operands <= 3)
5305 for (j = 0; j < i.operands; j++)
5306 if (i.types[j].bitfield.instance != InstanceNone
5307 && !i.types[j].bitfield.xmmword)
5308 i.reg_operands--;
5309
5310 /* For insns with operands there are more diddles to do to the opcode. */
5311 if (i.operands)
5312 {
5313 if (!process_operands ())
5314 return;
5315 }
5316 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5317 {
5318 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
5319 as_warn (_("translating to `%sp'"), i.tm.name);
5320 }
5321
5322 if (is_any_vex_encoding (&i.tm))
5323 {
5324 if (!cpu_arch_flags.bitfield.cpui286)
5325 {
5326 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5327 i.tm.name);
5328 return;
5329 }
5330
5331 /* Check for explicit REX prefix. */
5332 if (i.prefix[REX_PREFIX] || i.rex_encoding)
5333 {
5334 as_bad (_("REX prefix invalid with `%s'"), i.tm.name);
5335 return;
5336 }
5337
5338 if (i.tm.opcode_modifier.vex)
5339 build_vex_prefix (t);
5340 else
5341 build_evex_prefix ();
5342
5343 /* The individual REX.RXBW bits got consumed. */
5344 i.rex &= REX_OPCODE;
5345 }
5346
5347 /* Handle conversion of 'int $3' --> special int3 insn. XOP or FMA4
5348 instructions may define INT_OPCODE as well, so avoid this corner
5349 case for those instructions that use MODRM. */
5350 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
5351 && i.tm.base_opcode == INT_OPCODE
5352 && !i.tm.opcode_modifier.modrm
5353 && i.op[0].imms->X_add_number == 3)
5354 {
5355 i.tm.base_opcode = INT3_OPCODE;
5356 i.imm_operands = 0;
5357 }
5358
5359 if ((i.tm.opcode_modifier.jump == JUMP
5360 || i.tm.opcode_modifier.jump == JUMP_BYTE
5361 || i.tm.opcode_modifier.jump == JUMP_DWORD)
5362 && i.op[0].disps->X_op == O_constant)
5363 {
5364 /* Convert "jmp constant" (and "call constant") to a jump (call) to
5365 the absolute address given by the constant. Since ix86 jumps and
5366 calls are pc relative, we need to generate a reloc. */
5367 i.op[0].disps->X_add_symbol = &abs_symbol;
5368 i.op[0].disps->X_op = O_symbol;
5369 }
5370
5371 /* For 8 bit registers we need an empty rex prefix. Also if the
5372 instruction already has a prefix, we need to convert old
5373 registers to new ones. */
5374
5375 if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5376 && (i.op[0].regs->reg_flags & RegRex64) != 0)
5377 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5378 && (i.op[1].regs->reg_flags & RegRex64) != 0)
5379 || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5380 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5381 && i.rex != 0))
5382 {
5383 int x;
5384
5385 i.rex |= REX_OPCODE;
5386 for (x = 0; x < 2; x++)
5387 {
5388 /* Look for 8 bit operand that uses old registers. */
5389 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5390 && (i.op[x].regs->reg_flags & RegRex64) == 0)
5391 {
5392 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5393 /* In case it is "hi" register, give up. */
5394 if (i.op[x].regs->reg_num > 3)
5395 as_bad (_("can't encode register '%s%s' in an "
5396 "instruction requiring REX prefix."),
5397 register_prefix, i.op[x].regs->reg_name);
5398
5399 /* Otherwise it is equivalent to the extended register.
5400 Since the encoding doesn't change this is merely
5401 cosmetic cleanup for debug output. */
5402
5403 i.op[x].regs = i.op[x].regs + 8;
5404 }
5405 }
5406 }
5407
5408 if (i.rex == 0 && i.rex_encoding)
5409 {
5410 /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
5411 that uses legacy register. If it is "hi" register, don't add
5412 the REX_OPCODE byte. */
5413 int x;
5414 for (x = 0; x < 2; x++)
5415 if (i.types[x].bitfield.class == Reg
5416 && i.types[x].bitfield.byte
5417 && (i.op[x].regs->reg_flags & RegRex64) == 0
5418 && i.op[x].regs->reg_num > 3)
5419 {
5420 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5421 i.rex_encoding = false;
5422 break;
5423 }
5424
5425 if (i.rex_encoding)
5426 i.rex = REX_OPCODE;
5427 }
5428
5429 if (i.rex != 0)
5430 add_prefix (REX_OPCODE | i.rex);
5431
5432 insert_lfence_before ();
5433
5434 /* We are ready to output the insn. */
5435 output_insn ();
5436
5437 insert_lfence_after ();
5438
5439 last_insn.seg = now_seg;
5440
5441 if (i.tm.opcode_modifier.isprefix)
5442 {
5443 last_insn.kind = last_insn_prefix;
5444 last_insn.name = i.tm.name;
5445 last_insn.file = as_where (&last_insn.line);
5446 }
5447 else
5448 last_insn.kind = last_insn_other;
5449 }
5450
5451 /* The Q suffix is generally valid only in 64-bit mode, with very few
5452 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
5453 and fisttp only one of their two templates is matched below: That's
5454 sufficient since other relevant attributes are the same between both
5455 respective templates. */
5456 static INLINE bool q_suffix_allowed(const insn_template *t)
5457 {
5458 return flag_code == CODE_64BIT
5459 || (t->opcode_modifier.opcodespace == SPACE_BASE
5460 && t->base_opcode == 0xdf
5461 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
5462 || (t->opcode_modifier.opcodespace == SPACE_0F
5463 && t->base_opcode == 0xc7
5464 && t->opcode_modifier.opcodeprefix == PREFIX_NONE
5465 && t->extension_opcode == 1) /* cmpxchg8b */;
5466 }
5467
5468 static const char *
5469 parse_insn (const char *line, char *mnemonic)
5470 {
5471 const char *l = line, *token_start = l;
5472 char *mnem_p;
5473 bool pass1 = !current_templates;
5474 int supported;
5475 const insn_template *t;
5476 char *dot_p = NULL;
5477
5478 while (1)
5479 {
5480 mnem_p = mnemonic;
5481 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5482 {
5483 if (*mnem_p == '.')
5484 dot_p = mnem_p;
5485 mnem_p++;
5486 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5487 {
5488 as_bad (_("no such instruction: `%s'"), token_start);
5489 return NULL;
5490 }
5491 l++;
5492 }
5493 if (!is_space_char (*l)
5494 && *l != END_OF_INSN
5495 && (intel_syntax
5496 || (*l != PREFIX_SEPARATOR
5497 && *l != ',')))
5498 {
5499 as_bad (_("invalid character %s in mnemonic"),
5500 output_invalid (*l));
5501 return NULL;
5502 }
5503 if (token_start == l)
5504 {
5505 if (!intel_syntax && *l == PREFIX_SEPARATOR)
5506 as_bad (_("expecting prefix; got nothing"));
5507 else
5508 as_bad (_("expecting mnemonic; got nothing"));
5509 return NULL;
5510 }
5511
5512 /* Look up instruction (or prefix) via hash table. */
5513 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5514
5515 if (*l != END_OF_INSN
5516 && (!is_space_char (*l) || l[1] != END_OF_INSN)
5517 && current_templates
5518 && current_templates->start->opcode_modifier.isprefix)
5519 {
5520 if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5521 {
5522 as_bad ((flag_code != CODE_64BIT
5523 ? _("`%s' is only supported in 64-bit mode")
5524 : _("`%s' is not supported in 64-bit mode")),
5525 current_templates->start->name);
5526 return NULL;
5527 }
5528 /* If we are in 16-bit mode, do not allow addr16 or data16.
5529 Similarly, in 32-bit mode, do not allow addr32 or data32. */
5530 if ((current_templates->start->opcode_modifier.size == SIZE16
5531 || current_templates->start->opcode_modifier.size == SIZE32)
5532 && flag_code != CODE_64BIT
5533 && ((current_templates->start->opcode_modifier.size == SIZE32)
5534 ^ (flag_code == CODE_16BIT)))
5535 {
5536 as_bad (_("redundant %s prefix"),
5537 current_templates->start->name);
5538 return NULL;
5539 }
5540
5541 if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5542 {
5543 /* Handle pseudo prefixes. */
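/* E.g. `{vex} vpaddd %xmm2, %xmm1, %xmm0' forces a VEX encoding and
   `{disp32} movl 4(%ebp), %eax' forces a 4-byte displacement.  */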
5544 switch (current_templates->start->extension_opcode)
5545 {
5546 case Prefix_Disp8:
5547 /* {disp8} */
5548 i.disp_encoding = disp_encoding_8bit;
5549 break;
5550 case Prefix_Disp16:
5551 /* {disp16} */
5552 i.disp_encoding = disp_encoding_16bit;
5553 break;
5554 case Prefix_Disp32:
5555 /* {disp32} */
5556 i.disp_encoding = disp_encoding_32bit;
5557 break;
5558 case Prefix_Load:
5559 /* {load} */
5560 i.dir_encoding = dir_encoding_load;
5561 break;
5562 case Prefix_Store:
5563 /* {store} */
5564 i.dir_encoding = dir_encoding_store;
5565 break;
5566 case Prefix_VEX:
5567 /* {vex} */
5568 i.vec_encoding = vex_encoding_vex;
5569 break;
5570 case Prefix_VEX3:
5571 /* {vex3} */
5572 i.vec_encoding = vex_encoding_vex3;
5573 break;
5574 case Prefix_EVEX:
5575 /* {evex} */
5576 i.vec_encoding = vex_encoding_evex;
5577 break;
5578 case Prefix_REX:
5579 /* {rex} */
5580 i.rex_encoding = true;
5581 break;
5582 case Prefix_NoOptimize:
5583 /* {nooptimize} */
5584 i.no_optimize = true;
5585 break;
5586 default:
5587 abort ();
5588 }
5589 }
5590 else
5591 {
5592 /* Add prefix, checking for repeated prefixes. */
5593 switch (add_prefix (current_templates->start->base_opcode))
5594 {
5595 case PREFIX_EXIST:
5596 return NULL;
5597 case PREFIX_DS:
5598 if (current_templates->start->cpu_flags.bitfield.cpuibt)
5599 i.notrack_prefix = current_templates->start->name;
5600 break;
5601 case PREFIX_REP:
5602 if (current_templates->start->cpu_flags.bitfield.cpuhle)
5603 i.hle_prefix = current_templates->start->name;
5604 else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5605 i.bnd_prefix = current_templates->start->name;
5606 else
5607 i.rep_prefix = current_templates->start->name;
5608 break;
5609 default:
5610 break;
5611 }
5612 }
5613 /* Skip past PREFIX_SEPARATOR and reset token_start. */
5614 token_start = ++l;
5615 }
5616 else
5617 break;
5618 }
5619
5620 if (!current_templates)
5621 {
5622 /* Deprecated functionality (new code should use pseudo-prefixes instead):
5623 Check if we should swap operands or force a 32-bit displacement in
5624 encoding. */
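/* E.g. `movl.s %eax, %ebx' swaps the encoding direction, while `.d8' and
   `.d32' force displacement widths, roughly corresponding to the
   {load}/{store} and {disp8}/{disp32} pseudo-prefixes (example ours).  */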
5625 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5626 i.dir_encoding = dir_encoding_swap;
5627 else if (mnem_p - 3 == dot_p
5628 && dot_p[1] == 'd'
5629 && dot_p[2] == '8')
5630 i.disp_encoding = disp_encoding_8bit;
5631 else if (mnem_p - 4 == dot_p
5632 && dot_p[1] == 'd'
5633 && dot_p[2] == '3'
5634 && dot_p[3] == '2')
5635 i.disp_encoding = disp_encoding_32bit;
5636 else
5637 goto check_suffix;
5638 mnem_p = dot_p;
5639 *dot_p = '\0';
5640 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5641 }
5642
5643 if (!current_templates || !pass1)
5644 {
5645 current_templates = NULL;
5646
5647 check_suffix:
5648 if (mnem_p > mnemonic)
5649 {
5650 /* See if we can get a match by trimming off a suffix. */
5651 switch (mnem_p[-1])
5652 {
5653 case WORD_MNEM_SUFFIX:
5654 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5655 i.suffix = SHORT_MNEM_SUFFIX;
5656 else
5657 /* Fall through. */
5658 case BYTE_MNEM_SUFFIX:
5659 case QWORD_MNEM_SUFFIX:
5660 i.suffix = mnem_p[-1];
5661 mnem_p[-1] = '\0';
5662 current_templates
5663 = (const templates *) str_hash_find (op_hash, mnemonic);
5664 break;
5665 case SHORT_MNEM_SUFFIX:
5666 case LONG_MNEM_SUFFIX:
5667 if (!intel_syntax)
5668 {
5669 i.suffix = mnem_p[-1];
5670 mnem_p[-1] = '\0';
5671 current_templates
5672 = (const templates *) str_hash_find (op_hash, mnemonic);
5673 }
5674 break;
5675
5676 /* Intel Syntax. */
5677 case 'd':
5678 if (intel_syntax)
5679 {
5680 if (intel_float_operand (mnemonic) == 1)
5681 i.suffix = SHORT_MNEM_SUFFIX;
5682 else
5683 i.suffix = LONG_MNEM_SUFFIX;
5684 mnem_p[-1] = '\0';
5685 current_templates
5686 = (const templates *) str_hash_find (op_hash, mnemonic);
5687 }
5688 /* For compatibility reasons accept MOVSD and CMPSD without
5689 operands even in AT&T mode. */
5690 else if (*l == END_OF_INSN
5691 || (is_space_char (*l) && l[1] == END_OF_INSN))
5692 {
5693 mnem_p[-1] = '\0';
5694 current_templates
5695 = (const templates *) str_hash_find (op_hash, mnemonic);
5696 if (current_templates != NULL
5697 /* MOVS or CMPS */
5698 && (current_templates->start->base_opcode | 2) == 0xa6
5699 && current_templates->start->opcode_modifier.opcodespace
5700 == SPACE_BASE
5701 && mnem_p[-2] == 's')
5702 {
5703 as_warn (_("found `%sd'; assuming `%sl' was meant"),
5704 mnemonic, mnemonic);
5705 i.suffix = LONG_MNEM_SUFFIX;
5706 }
5707 else
5708 {
5709 current_templates = NULL;
5710 mnem_p[-1] = 'd';
5711 }
5712 }
5713 break;
5714 }
5715 }
5716
5717 if (!current_templates)
5718 {
5719 if (pass1)
5720 as_bad (_("no such instruction: `%s'"), token_start);
5721 return NULL;
5722 }
5723 }
5724
5725 if (current_templates->start->opcode_modifier.jump == JUMP
5726 || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5727 {
5728 /* Check for a branch hint. We allow ",pt" and ",pn" for
5729 predict taken and predict not taken respectively.
5730 I'm not sure that branch hints actually do anything on loop
5731 and jcxz insns (JumpByte) for current Pentium4 chips. They
5732 may work in the future and it doesn't hurt to accept them
5733 now. */
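/* E.g. `jne,pt label' gains a 0x3e (DS) prefix as the taken hint and
   `jne,pn label' a 0x2e (CS) prefix (illustrative).  */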
5734 if (l[0] == ',' && l[1] == 'p')
5735 {
5736 if (l[2] == 't')
5737 {
5738 if (!add_prefix (DS_PREFIX_OPCODE))
5739 return NULL;
5740 l += 3;
5741 }
5742 else if (l[2] == 'n')
5743 {
5744 if (!add_prefix (CS_PREFIX_OPCODE))
5745 return NULL;
5746 l += 3;
5747 }
5748 }
5749 }
5750 /* Any other comma loses. */
5751 if (*l == ',')
5752 {
5753 as_bad (_("invalid character %s in mnemonic"),
5754 output_invalid (*l));
5755 return NULL;
5756 }
5757
5758 /* Check if instruction is supported on specified architecture. */
5759 supported = 0;
5760 for (t = current_templates->start; t < current_templates->end; ++t)
5761 {
5762 supported |= cpu_flags_match (t);
5763
5764 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
5765 supported &= ~CPU_FLAGS_64BIT_MATCH;
5766
5767 if (supported == CPU_FLAGS_PERFECT_MATCH)
5768 return l;
5769 }
5770
5771 if (pass1)
5772 {
5773 if (supported & CPU_FLAGS_64BIT_MATCH)
5774 i.error = unsupported_on_arch;
5775 else
5776 i.error = unsupported_64bit;
5777 }
5778
5779 return NULL;
5780 }
5781
5782 static char *
5783 parse_operands (char *l, const char *mnemonic)
5784 {
5785 char *token_start;
5786
5787 /* 1 if operand is pending after ','. */
5788 unsigned int expecting_operand = 0;
5789
5790 while (*l != END_OF_INSN)
5791 {
5792 /* Non-zero if operand parens not balanced. */
5793 unsigned int paren_not_balanced = 0;
5794 /* True if inside double quotes. */
5795 bool in_quotes = false;
5796
5797 /* Skip optional white space before operand. */
5798 if (is_space_char (*l))
5799 ++l;
5800 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5801 {
5802 as_bad (_("invalid character %s before operand %d"),
5803 output_invalid (*l),
5804 i.operands + 1);
5805 return NULL;
5806 }
5807 token_start = l; /* After white space. */
5808 while (in_quotes || paren_not_balanced || *l != ',')
5809 {
5810 if (*l == END_OF_INSN)
5811 {
5812 if (in_quotes)
5813 {
5814 as_bad (_("unbalanced double quotes in operand %d."),
5815 i.operands + 1);
5816 return NULL;
5817 }
5818 if (paren_not_balanced)
5819 {
5820 know (!intel_syntax);
5821 as_bad (_("unbalanced parenthesis in operand %d."),
5822 i.operands + 1);
5823 return NULL;
5824 }
5825 else
5826 break; /* we are done */
5827 }
5828 else if (*l == '\\' && l[1] == '"')
5829 ++l;
5830 else if (*l == '"')
5831 in_quotes = !in_quotes;
5832 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5833 {
5834 as_bad (_("invalid character %s in operand %d"),
5835 output_invalid (*l),
5836 i.operands + 1);
5837 return NULL;
5838 }
5839 if (!intel_syntax && !in_quotes)
5840 {
5841 if (*l == '(')
5842 ++paren_not_balanced;
5843 if (*l == ')')
5844 --paren_not_balanced;
5845 }
5846 l++;
5847 }
5848 if (l != token_start)
5849 { /* Yes, we've read in another operand. */
5850 unsigned int operand_ok;
5851 this_operand = i.operands++;
5852 if (i.operands > MAX_OPERANDS)
5853 {
5854 as_bad (_("spurious operands; (%d operands/instruction max)"),
5855 MAX_OPERANDS);
5856 return NULL;
5857 }
5858 i.types[this_operand].bitfield.unspecified = 1;
5859 /* Now parse operand adding info to 'i' as we go along. */
5860 END_STRING_AND_SAVE (l);
5861
5862 if (i.mem_operands > 1)
5863 {
5864 as_bad (_("too many memory references for `%s'"),
5865 mnemonic);
5866 return NULL;
5867 }
5868
5869 if (intel_syntax)
5870 operand_ok =
5871 i386_intel_operand (token_start,
5872 intel_float_operand (mnemonic));
5873 else
5874 operand_ok = i386_att_operand (token_start);
5875
5876 RESTORE_END_STRING (l);
5877 if (!operand_ok)
5878 return NULL;
5879 }
5880 else
5881 {
5882 if (expecting_operand)
5883 {
5884 expecting_operand_after_comma:
5885 as_bad (_("expecting operand after ','; got nothing"));
5886 return NULL;
5887 }
5888 if (*l == ',')
5889 {
5890 as_bad (_("expecting operand before ','; got nothing"));
5891 return NULL;
5892 }
5893 }
5894
5895 /* Now *l must be either ',' or END_OF_INSN. */
5896 if (*l == ',')
5897 {
5898 if (*++l == END_OF_INSN)
5899 {
5900 /* A trailing comma with nothing after it: complain about the missing operand. */
5901 goto expecting_operand_after_comma;
5902 }
5903 expecting_operand = 1;
5904 }
5905 }
5906 return l;
5907 }
5908
5909 static void
5910 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5911 {
5912 union i386_op temp_op;
5913 i386_operand_type temp_type;
5914 unsigned int temp_flags;
5915 enum bfd_reloc_code_real temp_reloc;
5916
5917 temp_type = i.types[xchg2];
5918 i.types[xchg2] = i.types[xchg1];
5919 i.types[xchg1] = temp_type;
5920
5921 temp_flags = i.flags[xchg2];
5922 i.flags[xchg2] = i.flags[xchg1];
5923 i.flags[xchg1] = temp_flags;
5924
5925 temp_op = i.op[xchg2];
5926 i.op[xchg2] = i.op[xchg1];
5927 i.op[xchg1] = temp_op;
5928
5929 temp_reloc = i.reloc[xchg2];
5930 i.reloc[xchg2] = i.reloc[xchg1];
5931 i.reloc[xchg1] = temp_reloc;
5932
5933 if (i.mask.reg)
5934 {
5935 if (i.mask.operand == xchg1)
5936 i.mask.operand = xchg2;
5937 else if (i.mask.operand == xchg2)
5938 i.mask.operand = xchg1;
5939 }
5940 if (i.broadcast.type || i.broadcast.bytes)
5941 {
5942 if (i.broadcast.operand == xchg1)
5943 i.broadcast.operand = xchg2;
5944 else if (i.broadcast.operand == xchg2)
5945 i.broadcast.operand = xchg1;
5946 }
5947 }
5948
5949 static void
5950 swap_operands (void)
5951 {
5952 switch (i.operands)
5953 {
5954 case 5:
5955 case 4:
5956 swap_2_operands (1, i.operands - 2);
5957 /* Fall through. */
5958 case 3:
5959 case 2:
5960 swap_2_operands (0, i.operands - 1);
5961 break;
5962 default:
5963 abort ();
5964 }
5965
5966 if (i.mem_operands == 2)
5967 {
5968 const reg_entry *temp_seg;
5969 temp_seg = i.seg[0];
5970 i.seg[0] = i.seg[1];
5971 i.seg[1] = temp_seg;
5972 }
5973 }
5974
5975 /* Try to ensure constant immediates are represented in the smallest
5976 opcode possible. */
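/* Illustrative example: "add $-0x20, %eax" can use the sign-extended
   imm8 form of ADD (opcode 0x83 /0 ib) instead of the imm32 form
   (opcode 0x81 /0 id), saving three bytes of immediate; widening
   i.types below is what lets match_template() pick the smaller
   template.  */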
5977 static void
5978 optimize_imm (void)
5979 {
5980 char guess_suffix = 0;
5981 int op;
5982
5983 if (i.suffix)
5984 guess_suffix = i.suffix;
5985 else if (i.reg_operands)
5986 {
5987 /* Figure out a suffix from the last register operand specified.
5988 We can't do this properly yet, i.e. excluding special register
5989 instances, but the following works for instructions with
5990 immediates. In any case, we can't set i.suffix yet. */
5991 for (op = i.operands; --op >= 0;)
5992 if (i.types[op].bitfield.class != Reg)
5993 continue;
5994 else if (i.types[op].bitfield.byte)
5995 {
5996 guess_suffix = BYTE_MNEM_SUFFIX;
5997 break;
5998 }
5999 else if (i.types[op].bitfield.word)
6000 {
6001 guess_suffix = WORD_MNEM_SUFFIX;
6002 break;
6003 }
6004 else if (i.types[op].bitfield.dword)
6005 {
6006 guess_suffix = LONG_MNEM_SUFFIX;
6007 break;
6008 }
6009 else if (i.types[op].bitfield.qword)
6010 {
6011 guess_suffix = QWORD_MNEM_SUFFIX;
6012 break;
6013 }
6014 }
6015 else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6016 guess_suffix = WORD_MNEM_SUFFIX;
6017
6018 for (op = i.operands; --op >= 0;)
6019 if (operand_type_check (i.types[op], imm))
6020 {
6021 switch (i.op[op].imms->X_op)
6022 {
6023 case O_constant:
6024 /* If a suffix is given, this operand may be shortened. */
6025 switch (guess_suffix)
6026 {
6027 case LONG_MNEM_SUFFIX:
6028 i.types[op].bitfield.imm32 = 1;
6029 i.types[op].bitfield.imm64 = 1;
6030 break;
6031 case WORD_MNEM_SUFFIX:
6032 i.types[op].bitfield.imm16 = 1;
6033 i.types[op].bitfield.imm32 = 1;
6034 i.types[op].bitfield.imm32s = 1;
6035 i.types[op].bitfield.imm64 = 1;
6036 break;
6037 case BYTE_MNEM_SUFFIX:
6038 i.types[op].bitfield.imm8 = 1;
6039 i.types[op].bitfield.imm8s = 1;
6040 i.types[op].bitfield.imm16 = 1;
6041 i.types[op].bitfield.imm32 = 1;
6042 i.types[op].bitfield.imm32s = 1;
6043 i.types[op].bitfield.imm64 = 1;
6044 break;
6045 }
6046
6047 /* If this operand is at most 16 bits, convert it
6048 to a signed 16 bit number before trying to see
6049 whether it will fit in an even smaller size.
6050 This allows a 16-bit operand such as $0xffe0 to
6051 be recognised as within Imm8S range. */
6052 if ((i.types[op].bitfield.imm16)
6053 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
6054 {
6055 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6056 ^ 0x8000) - 0x8000);
6057 }
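	      /* Worked example: 0xffe0 ^ 0x8000 == 0x7fe0, and
		 0x7fe0 - 0x8000 == -0x20, i.e. the constant has been
		 sign-extended from 16 bits; -0x20 in turn qualifies
		 as Imm8S below.  */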
6058 #ifdef BFD64
6059 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
6060 if ((i.types[op].bitfield.imm32)
6061 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
6062 {
6063 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6064 ^ ((offsetT) 1 << 31))
6065 - ((offsetT) 1 << 31));
6066 }
6067 #endif
6068 i.types[op]
6069 = operand_type_or (i.types[op],
6070 smallest_imm_type (i.op[op].imms->X_add_number));
6071
6072 /* We must avoid matching Imm32 templates when only a
6073 64-bit immediate is available. */
6074 if (guess_suffix == QWORD_MNEM_SUFFIX)
6075 i.types[op].bitfield.imm32 = 0;
6076 break;
6077
6078 case O_absent:
6079 case O_register:
6080 abort ();
6081
6082 /* Symbols and expressions. */
6083 default:
6084 /* Convert symbolic operand to proper sizes for matching, but don't
6085 prevent matching a set of insns that only supports sizes other
6086 than those matching the insn suffix. */
6087 {
6088 i386_operand_type mask, allowed;
6089 const insn_template *t = current_templates->start;
6090
6091 operand_type_set (&mask, 0);
6092 switch (guess_suffix)
6093 {
6094 case QWORD_MNEM_SUFFIX:
6095 mask.bitfield.imm64 = 1;
6096 mask.bitfield.imm32s = 1;
6097 break;
6098 case LONG_MNEM_SUFFIX:
6099 mask.bitfield.imm32 = 1;
6100 break;
6101 case WORD_MNEM_SUFFIX:
6102 mask.bitfield.imm16 = 1;
6103 break;
6104 case BYTE_MNEM_SUFFIX:
6105 mask.bitfield.imm8 = 1;
6106 break;
6107 default:
6108 break;
6109 }
6110
6111 allowed = operand_type_and (t->operand_types[op], mask);
6112 while (++t < current_templates->end)
6113 {
6114 allowed = operand_type_or (allowed, t->operand_types[op]);
6115 allowed = operand_type_and (allowed, mask);
6116 }
6117
6118 if (!operand_type_all_zero (&allowed))
6119 i.types[op] = operand_type_and (i.types[op], mask);
6120 }
6121 break;
6122 }
6123 }
6124 }
6125
6126 /* Try to use the smallest displacement type too. */
6127 static void
6128 optimize_disp (void)
6129 {
6130 int op;
6131
6132 for (op = i.operands; --op >= 0;)
6133 if (operand_type_check (i.types[op], disp))
6134 {
6135 if (i.op[op].disps->X_op == O_constant)
6136 {
6137 offsetT op_disp = i.op[op].disps->X_add_number;
6138
6139 if (!op_disp && i.types[op].bitfield.baseindex)
6140 {
6141 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6142 i.op[op].disps = NULL;
6143 i.disp_operands--;
6144 continue;
6145 }
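	    /* Illustrative example: "movl %eax, 0(%ebx)" reaches the code
	       above and ends up encoded exactly like "movl %eax, (%ebx)",
	       with no displacement byte at all.  */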
6146
6147 if (i.types[op].bitfield.disp16
6148 && fits_in_unsigned_word (op_disp))
6149 {
6150 /* If this operand is at most 16 bits, convert
6151 to a signed 16 bit number and don't use 64bit
6152 displacement. */
6153 op_disp = ((op_disp ^ 0x8000) - 0x8000);
6154 i.types[op].bitfield.disp64 = 0;
6155 }
6156
6157 #ifdef BFD64
6158 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
6159 if ((flag_code != CODE_64BIT
6160 ? i.types[op].bitfield.disp32
6161 : want_disp32 (current_templates->start)
6162 && (!current_templates->start->opcode_modifier.jump
6163 || i.jumpabsolute || i.types[op].bitfield.baseindex))
6164 && fits_in_unsigned_long (op_disp))
6165 {
6166 /* If this operand is at most 32 bits, convert
6167 to a signed 32 bit number and don't use 64bit
6168 displacement. */
6169 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
6170 i.types[op].bitfield.disp64 = 0;
6171 i.types[op].bitfield.disp32 = 1;
6172 }
6173
6174 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
6175 {
6176 i.types[op].bitfield.disp64 = 0;
6177 i.types[op].bitfield.disp32 = 1;
6178 }
6179 #endif
6180 if ((i.types[op].bitfield.disp32
6181 || i.types[op].bitfield.disp16)
6182 && fits_in_disp8 (op_disp))
6183 i.types[op].bitfield.disp8 = 1;
6184
6185 i.op[op].disps->X_add_number = op_disp;
6186 }
6187 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
6188 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
6189 {
6190 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
6191 i.op[op].disps, 0, i.reloc[op]);
6192 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6193 }
6194 else
6195 /* We only support 64bit displacements on constants. */
6196 i.types[op].bitfield.disp64 = 0;
6197 }
6198 }
6199
6200 /* Return 1 if there is a match in broadcast bytes between operand
6201 GIVEN and instruction template T. */
6202
6203 static INLINE int
6204 match_broadcast_size (const insn_template *t, unsigned int given)
6205 {
6206 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
6207 && i.types[given].bitfield.byte)
6208 || (t->opcode_modifier.broadcast == WORD_BROADCAST
6209 && i.types[given].bitfield.word)
6210 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
6211 && i.types[given].bitfield.dword)
6212 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
6213 && i.types[given].bitfield.qword));
6214 }
6215
6216 /* Check if operands are valid for the instruction. */
6217
6218 static int
6219 check_VecOperands (const insn_template *t)
6220 {
6221 unsigned int op;
6222 i386_cpu_flags cpu;
6223
6224 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6225 any one operand implicitly require AVX512VL support if the actual
6226 operand size is YMMword or XMMword. Since this function runs after
6227 template matching, there's no need to check for YMMword/XMMword in
6228 the template. */
6229 cpu = cpu_flags_and (t->cpu_flags, avx512);
6230 if (!cpu_flags_all_zero (&cpu)
6231 && !t->cpu_flags.bitfield.cpuavx512vl
6232 && !cpu_arch_flags.bitfield.cpuavx512vl)
6233 {
6234 for (op = 0; op < t->operands; ++op)
6235 {
6236 if (t->operand_types[op].bitfield.zmmword
6237 && (i.types[op].bitfield.ymmword
6238 || i.types[op].bitfield.xmmword))
6239 {
6240 i.error = unsupported;
6241 return 1;
6242 }
6243 }
6244 }
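  /* Illustrative example: "vpternlogd $0x55, %xmm0, %xmm1, %xmm2" uses
     XMM registers with a template whose operands also allow ZMMword,
     so the loop above rejects it unless AVX512VL is enabled.  */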
6245
6246 /* Somewhat similarly, templates specifying both AVX and AVX2 are
6247 requiring AVX2 support if the actual operand size is YMMword. */
6248 if (t->cpu_flags.bitfield.cpuavx
6249 && t->cpu_flags.bitfield.cpuavx2
6250 && !cpu_arch_flags.bitfield.cpuavx2)
6251 {
6252 for (op = 0; op < t->operands; ++op)
6253 {
6254 if (t->operand_types[op].bitfield.xmmword
6255 && i.types[op].bitfield.ymmword)
6256 {
6257 i.error = unsupported;
6258 return 1;
6259 }
6260 }
6261 }
6262
6263 /* Without VSIB byte, we can't have a vector register for index. */
6264 if (!t->opcode_modifier.sib
6265 && i.index_reg
6266 && (i.index_reg->reg_type.bitfield.xmmword
6267 || i.index_reg->reg_type.bitfield.ymmword
6268 || i.index_reg->reg_type.bitfield.zmmword))
6269 {
6270 i.error = unsupported_vector_index_register;
6271 return 1;
6272 }
6273
6274 /* Check if default mask is allowed. */
6275 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6276 && (!i.mask.reg || i.mask.reg->reg_num == 0))
6277 {
6278 i.error = no_default_mask;
6279 return 1;
6280 }
6281
6282 /* For VSIB byte, we need a vector register for index, and all vector
6283 registers must be distinct. */
6284 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6285 {
6286 if (!i.index_reg
6287 || !((t->opcode_modifier.sib == VECSIB128
6288 && i.index_reg->reg_type.bitfield.xmmword)
6289 || (t->opcode_modifier.sib == VECSIB256
6290 && i.index_reg->reg_type.bitfield.ymmword)
6291 || (t->opcode_modifier.sib == VECSIB512
6292 && i.index_reg->reg_type.bitfield.zmmword)))
6293 {
6294 i.error = invalid_vsib_address;
6295 return 1;
6296 }
6297
6298 gas_assert (i.reg_operands == 2 || i.mask.reg);
6299 if (i.reg_operands == 2 && !i.mask.reg)
6300 {
6301 gas_assert (i.types[0].bitfield.class == RegSIMD);
6302 gas_assert (i.types[0].bitfield.xmmword
6303 || i.types[0].bitfield.ymmword);
6304 gas_assert (i.types[2].bitfield.class == RegSIMD);
6305 gas_assert (i.types[2].bitfield.xmmword
6306 || i.types[2].bitfield.ymmword);
6307 if (operand_check == check_none)
6308 return 0;
6309 if (register_number (i.op[0].regs)
6310 != register_number (i.index_reg)
6311 && register_number (i.op[2].regs)
6312 != register_number (i.index_reg)
6313 && register_number (i.op[0].regs)
6314 != register_number (i.op[2].regs))
6315 return 0;
6316 if (operand_check == check_error)
6317 {
6318 i.error = invalid_vector_register_set;
6319 return 1;
6320 }
6321 as_warn (_("mask, index, and destination registers should be distinct"));
6322 }
6323 else if (i.reg_operands == 1 && i.mask.reg)
6324 {
6325 if (i.types[1].bitfield.class == RegSIMD
6326 && (i.types[1].bitfield.xmmword
6327 || i.types[1].bitfield.ymmword
6328 || i.types[1].bitfield.zmmword)
6329 && (register_number (i.op[1].regs)
6330 == register_number (i.index_reg)))
6331 {
6332 if (operand_check == check_error)
6333 {
6334 i.error = invalid_vector_register_set;
6335 return 1;
6336 }
6337 if (operand_check != check_none)
6338 as_warn (_("index and destination registers should be distinct"));
6339 }
6340 }
6341 }
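  /* Illustrative example: "vgatherdps %xmm1, (%rax,%xmm1,4), %xmm2"
     reuses %xmm1 as both mask and index, tripping the distinctness
     checks above (a warning by default, an error with
     -moperand-check=error).  */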
6342
6343 /* For AMX instructions with 3 TMM register operands, all operands
6344 must be distinct. */
6345 if (i.reg_operands == 3
6346 && t->operand_types[0].bitfield.tmmword
6347 && (i.op[0].regs == i.op[1].regs
6348 || i.op[0].regs == i.op[2].regs
6349 || i.op[1].regs == i.op[2].regs))
6350 {
6351 i.error = invalid_tmm_register_set;
6352 return 1;
6353 }
6354
6355 /* Some special instructions require that the destination be
6356 distinct from the source registers. */
6357 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6358 {
6359 unsigned int dest_reg = i.operands - 1;
6360
6361 know (i.operands >= 3);
6362
6363 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
6364 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6365 || (i.reg_operands > 2
6366 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6367 {
6368 i.error = invalid_dest_and_src_register_set;
6369 return 1;
6370 }
6371 }
6372
6373 /* Check if broadcast is supported by the instruction and is applied
6374 to the memory operand. */
6375 if (i.broadcast.type || i.broadcast.bytes)
6376 {
6377 i386_operand_type type, overlap;
6378
6379 /* Check if specified broadcast is supported in this instruction,
6380 and its broadcast bytes match the memory operand. */
6381 op = i.broadcast.operand;
6382 if (!t->opcode_modifier.broadcast
6383 || !(i.flags[op] & Operand_Mem)
6384 || (!i.types[op].bitfield.unspecified
6385 && !match_broadcast_size (t, op)))
6386 {
6387 bad_broadcast:
6388 i.error = unsupported_broadcast;
6389 return 1;
6390 }
6391
6392 if (i.broadcast.type)
6393 i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
6394 * i.broadcast.type);
6395 operand_type_set (&type, 0);
6396 switch (get_broadcast_bytes (t, false))
6397 {
6398 case 2:
6399 type.bitfield.word = 1;
6400 break;
6401 case 4:
6402 type.bitfield.dword = 1;
6403 break;
6404 case 8:
6405 type.bitfield.qword = 1;
6406 break;
6407 case 16:
6408 type.bitfield.xmmword = 1;
6409 break;
6410 case 32:
6411 type.bitfield.ymmword = 1;
6412 break;
6413 case 64:
6414 type.bitfield.zmmword = 1;
6415 break;
6416 default:
6417 goto bad_broadcast;
6418 }
6419
6420 overlap = operand_type_and (type, t->operand_types[op]);
6421 if (t->operand_types[op].bitfield.class == RegSIMD
6422 && t->operand_types[op].bitfield.byte
6423 + t->operand_types[op].bitfield.word
6424 + t->operand_types[op].bitfield.dword
6425 + t->operand_types[op].bitfield.qword > 1)
6426 {
6427 overlap.bitfield.xmmword = 0;
6428 overlap.bitfield.ymmword = 0;
6429 overlap.bitfield.zmmword = 0;
6430 }
6431 if (operand_type_all_zero (&overlap))
6432 goto bad_broadcast;
6433
6434 if (t->opcode_modifier.checkregsize)
6435 {
6436 unsigned int j;
6437
6438 type.bitfield.baseindex = 1;
6439 for (j = 0; j < i.operands; ++j)
6440 {
6441 if (j != op
6442 && !operand_type_register_match(i.types[j],
6443 t->operand_types[j],
6444 type,
6445 t->operand_types[op]))
6446 goto bad_broadcast;
6447 }
6448 }
6449 }
6450 /* If broadcast is supported by this instruction, we need to check
6451 that an operand of one-element size isn't specified without broadcast. */
6452 else if (t->opcode_modifier.broadcast && i.mem_operands)
6453 {
6454 /* Find memory operand. */
6455 for (op = 0; op < i.operands; op++)
6456 if (i.flags[op] & Operand_Mem)
6457 break;
6458 gas_assert (op < i.operands);
6459 /* Check size of the memory operand. */
6460 if (match_broadcast_size (t, op))
6461 {
6462 i.error = broadcast_needed;
6463 return 1;
6464 }
6465 }
6466 else
6467 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
6468
6469 /* Check if requested masking is supported. */
6470 if (i.mask.reg)
6471 {
6472 switch (t->opcode_modifier.masking)
6473 {
6474 case BOTH_MASKING:
6475 break;
6476 case MERGING_MASKING:
6477 if (i.mask.zeroing)
6478 {
6479 case 0:
6480 i.error = unsupported_masking;
6481 return 1;
6482 }
6483 break;
6484 case DYNAMIC_MASKING:
6485 /* Memory destinations allow only merging masking. */
6486 if (i.mask.zeroing && i.mem_operands)
6487 {
6488 /* Find memory operand. */
6489 for (op = 0; op < i.operands; op++)
6490 if (i.flags[op] & Operand_Mem)
6491 break;
6492 gas_assert (op < i.operands);
6493 if (op == i.operands - 1)
6494 {
6495 i.error = unsupported_masking;
6496 return 1;
6497 }
6498 }
6499 break;
6500 default:
6501 abort ();
6502 }
6503 }
6504
6505 /* Check if masking is applied to dest operand. */
6506 if (i.mask.reg && (i.mask.operand != i.operands - 1))
6507 {
6508 i.error = mask_not_on_destination;
6509 return 1;
6510 }
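  /* Illustrative examples: "vmovdqu32 %zmm0, (%rax){%k1}{z}" fails the
     DYNAMIC_MASKING case above, since a memory destination permits only
     merging masking, and a mask written on a non-final (source) operand
     trips the mask_not_on_destination check.  */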
6511
6512 /* Check RC/SAE. */
6513 if (i.rounding.type != rc_none)
6514 {
6515 if (!t->opcode_modifier.sae
6516 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6517 || i.mem_operands)
6518 {
6519 i.error = unsupported_rc_sae;
6520 return 1;
6521 }
6522
6523 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6524 operand. */
6525 if (t->opcode_modifier.evex != EVEXLIG)
6526 {
6527 for (op = 0; op < t->operands; ++op)
6528 if (i.types[op].bitfield.zmmword)
6529 break;
6530 if (op >= t->operands)
6531 {
6532 i.error = operand_size_mismatch;
6533 return 1;
6534 }
6535 }
6536 }
6537
6538 /* Check the special Imm4 cases; must be the first operand. */
6539 if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6540 {
6541 if (i.op[0].imms->X_op != O_constant
6542 || !fits_in_imm4 (i.op[0].imms->X_add_number))
6543 {
6544 i.error = bad_imm4;
6545 return 1;
6546 }
6547
6548 /* Turn off Imm<N> so that update_imm won't complain. */
6549 operand_type_set (&i.types[0], 0);
6550 }
6551
6552 /* Check vector Disp8 operand. */
6553 if (t->opcode_modifier.disp8memshift
6554 && i.disp_encoding <= disp_encoding_8bit)
6555 {
6556 if (i.broadcast.bytes)
6557 i.memshift = t->opcode_modifier.broadcast - 1;
6558 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6559 i.memshift = t->opcode_modifier.disp8memshift;
6560 else
6561 {
6562 const i386_operand_type *type = NULL, *fallback = NULL;
6563
6564 i.memshift = 0;
6565 for (op = 0; op < i.operands; op++)
6566 if (i.flags[op] & Operand_Mem)
6567 {
6568 if (t->opcode_modifier.evex == EVEXLIG)
6569 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6570 else if (t->operand_types[op].bitfield.xmmword
6571 + t->operand_types[op].bitfield.ymmword
6572 + t->operand_types[op].bitfield.zmmword <= 1)
6573 type = &t->operand_types[op];
6574 else if (!i.types[op].bitfield.unspecified)
6575 type = &i.types[op];
6576 else /* Ambiguities get resolved elsewhere. */
6577 fallback = &t->operand_types[op];
6578 }
6579 else if (i.types[op].bitfield.class == RegSIMD
6580 && t->opcode_modifier.evex != EVEXLIG)
6581 {
6582 if (i.types[op].bitfield.zmmword)
6583 i.memshift = 6;
6584 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6585 i.memshift = 5;
6586 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6587 i.memshift = 4;
6588 }
6589
6590 if (!type && !i.memshift)
6591 type = fallback;
6592 if (type)
6593 {
6594 if (type->bitfield.zmmword)
6595 i.memshift = 6;
6596 else if (type->bitfield.ymmword)
6597 i.memshift = 5;
6598 else if (type->bitfield.xmmword)
6599 i.memshift = 4;
6600 }
6601
6602 /* For the check in fits_in_disp8(). */
6603 if (i.memshift == 0)
6604 i.memshift = -1;
6605 }
6606
6607 for (op = 0; op < i.operands; op++)
6608 if (operand_type_check (i.types[op], disp)
6609 && i.op[op].disps->X_op == O_constant)
6610 {
6611 if (fits_in_disp8 (i.op[op].disps->X_add_number))
6612 {
6613 i.types[op].bitfield.disp8 = 1;
6614 return 0;
6615 }
6616 i.types[op].bitfield.disp8 = 0;
6617 }
6618 }
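  /* Worked example: for "vmovaps 0x100(%rax), %zmm0" i.memshift is 6
     (64-byte vectors), so fits_in_disp8() checks that the low 6 bits of
     the displacement are clear and that 0x100 >> 6 == 4 fits in a signed
     byte; the displacement is then encoded as the single byte 4
     (EVEX compressed disp8*N).  */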
6619
6620 i.memshift = 0;
6621
6622 return 0;
6623 }
6624
6625 /* Check if encoding requirements are met by the instruction. */
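/* For example, an "{evex}" pseudo-prefix sets i.vec_encoding to
   vex_encoding_evex, so templates lacking an EVEX form are rejected
   here, steering match_template() towards an EVEX template; "{vex}"
   conversely fails templates that are EVEX-only.  */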
6626
6627 static int
6628 VEX_check_encoding (const insn_template *t)
6629 {
6630 if (i.vec_encoding == vex_encoding_error)
6631 {
6632 i.error = unsupported;
6633 return 1;
6634 }
6635
6636 if (i.vec_encoding == vex_encoding_evex)
6637 {
6638 /* This instruction must be encoded with EVEX prefix. */
6639 if (!is_evex_encoding (t))
6640 {
6641 i.error = unsupported;
6642 return 1;
6643 }
6644 return 0;
6645 }
6646
6647 if (!t->opcode_modifier.vex)
6648 {
6649 /* This instruction template doesn't have VEX prefix. */
6650 if (i.vec_encoding != vex_encoding_default)
6651 {
6652 i.error = unsupported;
6653 return 1;
6654 }
6655 return 0;
6656 }
6657
6658 return 0;
6659 }
6660
6661 /* Helper function for the progress() macro in match_template(). */
6662 static INLINE enum i386_error progress (enum i386_error new,
6663 enum i386_error last,
6664 unsigned int line, unsigned int *line_p)
6665 {
6666 if (line <= *line_p)
6667 return last;
6668 *line_p = line;
6669 return new;
6670 }
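/* Note: since __LINE__ grows monotonically through the checks in
   match_template(), the error kept is always the one from the template
   that matched furthest, so the final diagnostic reflects the
   closest-matching template.  */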
6671
6672 static const insn_template *
6673 match_template (char mnem_suffix)
6674 {
6675 /* Points to template once we've found it. */
6676 const insn_template *t;
6677 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6678 i386_operand_type overlap4;
6679 unsigned int found_reverse_match;
6680 i386_operand_type operand_types [MAX_OPERANDS];
6681 int addr_prefix_disp;
6682 unsigned int j, size_match, check_register, errline = __LINE__;
6683 enum i386_error specific_error = number_of_operands_mismatch;
6684 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6685
6686 #if MAX_OPERANDS != 5
6687 # error "MAX_OPERANDS must be 5."
6688 #endif
6689
6690 found_reverse_match = 0;
6691 addr_prefix_disp = -1;
6692
6693 for (t = current_templates->start; t < current_templates->end; t++)
6694 {
6695 addr_prefix_disp = -1;
6696 found_reverse_match = 0;
6697
6698 /* Must have right number of operands. */
6699 if (i.operands != t->operands)
6700 continue;
6701
6702 /* Check processor support. */
6703 specific_error = progress (unsupported);
6704 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6705 continue;
6706
6707 /* Check AT&T mnemonic. */
6708 specific_error = progress (unsupported_with_intel_mnemonic);
6709 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6710 continue;
6711
6712 /* Check AT&T/Intel syntax. */
6713 specific_error = progress (unsupported_syntax);
6714 if ((intel_syntax && t->opcode_modifier.attsyntax)
6715 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6716 continue;
6717
6718 /* Check Intel64/AMD64 ISA. */
6719 switch (isa64)
6720 {
6721 default:
6722 /* Default: Don't accept Intel64. */
6723 if (t->opcode_modifier.isa64 == INTEL64)
6724 continue;
6725 break;
6726 case amd64:
6727 /* -mamd64: Accept neither Intel64 nor Intel64-only insns. */
6728 if (t->opcode_modifier.isa64 >= INTEL64)
6729 continue;
6730 break;
6731 case intel64:
6732 /* -mintel64: Don't accept AMD64. */
6733 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6734 continue;
6735 break;
6736 }
6737
6738 /* Check the suffix. */
6739 specific_error = progress (invalid_instruction_suffix);
6740 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
6741 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
6742 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
6743 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
6744 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
6745 continue;
6746
6747 specific_error = progress (operand_size_mismatch);
6748 size_match = operand_size_match (t);
6749 if (!size_match)
6750 continue;
6751
6752 /* This is intentionally not
6753
6754 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6755
6756 as the case of a missing * on the operand is accepted (perhaps with
6757 a warning, issued further down). */
6758 specific_error = progress (operand_type_mismatch);
6759 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6760 continue;
6761
6762 /* In Intel syntax, normally we can check for memory operand size when
6763 there is no mnemonic suffix. But jmp and call have 2 different
6764 encodings with Dword memory operand size. Skip the "near" one
6765 (permitting a register operand) when "far" was requested. */
6766 if (i.far_branch
6767 && t->opcode_modifier.jump == JUMP_ABSOLUTE
6768 && t->operand_types[0].bitfield.class == Reg)
6769 continue;
6770
6771 for (j = 0; j < MAX_OPERANDS; j++)
6772 operand_types[j] = t->operand_types[j];
6773
6774 /* In general, don't allow 32-bit operands on pre-386. */
6775 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
6776 : operand_size_mismatch);
6777 j = i.imm_operands + (t->operands > i.imm_operands + 1);
6778 if (i.suffix == LONG_MNEM_SUFFIX
6779 && !cpu_arch_flags.bitfield.cpui386
6780 && (intel_syntax
6781 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6782 && !intel_float_operand (t->name))
6783 : intel_float_operand (t->name) != 2)
6784 && (t->operands == i.imm_operands
6785 || (operand_types[i.imm_operands].bitfield.class != RegMMX
6786 && operand_types[i.imm_operands].bitfield.class != RegSIMD
6787 && operand_types[i.imm_operands].bitfield.class != RegMask)
6788 || (operand_types[j].bitfield.class != RegMMX
6789 && operand_types[j].bitfield.class != RegSIMD
6790 && operand_types[j].bitfield.class != RegMask))
6791 && !t->opcode_modifier.sib)
6792 continue;
6793
6794 /* Do not verify operands when there are none. */
6795 if (!t->operands)
6796 {
6797 if (VEX_check_encoding (t))
6798 {
6799 specific_error = progress (i.error);
6800 continue;
6801 }
6802
6803 /* We've found a match; break out of loop. */
6804 break;
6805 }
6806
6807 if (!t->opcode_modifier.jump
6808 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6809 {
6810 /* There should be only one Disp operand. */
6811 for (j = 0; j < MAX_OPERANDS; j++)
6812 if (operand_type_check (operand_types[j], disp))
6813 break;
6814 if (j < MAX_OPERANDS)
6815 {
6816 bool override = (i.prefix[ADDR_PREFIX] != 0);
6817
6818 addr_prefix_disp = j;
6819
6820 /* Address size prefix will turn Disp64 operand into Disp32 and
6821 Disp32/Disp16 one into Disp16/Disp32 respectively. */
6822 switch (flag_code)
6823 {
6824 case CODE_16BIT:
6825 override = !override;
6826 /* Fall through. */
6827 case CODE_32BIT:
6828 if (operand_types[j].bitfield.disp32
6829 && operand_types[j].bitfield.disp16)
6830 {
6831 operand_types[j].bitfield.disp16 = override;
6832 operand_types[j].bitfield.disp32 = !override;
6833 }
6834 gas_assert (!operand_types[j].bitfield.disp64);
6835 break;
6836
6837 case CODE_64BIT:
6838 if (operand_types[j].bitfield.disp64)
6839 {
6840 gas_assert (!operand_types[j].bitfield.disp32);
6841 operand_types[j].bitfield.disp32 = override;
6842 operand_types[j].bitfield.disp64 = !override;
6843 }
6844 operand_types[j].bitfield.disp16 = 0;
6845 break;
6846 }
6847 }
6848 }
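      /* Illustrative example: in 32-bit code, "addr16 movl symbol, %eax"
	 carries an 0x67 address size prefix, so the template's Disp32 is
	 narrowed to Disp16 above; in 64-bit code the prefix instead turns
	 Disp64 into Disp32.  */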
6849
6850 /* We check register size if needed. */
6851 if (t->opcode_modifier.checkregsize)
6852 {
6853 check_register = (1 << t->operands) - 1;
6854 if (i.broadcast.type || i.broadcast.bytes)
6855 check_register &= ~(1 << i.broadcast.operand);
6856 }
6857 else
6858 check_register = 0;
6859
6860 overlap0 = operand_type_and (i.types[0], operand_types[0]);
6861 switch (t->operands)
6862 {
6863 case 1:
6864 if (!operand_type_match (overlap0, i.types[0]))
6865 continue;
6866 break;
6867 case 2:
6868 /* xchg %eax, %eax is a special case. It is an alias for nop
6869 only in 32bit mode and we can use opcode 0x90. In 64bit
6870 mode, we can't use 0x90 for xchg %eax, %eax since it should
6871 zero-extend %eax to %rax. */
6872 if (flag_code == CODE_64BIT
6873 && t->base_opcode == 0x90
6874 && t->opcode_modifier.opcodespace == SPACE_BASE
6875 && i.types[0].bitfield.instance == Accum
6876 && i.types[0].bitfield.dword
6877 && i.types[1].bitfield.instance == Accum)
6878 continue;
6879
6880 if (t->base_opcode == MOV_AX_DISP32
6881 && t->opcode_modifier.opcodespace == SPACE_BASE)
6882 {
6883 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
6884 if (i.reloc[0] == BFD_RELOC_386_GOT32)
6885 continue;
6886
6887 /* xrelease mov %eax, <disp> is another special case. It must not
6888 match the accumulator-only encoding of mov. */
6889 if (i.hle_prefix)
6890 continue;
6891 }
6892 /* Fall through. */
6893
6894 case 3:
6895 if (!(size_match & MATCH_STRAIGHT))
6896 goto check_reverse;
6897 /* Reverse direction of operands if swapping is possible in the first
6898 place (operands need to be symmetric) and
6899 - the load form is requested, and the template is a store form,
6900 - the store form is requested, and the template is a load form,
6901 - the non-default (swapped) form is requested. */
6902 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6903 if (t->opcode_modifier.d && i.reg_operands == i.operands
6904 && !operand_type_all_zero (&overlap1))
6905 switch (i.dir_encoding)
6906 {
6907 case dir_encoding_load:
6908 if (operand_type_check (operand_types[i.operands - 1], anymem)
6909 || t->opcode_modifier.regmem)
6910 goto check_reverse;
6911 break;
6912
6913 case dir_encoding_store:
6914 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6915 && !t->opcode_modifier.regmem)
6916 goto check_reverse;
6917 break;
6918
6919 case dir_encoding_swap:
6920 goto check_reverse;
6921
6922 case dir_encoding_default:
6923 break;
6924 }
6925 /* If we want store form, we skip the current load. */
6926 if ((i.dir_encoding == dir_encoding_store
6927 || i.dir_encoding == dir_encoding_swap)
6928 && i.mem_operands == 0
6929 && t->opcode_modifier.load)
6930 continue;
6931 /* Fall through. */
6932 case 4:
6933 case 5:
6934 overlap1 = operand_type_and (i.types[1], operand_types[1]);
6935 if (!operand_type_match (overlap0, i.types[0])
6936 || !operand_type_match (overlap1, i.types[1])
6937 || ((check_register & 3) == 3
6938 && !operand_type_register_match (i.types[0],
6939 operand_types[0],
6940 i.types[1],
6941 operand_types[1])))
6942 {
6943 specific_error = progress (i.error);
6944
6945 /* Check if other direction is valid ... */
6946 if (!t->opcode_modifier.d)
6947 continue;
6948
6949 check_reverse:
6950 if (!(size_match & MATCH_REVERSE))
6951 continue;
6952 /* Try reversing direction of operands. */
6953 j = t->opcode_modifier.vexsources ? 1 : i.operands - 1;
6954 overlap0 = operand_type_and (i.types[0], operand_types[j]);
6955 overlap1 = operand_type_and (i.types[j], operand_types[0]);
6956 overlap2 = operand_type_and (i.types[1], operand_types[1]);
6957 gas_assert (t->operands != 3 || !check_register);
6958 if (!operand_type_match (overlap0, i.types[0])
6959 || !operand_type_match (overlap1, i.types[j])
6960 || (t->operands == 3
6961 && !operand_type_match (overlap2, i.types[1]))
6962 || (check_register
6963 && !operand_type_register_match (i.types[0],
6964 operand_types[j],
6965 i.types[j],
6966 operand_types[0])))
6967 {
6968 /* Does not match either direction. */
6969 specific_error = progress (i.error);
6970 continue;
6971 }
6972 /* found_reverse_match holds which variant of D
6973 we've found. */
6974 if (!t->opcode_modifier.d)
6975 found_reverse_match = 0;
6976 else if (operand_types[0].bitfield.tbyte)
6977 {
6978 if (t->opcode_modifier.operandconstraint != UGH)
6979 found_reverse_match = Opcode_FloatD;
6980 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
6981 if ((t->base_opcode & 0x20)
6982 && (intel_syntax || intel_mnemonic))
6983 found_reverse_match |= Opcode_FloatR;
6984 }
6985 else if (t->opcode_modifier.vexsources)
6986 {
6987 found_reverse_match = Opcode_VexW;
6988 goto check_operands_345;
6989 }
6990 else if (t->opcode_modifier.opcodespace != SPACE_BASE
6991 && (t->opcode_modifier.opcodespace != SPACE_0F
6992 /* MOV to/from CR/DR/TR, as an exception, follow
6993 the base opcode space encoding model. */
6994 || (t->base_opcode | 7) != 0x27))
6995 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
6996 ? Opcode_ExtD : Opcode_SIMD_IntD;
6997 else if (!t->opcode_modifier.commutative)
6998 found_reverse_match = Opcode_D;
6999 else
7000 found_reverse_match = ~0;
7001 }
7002 else
7003 {
7004 /* Found a forward 2 operand match here. */
7005 check_operands_345:
7006 switch (t->operands)
7007 {
7008 case 5:
7009 overlap4 = operand_type_and (i.types[4], operand_types[4]);
7010 if (!operand_type_match (overlap4, i.types[4])
7011 || !operand_type_register_match (i.types[3],
7012 operand_types[3],
7013 i.types[4],
7014 operand_types[4]))
7015 {
7016 specific_error = progress (i.error);
7017 continue;
7018 }
7019 /* Fall through. */
7020 case 4:
7021 overlap3 = operand_type_and (i.types[3], operand_types[3]);
7022 if (!operand_type_match (overlap3, i.types[3])
7023 || ((check_register & 0xa) == 0xa
7024 && !operand_type_register_match (i.types[1],
7025 operand_types[1],
7026 i.types[3],
7027 operand_types[3]))
7028 || ((check_register & 0xc) == 0xc
7029 && !operand_type_register_match (i.types[2],
7030 operand_types[2],
7031 i.types[3],
7032 operand_types[3])))
7033 {
7034 specific_error = progress (i.error);
7035 continue;
7036 }
7037 /* Fall through. */
7038 case 3:
7039 overlap2 = operand_type_and (i.types[2], operand_types[2]);
7040 if (!operand_type_match (overlap2, i.types[2])
7041 || ((check_register & 5) == 5
7042 && !operand_type_register_match (i.types[0],
7043 operand_types[0],
7044 i.types[2],
7045 operand_types[2]))
7046 || ((check_register & 6) == 6
7047 && !operand_type_register_match (i.types[1],
7048 operand_types[1],
7049 i.types[2],
7050 operand_types[2])))
7051 {
7052 specific_error = progress (i.error);
7053 continue;
7054 }
7055 break;
7056 }
7057 }
7058 /* Found either forward/reverse 2, 3 or 4 operand match here:
7059 slip through to break. */
7060 }
7061
7062 /* Check if VEX/EVEX encoding requirements can be satisfied. */
7063 if (VEX_check_encoding (t))
7064 {
7065 specific_error = progress (i.error);
7066 continue;
7067 }
7068
7069 /* Check if vector operands are valid. */
7070 if (check_VecOperands (t))
7071 {
7072 specific_error = progress (i.error);
7073 continue;
7074 }
7075
7076 /* We've found a match; break out of loop. */
7077 break;
7078 }
7079
7080 #undef progress
7081
7082 if (t == current_templates->end)
7083 {
7084 /* We found no match. */
7085 i.error = specific_error;
7086 return NULL;
7087 }
7088
7089 if (!quiet_warnings)
7090 {
7091 if (!intel_syntax
7092 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
7093 as_warn (_("indirect %s without `*'"), t->name);
7094
7095 if (t->opcode_modifier.isprefix
7096 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
7097 {
7098 /* Warn them that a data or address size prefix doesn't
7099 affect assembly of the next line of code. */
7100 as_warn (_("stand-alone `%s' prefix"), t->name);
7101 }
7102 }
7103
7104 /* Copy the template we found. */
7105 install_template (t);
7106
7107 if (addr_prefix_disp != -1)
7108 i.tm.operand_types[addr_prefix_disp]
7109 = operand_types[addr_prefix_disp];
7110
7111 switch (found_reverse_match)
7112 {
7113 case 0:
7114 break;
7115
7116 default:
7117 /* If we found a reverse match we must alter the opcode direction
7118 bit and clear/flip the regmem modifier one. found_reverse_match
7119 holds bits to change (different for int & float insns). */
7120
7121 i.tm.base_opcode ^= found_reverse_match;
7122
7123 /* Certain SIMD insns have their load forms specified in the opcode
7124 table, and hence we need to _set_ RegMem instead of clearing it.
7125 We need to avoid setting the bit though on insns like KMOVW. */
7126 i.tm.opcode_modifier.regmem
7127 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7128 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7129 && !i.tm.opcode_modifier.regmem;
7130
7131 /* Fall through. */
7132 case ~0:
7133 i.tm.operand_types[0] = operand_types[i.operands - 1];
7134 i.tm.operand_types[i.operands - 1] = operand_types[0];
7135 break;
7136
7137 case Opcode_VexW:
7138 /* Only the first two register operands need reversing, alongside
7139 flipping VEX.W. */
7140 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7141
7142 j = i.tm.operand_types[0].bitfield.imm8;
7143 i.tm.operand_types[j] = operand_types[j + 1];
7144 i.tm.operand_types[j + 1] = operand_types[j];
7145 break;
7146 }
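  /* Illustrative example: flipping the D bit turns base opcode 0x88
     (MOV r/m8, r8) into 0x8a (MOV r8, r/m8), so "mov (%bx), %al" can
     reuse the store-form template with its operands reversed.  */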
7147
7148 return t;
7149 }
7150
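/* Illustrative examples for check_string() below: MOVS has its
   destination fixed to %es:(%di), so "movsw (%si), %fs:(%di)" is
   rejected with "operand 2 must use `%es' segment", while an override
   on the source, as in "movsw %fs:(%si), %es:(%di)", is accepted.  */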
7151 static int
7152 check_string (void)
7153 {
7154 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7155 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7156
7157 if (i.seg[op] != NULL && i.seg[op] != reg_es)
7158 {
7159 as_bad (_("`%s' operand %u must use `%ses' segment"),
7160 i.tm.name,
7161 intel_syntax ? i.tm.operands - es_op : es_op + 1,
7162 register_prefix);
7163 return 0;
7164 }
7165
7166 /* There's only ever one segment override allowed per instruction.
7167 This instruction possibly has a legal segment override on the
7168 second operand, so copy the segment to where non-string
7169 instructions store it, allowing common code. */
7170 i.seg[op] = i.seg[1];
7171
7172 return 1;
7173 }
7174
7175 static int
7176 process_suffix (void)
7177 {
7178 bool is_crc32 = false, is_movx = false;
7179
7180 /* If matched instruction specifies an explicit instruction mnemonic
7181 suffix, use it. */
7182 if (i.tm.opcode_modifier.size == SIZE16)
7183 i.suffix = WORD_MNEM_SUFFIX;
7184 else if (i.tm.opcode_modifier.size == SIZE32)
7185 i.suffix = LONG_MNEM_SUFFIX;
7186 else if (i.tm.opcode_modifier.size == SIZE64)
7187 i.suffix = QWORD_MNEM_SUFFIX;
7188 else if (i.reg_operands
7189 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7190 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7191 {
7192 unsigned int numop = i.operands;
7193
7194 /* MOVSX/MOVZX */
7195 is_movx = (i.tm.opcode_modifier.opcodespace == SPACE_0F
7196 && (i.tm.base_opcode | 8) == 0xbe)
7197 || (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7198 && i.tm.base_opcode == 0x63
7199 && i.tm.cpu_flags.bitfield.cpu64);
7200
7201 /* CRC32 */
7202 is_crc32 = (i.tm.base_opcode == 0xf0
7203 && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7204 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);
7205
7206 /* movsx/movzx want only their source operand considered here, for the
7207 ambiguity checking below. The suffix will be replaced afterwards
7208 to represent the destination (register). */
7209 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7210 --i.operands;
7211
7212 /* crc32 needs REX.W set regardless of suffix / source operand size. */
7213 if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
7214 i.rex |= REX_W;
7215
7216 /* If there's no instruction mnemonic suffix we try to invent one
7217 based on GPR operands. */
7218 if (!i.suffix)
7219 {
7220 /* We take i.suffix from the last register operand specified.
7221 The destination register type is more significant than the
7222 source register type. crc32 in SSE4.2 prefers the source
7223 register type. */
7224 unsigned int op = is_crc32 ? 1 : i.operands;
7225
7226 while (op--)
7227 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7228 || i.tm.operand_types[op].bitfield.instance == Accum)
7229 {
7230 if (i.types[op].bitfield.class != Reg)
7231 continue;
7232 if (i.types[op].bitfield.byte)
7233 i.suffix = BYTE_MNEM_SUFFIX;
7234 else if (i.types[op].bitfield.word)
7235 i.suffix = WORD_MNEM_SUFFIX;
7236 else if (i.types[op].bitfield.dword)
7237 i.suffix = LONG_MNEM_SUFFIX;
7238 else if (i.types[op].bitfield.qword)
7239 i.suffix = QWORD_MNEM_SUFFIX;
7240 else
7241 continue;
7242 break;
7243 }
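	  /* Illustrative example: "mov %eax, (%rbx)" carries no suffix;
	     the loop above finds the 32-bit register operand and sets
	     i.suffix to 'l', as if "movl" had been written.  */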
7244
7245 /* As an exception, movsx/movzx silently default to a byte source
7246 in AT&T mode. */
7247 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7248 i.suffix = BYTE_MNEM_SUFFIX;
7249 }
7250 else if (i.suffix == BYTE_MNEM_SUFFIX)
7251 {
7252 if (!check_byte_reg ())
7253 return 0;
7254 }
7255 else if (i.suffix == LONG_MNEM_SUFFIX)
7256 {
7257 if (!check_long_reg ())
7258 return 0;
7259 }
7260 else if (i.suffix == QWORD_MNEM_SUFFIX)
7261 {
7262 if (!check_qword_reg ())
7263 return 0;
7264 }
7265 else if (i.suffix == WORD_MNEM_SUFFIX)
7266 {
7267 if (!check_word_reg ())
7268 return 0;
7269 }
7270 else if (intel_syntax
7271 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7272 /* Do nothing if the instruction is going to ignore the prefix. */
7273 ;
7274 else
7275 abort ();
7276
7277 /* Undo the movsx/movzx change done above. */
7278 i.operands = numop;
7279 }
7280 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7281 && !i.suffix)
7282 {
7283 i.suffix = stackop_size;
7284 if (stackop_size == LONG_MNEM_SUFFIX)
7285 {
7286 /* stackop_size is set to LONG_MNEM_SUFFIX for the
7287 .code16gcc directive to support 16-bit mode with
7288 32-bit address. For IRET without a suffix, generate
7289 16-bit IRET (opcode 0xcf) to return from an interrupt
7290 handler. */
7291 if (i.tm.base_opcode == 0xcf)
7292 {
7293 i.suffix = WORD_MNEM_SUFFIX;
7294 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7295 }
7296 /* Warn about changed behavior for segment register push/pop. */
7297 else if ((i.tm.base_opcode | 1) == 0x07)
7298 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7299 i.tm.name);
7300 }
7301 }
7302 else if (!i.suffix
7303 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7304 || i.tm.opcode_modifier.jump == JUMP_BYTE
7305 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7306 || (i.tm.opcode_modifier.opcodespace == SPACE_0F
7307 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7308 && i.tm.extension_opcode <= 3)))
7309 {
7310 switch (flag_code)
7311 {
7312 case CODE_64BIT:
7313 if (!i.tm.opcode_modifier.no_qsuf)
7314 {
7315 if (i.tm.opcode_modifier.jump == JUMP_BYTE
7316 || i.tm.opcode_modifier.no_lsuf)
7317 i.suffix = QWORD_MNEM_SUFFIX;
7318 break;
7319 }
7320 /* Fall through. */
7321 case CODE_32BIT:
7322 if (!i.tm.opcode_modifier.no_lsuf)
7323 i.suffix = LONG_MNEM_SUFFIX;
7324 break;
7325 case CODE_16BIT:
7326 if (!i.tm.opcode_modifier.no_wsuf)
7327 i.suffix = WORD_MNEM_SUFFIX;
7328 break;
7329 }
7330 }
7331
7332 if (!i.suffix
7333 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7334 /* Also cover lret/retf/iret in 64-bit mode. */
7335 || (flag_code == CODE_64BIT
7336 && !i.tm.opcode_modifier.no_lsuf
7337 && !i.tm.opcode_modifier.no_qsuf))
7338 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7339 /* Explicit sizing prefixes are assumed to disambiguate insns. */
7340 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7341 /* Accept FLDENV et al without suffix. */
7342 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7343 {
7344 unsigned int suffixes, evex = 0;
7345
7346 suffixes = !i.tm.opcode_modifier.no_bsuf;
7347 if (!i.tm.opcode_modifier.no_wsuf)
7348 suffixes |= 1 << 1;
7349 if (!i.tm.opcode_modifier.no_lsuf)
7350 suffixes |= 1 << 2;
7351 if (!i.tm.opcode_modifier.no_ssuf)
7352 suffixes |= 1 << 4;
7353 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7354 suffixes |= 1 << 5;
7355
7356 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
7357 also suitable for AT&T syntax mode, it was requested that this be
7358 restricted to just Intel syntax. */
7359 if (intel_syntax && is_any_vex_encoding (&i.tm)
7360 && !i.broadcast.type && !i.broadcast.bytes)
7361 {
7362 unsigned int op;
7363
7364 for (op = 0; op < i.tm.operands; ++op)
7365 {
7366 if (is_evex_encoding (&i.tm)
7367 && !cpu_arch_flags.bitfield.cpuavx512vl)
7368 {
7369 if (i.tm.operand_types[op].bitfield.ymmword)
7370 i.tm.operand_types[op].bitfield.xmmword = 0;
7371 if (i.tm.operand_types[op].bitfield.zmmword)
7372 i.tm.operand_types[op].bitfield.ymmword = 0;
7373 if (!i.tm.opcode_modifier.evex
7374 || i.tm.opcode_modifier.evex == EVEXDYN)
7375 i.tm.opcode_modifier.evex = EVEX512;
7376 }
7377
7378 if (i.tm.operand_types[op].bitfield.xmmword
7379 + i.tm.operand_types[op].bitfield.ymmword
7380 + i.tm.operand_types[op].bitfield.zmmword < 2)
7381 continue;
7382
7383 /* Any properly sized operand disambiguates the insn. */
7384 if (i.types[op].bitfield.xmmword
7385 || i.types[op].bitfield.ymmword
7386 || i.types[op].bitfield.zmmword)
7387 {
7388 suffixes &= ~(7 << 6);
7389 evex = 0;
7390 break;
7391 }
7392
7393 if ((i.flags[op] & Operand_Mem)
7394 && i.tm.operand_types[op].bitfield.unspecified)
7395 {
7396 if (i.tm.operand_types[op].bitfield.xmmword)
7397 suffixes |= 1 << 6;
7398 if (i.tm.operand_types[op].bitfield.ymmword)
7399 suffixes |= 1 << 7;
7400 if (i.tm.operand_types[op].bitfield.zmmword)
7401 suffixes |= 1 << 8;
7402 if (is_evex_encoding (&i.tm))
7403 evex = EVEX512;
7404 }
7405 }
7406 }
7407
7408 /* Are multiple suffixes / operand sizes allowed? */
7409 if (suffixes & (suffixes - 1))
7410 {
7411 if (intel_syntax
7412 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7413 || operand_check == check_error))
7414 {
7415 as_bad (_("ambiguous operand size for `%s'"), i.tm.name);
7416 return 0;
7417 }
7418 if (operand_check == check_error)
7419 {
7420 as_bad (_("no instruction mnemonic suffix given and "
7421 "no register operands; can't size `%s'"), i.tm.name);
7422 return 0;
7423 }
7424 if (operand_check == check_warning)
7425 as_warn (_("%s; using default for `%s'"),
7426 intel_syntax
7427 ? _("ambiguous operand size")
7428 : _("no instruction mnemonic suffix given and "
7429 "no register operands"),
7430 i.tm.name);
7431
7432 if (i.tm.opcode_modifier.floatmf)
7433 i.suffix = SHORT_MNEM_SUFFIX;
7434 else if (is_movx)
7435 /* handled below */;
7436 else if (evex)
7437 i.tm.opcode_modifier.evex = evex;
7438 else if (flag_code == CODE_16BIT)
7439 i.suffix = WORD_MNEM_SUFFIX;
7440 else if (!i.tm.opcode_modifier.no_lsuf)
7441 i.suffix = LONG_MNEM_SUFFIX;
7442 else
7443 i.suffix = QWORD_MNEM_SUFFIX;
7444 }
7445 }
7446
7447 if (is_movx)
7448 {
7449 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7450 In AT&T syntax, if there is no suffix (warned about above), the default
7451 will be byte extension. */
7452 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7453 i.tm.base_opcode |= 1;
7454
7455 /* For further processing, the suffix should represent the destination
7456 (register). This is already the case when one was used with
7457 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7458 no suffix to begin with. */
7459 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7460 {
7461 if (i.types[1].bitfield.word)
7462 i.suffix = WORD_MNEM_SUFFIX;
7463 else if (i.types[1].bitfield.qword)
7464 i.suffix = QWORD_MNEM_SUFFIX;
7465 else
7466 i.suffix = LONG_MNEM_SUFFIX;
7467
7468 i.tm.opcode_modifier.w = 0;
7469 }
7470 }
7471
7472 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7473 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7474 != (i.tm.operand_types[1].bitfield.class == Reg);
7475
7476 /* Change the opcode based on the operand size given by i.suffix. */
7477 switch (i.suffix)
7478 {
7479 /* Size floating point instruction. */
7480 case LONG_MNEM_SUFFIX:
7481 if (i.tm.opcode_modifier.floatmf)
7482 {
7483 i.tm.base_opcode ^= 4;
7484 break;
7485 }
7486 /* fall through */
7487 case WORD_MNEM_SUFFIX:
7488 case QWORD_MNEM_SUFFIX:
7489 /* It's not a byte, select word/dword operation. */
7490 if (i.tm.opcode_modifier.w)
7491 {
7492 if (i.short_form)
7493 i.tm.base_opcode |= 8;
7494 else
7495 i.tm.base_opcode |= 1;
7496 }
7497 /* fall through */
7498 case SHORT_MNEM_SUFFIX:
7499 /* Now select between word & dword operations via the operand
7500 size prefix, except for instructions that will ignore this
7501 prefix anyway. */
7502 if (i.suffix != QWORD_MNEM_SUFFIX
7503 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7504 && !i.tm.opcode_modifier.floatmf
7505 && !is_any_vex_encoding (&i.tm)
7506 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7507 || (flag_code == CODE_64BIT
7508 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7509 {
7510 unsigned int prefix = DATA_PREFIX_OPCODE;
7511
7512 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7513 prefix = ADDR_PREFIX_OPCODE;
7514
7515 if (!add_prefix (prefix))
7516 return 0;
7517 }
7518
7519 /* Set mode64 for an operand. */
7520 if (i.suffix == QWORD_MNEM_SUFFIX
7521 && flag_code == CODE_64BIT
7522 && !i.tm.opcode_modifier.norex64
7523 && !i.tm.opcode_modifier.vexw
7524 /* Special case for xchg %rax,%rax. It is NOP and doesn't
7525 need rex64. */
7526 && ! (i.operands == 2
7527 && i.tm.base_opcode == 0x90
7528 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
7529 && i.types[0].bitfield.instance == Accum
7530 && i.types[0].bitfield.qword
7531 && i.types[1].bitfield.instance == Accum))
7532 i.rex |= REX_W;
7533
7534 break;
7535
7536 case 0:
7537 /* Select word/dword/qword operation with explicit data sizing prefix
7538 when there are no suitable register operands. */
7539 if (i.tm.opcode_modifier.w
7540 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7541 && (!i.reg_operands
7542 || (i.reg_operands == 1
7543 /* ShiftCount */
7544 && (i.tm.operand_types[0].bitfield.instance == RegC
7545 /* InOutPortReg */
7546 || i.tm.operand_types[0].bitfield.instance == RegD
7547 || i.tm.operand_types[1].bitfield.instance == RegD
7548 /* CRC32 */
7549 || is_crc32))))
7550 i.tm.base_opcode |= 1;
7551 break;
7552 }
7553
7554 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7555 {
7556 gas_assert (!i.suffix);
7557 gas_assert (i.reg_operands);
7558
7559 if (i.tm.operand_types[0].bitfield.instance == Accum
7560 || i.operands == 1)
7561 {
7562 /* The address size override prefix changes the size of the
7563 first operand. */
7564 if (flag_code == CODE_64BIT
7565 && i.op[0].regs->reg_type.bitfield.word)
7566 {
7567 as_bad (_("16-bit addressing unavailable for `%s'"),
7568 i.tm.name);
7569 return 0;
7570 }
7571
7572 if ((flag_code == CODE_32BIT
7573 ? i.op[0].regs->reg_type.bitfield.word
7574 : i.op[0].regs->reg_type.bitfield.dword)
7575 && !add_prefix (ADDR_PREFIX_OPCODE))
7576 return 0;
7577 }
7578 else
7579 {
7580 /* Check invalid register operand when the address size override
7581 prefix changes the size of register operands. */
7582 unsigned int op;
7583 enum { need_word, need_dword, need_qword } need;
7584
7585 /* Check the register operand for the address size prefix if
7586 the memory operand has no real registers, like symbol, DISP
7587 or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant. */
7588 if (i.mem_operands == 1
7589 && i.reg_operands == 1
7590 && i.operands == 2
7591 && i.types[1].bitfield.class == Reg
7592 && (flag_code == CODE_32BIT
7593 ? i.op[1].regs->reg_type.bitfield.word
7594 : i.op[1].regs->reg_type.bitfield.dword)
7595 && ((i.base_reg == NULL && i.index_reg == NULL)
7596 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7597 || (x86_elf_abi == X86_64_X32_ABI
7598 && i.base_reg
7599 && i.base_reg->reg_num == RegIP
7600 && i.base_reg->reg_type.bitfield.qword))
7601 #else
7602 || 0)
7603 #endif
7604 && !add_prefix (ADDR_PREFIX_OPCODE))
7605 return 0;
7606
7607 if (flag_code == CODE_32BIT)
7608 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7609 else if (i.prefix[ADDR_PREFIX])
7610 need = need_dword;
7611 else
7612 need = flag_code == CODE_64BIT ? need_qword : need_word;
7613
7614 for (op = 0; op < i.operands; op++)
7615 {
7616 if (i.types[op].bitfield.class != Reg)
7617 continue;
7618
7619 switch (need)
7620 {
7621 case need_word:
7622 if (i.op[op].regs->reg_type.bitfield.word)
7623 continue;
7624 break;
7625 case need_dword:
7626 if (i.op[op].regs->reg_type.bitfield.dword)
7627 continue;
7628 break;
7629 case need_qword:
7630 if (i.op[op].regs->reg_type.bitfield.qword)
7631 continue;
7632 break;
7633 }
7634
7635 as_bad (_("invalid register operand size for `%s'"),
7636 i.tm.name);
7637 return 0;
7638 }
7639 }
7640 }
7641
7642 return 1;
7643 }
7644
7645 static int
7646 check_byte_reg (void)
7647 {
7648 int op;
7649
7650 for (op = i.operands; --op >= 0;)
7651 {
7652 /* Skip non-register operands. */
7653 if (i.types[op].bitfield.class != Reg)
7654 continue;
7655
7656 /* If this is an eight bit register, it's OK. If it's the 16 or
7657 32 bit version of an eight bit register, we will just use the
7658 low portion, and that's OK too. */
7659 if (i.types[op].bitfield.byte)
7660 continue;
7661
7662 /* I/O port address operands are OK too. */
7663 if (i.tm.operand_types[op].bitfield.instance == RegD
7664 && i.tm.operand_types[op].bitfield.word)
7665 continue;
7666
7667 /* crc32 only wants its source operand checked here. */
7668 if (i.tm.base_opcode == 0xf0
7669 && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7670 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7671 && op != 0)
7672 continue;
7673
7674 /* Any other register is bad. */
7675 as_bad (_("`%s%s' not allowed with `%s%c'"),
7676 register_prefix, i.op[op].regs->reg_name,
7677 i.tm.name, i.suffix);
7678 return 0;
7679 }
7680 return 1;
7681 }
7682
7683 static int
7684 check_long_reg (void)
7685 {
7686 int op;
7687
7688 for (op = i.operands; --op >= 0;)
7689 /* Skip non-register operands. */
7690 if (i.types[op].bitfield.class != Reg)
7691 continue;
7692 /* Reject eight bit registers, except where the template requires
7693 them (e.g. movzb). */
7694 else if (i.types[op].bitfield.byte
7695 && (i.tm.operand_types[op].bitfield.class == Reg
7696 || i.tm.operand_types[op].bitfield.instance == Accum)
7697 && (i.tm.operand_types[op].bitfield.word
7698 || i.tm.operand_types[op].bitfield.dword))
7699 {
7700 as_bad (_("`%s%s' not allowed with `%s%c'"),
7701 register_prefix,
7702 i.op[op].regs->reg_name,
7703 i.tm.name,
7704 i.suffix);
7705 return 0;
7706 }
7707 /* Error if the e prefix on a general reg is missing. */
7708 else if (i.types[op].bitfield.word
7709 && (i.tm.operand_types[op].bitfield.class == Reg
7710 || i.tm.operand_types[op].bitfield.instance == Accum)
7711 && i.tm.operand_types[op].bitfield.dword)
7712 {
7713 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7714 register_prefix, i.op[op].regs->reg_name,
7715 i.suffix);
7716 return 0;
7717 }
7718 /* Error if the r prefix on a general reg is present. */
7719 else if (i.types[op].bitfield.qword
7720 && (i.tm.operand_types[op].bitfield.class == Reg
7721 || i.tm.operand_types[op].bitfield.instance == Accum)
7722 && i.tm.operand_types[op].bitfield.dword)
7723 {
7724 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7725 register_prefix, i.op[op].regs->reg_name, i.suffix);
7726 return 0;
7727 }
7728 return 1;
7729 }
7730
7731 static int
7732 check_qword_reg (void)
7733 {
7734 int op;
7735
7736 for (op = i.operands; --op >= 0; )
7737 /* Skip non-register operands. */
7738 if (i.types[op].bitfield.class != Reg)
7739 continue;
7740 /* Reject eight bit registers, except where the template requires
7741 	 them (e.g. movzb).  */
7742 else if (i.types[op].bitfield.byte
7743 && (i.tm.operand_types[op].bitfield.class == Reg
7744 || i.tm.operand_types[op].bitfield.instance == Accum)
7745 && (i.tm.operand_types[op].bitfield.word
7746 || i.tm.operand_types[op].bitfield.dword))
7747 {
7748 as_bad (_("`%s%s' not allowed with `%s%c'"),
7749 register_prefix,
7750 i.op[op].regs->reg_name,
7751 i.tm.name,
7752 i.suffix);
7753 return 0;
7754 }
7755   /* Error if the `r' prefix on a general reg is missing.  */
7756 else if ((i.types[op].bitfield.word
7757 || i.types[op].bitfield.dword)
7758 && (i.tm.operand_types[op].bitfield.class == Reg
7759 || i.tm.operand_types[op].bitfield.instance == Accum)
7760 && i.tm.operand_types[op].bitfield.qword)
7761 {
7762 	  /* Prohibit these changes in 64-bit mode, since the lowering
7763 	     would be more complicated; error out instead.  */
7764 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7765 register_prefix, i.op[op].regs->reg_name, i.suffix);
7766 return 0;
7767 }
7768 return 1;
7769 }
7770
7771 static int
7772 check_word_reg (void)
7773 {
7774 int op;
7775 for (op = i.operands; --op >= 0;)
7776 /* Skip non-register operands. */
7777 if (i.types[op].bitfield.class != Reg)
7778 continue;
7779 /* Reject eight bit registers, except where the template requires
7780 	 them (e.g. movzb).  */
7781 else if (i.types[op].bitfield.byte
7782 && (i.tm.operand_types[op].bitfield.class == Reg
7783 || i.tm.operand_types[op].bitfield.instance == Accum)
7784 && (i.tm.operand_types[op].bitfield.word
7785 || i.tm.operand_types[op].bitfield.dword))
7786 {
7787 as_bad (_("`%s%s' not allowed with `%s%c'"),
7788 register_prefix,
7789 i.op[op].regs->reg_name,
7790 i.tm.name,
7791 i.suffix);
7792 return 0;
7793 }
7794 /* Error if the e or r prefix on a general reg is present. */
7795 else if ((i.types[op].bitfield.dword
7796 || i.types[op].bitfield.qword)
7797 && (i.tm.operand_types[op].bitfield.class == Reg
7798 || i.tm.operand_types[op].bitfield.instance == Accum)
7799 && i.tm.operand_types[op].bitfield.word)
7800 {
7801 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7802 register_prefix, i.op[op].regs->reg_name,
7803 i.suffix);
7804 return 0;
7805 }
7806 return 1;
7807 }
7808
7809 static int
7810 update_imm (unsigned int j)
7811 {
7812 i386_operand_type overlap = i.types[j];
7813 if (overlap.bitfield.imm8
7814 + overlap.bitfield.imm8s
7815 + overlap.bitfield.imm16
7816 + overlap.bitfield.imm32
7817 + overlap.bitfield.imm32s
7818 + overlap.bitfield.imm64 > 1)
7819 {
7820 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
7821 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
7822 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
7823 static const i386_operand_type imm16_32 = { .bitfield =
7824 { .imm16 = 1, .imm32 = 1 }
7825 };
7826 static const i386_operand_type imm16_32s = { .bitfield =
7827 { .imm16 = 1, .imm32s = 1 }
7828 };
7829 static const i386_operand_type imm16_32_32s = { .bitfield =
7830 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
7831 };
7832
7833 if (i.suffix)
7834 {
7835 i386_operand_type temp;
7836
7837 operand_type_set (&temp, 0);
7838 if (i.suffix == BYTE_MNEM_SUFFIX)
7839 {
7840 temp.bitfield.imm8 = overlap.bitfield.imm8;
7841 temp.bitfield.imm8s = overlap.bitfield.imm8s;
7842 }
7843 else if (i.suffix == WORD_MNEM_SUFFIX)
7844 temp.bitfield.imm16 = overlap.bitfield.imm16;
7845 else if (i.suffix == QWORD_MNEM_SUFFIX)
7846 {
7847 temp.bitfield.imm64 = overlap.bitfield.imm64;
7848 temp.bitfield.imm32s = overlap.bitfield.imm32s;
7849 }
7850 else
7851 temp.bitfield.imm32 = overlap.bitfield.imm32;
7852 overlap = temp;
7853 }
7854 else if (operand_type_equal (&overlap, &imm16_32_32s)
7855 || operand_type_equal (&overlap, &imm16_32)
7856 || operand_type_equal (&overlap, &imm16_32s))
7857 {
7858 if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7859 overlap = imm16;
7860 else
7861 overlap = imm32s;
7862 }
7863 else if (i.prefix[REX_PREFIX] & REX_W)
7864 overlap = operand_type_and (overlap, imm32s);
7865 else if (i.prefix[DATA_PREFIX])
7866 overlap = operand_type_and (overlap,
7867 flag_code != CODE_16BIT ? imm16 : imm32);
7868 if (overlap.bitfield.imm8
7869 + overlap.bitfield.imm8s
7870 + overlap.bitfield.imm16
7871 + overlap.bitfield.imm32
7872 + overlap.bitfield.imm32s
7873 + overlap.bitfield.imm64 != 1)
7874 {
7875 as_bad (_("no instruction mnemonic suffix given; "
7876 "can't determine immediate size"));
7877 return 0;
7878 }
7879 }
7880 i.types[j] = overlap;
7881
7882 return 1;
7883 }
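/* A worked example of the narrowing above: `add $0x1234, %ax' starts out
   permitting Imm16|Imm32|Imm32S; the `w' suffix reduces that to Imm16, so
   a 2-byte immediate is emitted.  Without a suffix, the effective operand
   size (the mode XOR-ed with any data-size prefix) picks Imm16 or Imm32S.  */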
7884
7885 static int
7886 finalize_imm (void)
7887 {
7888 unsigned int j, n;
7889
7890 /* Update the first 2 immediate operands. */
7891 n = i.operands > 2 ? 2 : i.operands;
7892 if (n)
7893 {
7894 for (j = 0; j < n; j++)
7895 if (update_imm (j) == 0)
7896 return 0;
7897
7898       /* The 3rd operand can't be an immediate operand.  */
7899 gas_assert (operand_type_check (i.types[2], imm) == 0);
7900 }
7901
7902 return 1;
7903 }
7904
7905 static int
7906 process_operands (void)
7907 {
7908 /* Default segment register this instruction will use for memory
7909 accesses. 0 means unknown. This is only for optimizing out
7910 unnecessary segment overrides. */
7911 const reg_entry *default_seg = NULL;
7912
7913 if (i.tm.opcode_modifier.sse2avx)
7914 {
7915 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7916 need converting. */
7917 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7918 i.prefix[REX_PREFIX] = 0;
7919 i.rex_encoding = 0;
7920 }
7921 /* ImmExt should be processed after SSE2AVX. */
7922 else if (i.tm.opcode_modifier.immext)
7923 process_immext ();
7924
7925 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7926 {
7927 static const i386_operand_type regxmm = {
7928 .bitfield = { .class = RegSIMD, .xmmword = 1 }
7929 };
7930 unsigned int dupl = i.operands;
7931 unsigned int dest = dupl - 1;
7932 unsigned int j;
7933
7934 /* The destination must be an xmm register. */
7935 gas_assert (i.reg_operands
7936 && MAX_OPERANDS > dupl
7937 && operand_type_equal (&i.types[dest], &regxmm));
7938
7939 if (i.tm.operand_types[0].bitfield.instance == Accum
7940 && i.tm.operand_types[0].bitfield.xmmword)
7941 {
7942 if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7943 {
7944 /* Keep xmm0 for instructions with VEX prefix and 3
7945 sources. */
7946 i.tm.operand_types[0].bitfield.instance = InstanceNone;
7947 i.tm.operand_types[0].bitfield.class = RegSIMD;
7948 goto duplicate;
7949 }
7950 else
7951 {
7952 /* We remove the first xmm0 and keep the number of
7953 operands unchanged, which in fact duplicates the
7954 destination. */
7955 for (j = 1; j < i.operands; j++)
7956 {
7957 i.op[j - 1] = i.op[j];
7958 i.types[j - 1] = i.types[j];
7959 i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7960 i.flags[j - 1] = i.flags[j];
7961 }
7962 }
7963 }
7964 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
7965 {
7966 gas_assert ((MAX_OPERANDS - 1) > dupl
7967 && (i.tm.opcode_modifier.vexsources
7968 == VEX3SOURCES));
7969
7970 /* Add the implicit xmm0 for instructions with VEX prefix
7971 and 3 sources. */
7972 for (j = i.operands; j > 0; j--)
7973 {
7974 i.op[j] = i.op[j - 1];
7975 i.types[j] = i.types[j - 1];
7976 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7977 i.flags[j] = i.flags[j - 1];
7978 }
7979 i.op[0].regs
7980 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7981 i.types[0] = regxmm;
7982 i.tm.operand_types[0] = regxmm;
7983
7984 i.operands += 2;
7985 i.reg_operands += 2;
7986 i.tm.operands += 2;
7987
7988 dupl++;
7989 dest++;
7990 i.op[dupl] = i.op[dest];
7991 i.types[dupl] = i.types[dest];
7992 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
7993 i.flags[dupl] = i.flags[dest];
7994 }
7995 else
7996 {
7997 duplicate:
7998 i.operands++;
7999 i.reg_operands++;
8000 i.tm.operands++;
8001
8002 i.op[dupl] = i.op[dest];
8003 i.types[dupl] = i.types[dest];
8004 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8005 i.flags[dupl] = i.flags[dest];
8006 }
8007
8008 if (i.tm.opcode_modifier.immext)
8009 process_immext ();
8010 }
8011 else if (i.tm.operand_types[0].bitfield.instance == Accum
8012 && i.tm.operand_types[0].bitfield.xmmword)
8013 {
8014 unsigned int j;
8015
8016 for (j = 1; j < i.operands; j++)
8017 {
8018 i.op[j - 1] = i.op[j];
8019 i.types[j - 1] = i.types[j];
8020
8021 /* We need to adjust fields in i.tm since they are used by
8022 build_modrm_byte. */
8023 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
8024
8025 i.flags[j - 1] = i.flags[j];
8026 }
8027
8028 i.operands--;
8029 i.reg_operands--;
8030 i.tm.operands--;
8031 }
8032 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
8033 {
8034 unsigned int regnum, first_reg_in_group, last_reg_in_group;
8035
8036 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
8037 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
8038 regnum = register_number (i.op[1].regs);
8039 first_reg_in_group = regnum & ~3;
8040 last_reg_in_group = first_reg_in_group + 3;
8041 if (regnum != first_reg_in_group)
8042 as_warn (_("source register `%s%s' implicitly denotes"
8043 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
8044 register_prefix, i.op[1].regs->reg_name,
8045 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
8046 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
8047 i.tm.name);
8048 }
8049 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
8050 {
8051 /* The imul $imm, %reg instruction is converted into
8052 imul $imm, %reg, %reg, and the clr %reg instruction
8053 is converted into xor %reg, %reg. */
8054
8055 unsigned int first_reg_op;
8056
8057 if (operand_type_check (i.types[0], reg))
8058 first_reg_op = 0;
8059 else
8060 first_reg_op = 1;
8061 /* Pretend we saw the extra register operand. */
8062 gas_assert (i.reg_operands == 1
8063 && i.op[first_reg_op + 1].regs == 0);
8064 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
8065 i.types[first_reg_op + 1] = i.types[first_reg_op];
8066 i.operands++;
8067 i.reg_operands++;
8068 }
8069
8070 if (i.tm.opcode_modifier.modrm)
8071 {
8072 /* The opcode is completed (modulo i.tm.extension_opcode which
8073 must be put into the modrm byte). Now, we make the modrm and
8074 index base bytes based on all the info we've collected. */
8075
8076 default_seg = build_modrm_byte ();
8077 }
8078 else if (i.types[0].bitfield.class == SReg)
8079 {
8080 if (flag_code != CODE_64BIT
8081 ? i.tm.base_opcode == POP_SEG_SHORT
8082 && i.op[0].regs->reg_num == 1
8083 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
8084 && i.op[0].regs->reg_num < 4)
8085 {
8086 as_bad (_("you can't `%s %s%s'"),
8087 i.tm.name, register_prefix, i.op[0].regs->reg_name);
8088 return 0;
8089 }
8090 if (i.op[0].regs->reg_num > 3
8091 	  && i.tm.opcode_modifier.opcodespace == SPACE_BASE)
8092 {
8093 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
8094 i.tm.opcode_modifier.opcodespace = SPACE_0F;
8095 }
8096 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
8097 }
8098 else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
8099 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
8100 {
8101 default_seg = reg_ds;
8102 }
8103 else if (i.tm.opcode_modifier.isstring)
8104 {
8105 /* For the string instructions that allow a segment override
8106 on one of their operands, the default segment is ds. */
8107 default_seg = reg_ds;
8108 }
8109 else if (i.short_form)
8110 {
8111 /* The register or float register operand is in operand
8112 0 or 1. */
8113 const reg_entry *r = i.op[0].regs;
8114
8115 if (i.imm_operands
8116 || (r->reg_type.bitfield.instance == Accum && i.op[1].regs))
8117 r = i.op[1].regs;
8118 /* Register goes in low 3 bits of opcode. */
8119 i.tm.base_opcode |= r->reg_num;
8120 if ((r->reg_flags & RegRex) != 0)
8121 i.rex |= REX_B;
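      /* E.g. `push %rax' encodes as 0x50 + 0 = 0x50, while `push %r8'
	 additionally sets REX.B, giving 41 50.  */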
8122 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8123 {
8124 /* Warn about some common errors, but press on regardless. */
8125 if (i.operands != 2)
8126 {
8127 /* Extraneous `l' suffix on fp insn. */
8128 as_warn (_("translating to `%s %s%s'"), i.tm.name,
8129 register_prefix, i.op[0].regs->reg_name);
8130 }
8131 else if (i.op[0].regs->reg_type.bitfield.instance != Accum)
8132 {
8133 /* Reversed arguments on faddp or fmulp. */
8134 as_warn (_("translating to `%s %s%s,%s%s'"), i.tm.name,
8135 register_prefix, i.op[!intel_syntax].regs->reg_name,
8136 register_prefix, i.op[intel_syntax].regs->reg_name);
8137 }
8138 }
8139 }
8140
8141 if ((i.seg[0] || i.prefix[SEG_PREFIX])
8142 && i.tm.base_opcode == 0x8d /* lea */
8143 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
8144       && !is_any_vex_encoding (&i.tm))
8145 {
8146 if (!quiet_warnings)
8147 as_warn (_("segment override on `%s' is ineffectual"), i.tm.name);
8148 if (optimize)
8149 {
8150 i.seg[0] = NULL;
8151 i.prefix[SEG_PREFIX] = 0;
8152 }
8153 }
8154
8155 /* If a segment was explicitly specified, and the specified segment
8156 is neither the default nor the one already recorded from a prefix,
8157 use an opcode prefix to select it. If we never figured out what
8158 the default segment is, then default_seg will be zero at this
8159 point, and the specified segment prefix will always be used. */
8160 if (i.seg[0]
8161 && i.seg[0] != default_seg
8162 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8163 {
8164 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8165 return 0;
8166 }
8167 return 1;
8168 }
8169
8170 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
8171 bool do_sse2avx)
8172 {
8173 if (r->reg_flags & RegRex)
8174 {
8175 if (i.rex & rex_bit)
8176 as_bad (_("same type of prefix used twice"));
8177 i.rex |= rex_bit;
8178 }
8179 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8180 {
8181 gas_assert (i.vex.register_specifier == r);
8182 i.vex.register_specifier += 8;
8183 }
8184
8185 if (r->reg_flags & RegVRex)
8186 i.vrex |= rex_bit;
8187 }
8188
8189 static const reg_entry *
8190 build_modrm_byte (void)
8191 {
8192 const reg_entry *default_seg = NULL;
8193 unsigned int source, dest;
8194 int vex_3_sources;
8195
8196 vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
8197 if (vex_3_sources)
8198 {
8199 unsigned int nds, reg_slot;
8200 expressionS *exp;
8201
8202 dest = i.operands - 1;
8203 nds = dest - 1;
8204
8205 /* There are 2 kinds of instructions:
8206 1. 5 operands: 4 register operands or 3 register operands
8207 plus 1 memory operand plus one Imm4 operand, VexXDS, and
8208 VexW0 or VexW1. The destination must be either XMM, YMM or
8209 ZMM register.
8210 2. 4 operands: 4 register operands or 3 register operands
8211 plus 1 memory operand, with VexXDS. */
8212 gas_assert ((i.reg_operands == 4
8213 || (i.reg_operands == 3 && i.mem_operands == 1))
8214 && i.tm.opcode_modifier.vexvvvv == VEXXDS
8215 && i.tm.opcode_modifier.vexw
8216 && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8217
8218 /* If VexW1 is set, the first non-immediate operand is the source and
8219 the second non-immediate one is encoded in the immediate operand. */
8220 if (i.tm.opcode_modifier.vexw == VEXW1)
8221 {
8222 source = i.imm_operands;
8223 reg_slot = i.imm_operands + 1;
8224 }
8225 else
8226 {
8227 source = i.imm_operands + 1;
8228 reg_slot = i.imm_operands;
8229 }
8230
8231 if (i.imm_operands == 0)
8232 {
8233 /* When there is no immediate operand, generate an 8bit
8234 immediate operand to encode the first operand. */
8235 exp = &im_expressions[i.imm_operands++];
8236 i.op[i.operands].imms = exp;
8237 i.types[i.operands].bitfield.imm8 = 1;
8238 i.operands++;
8239
8240 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8241 exp->X_op = O_constant;
8242 exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8243 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8244 }
8245 else
8246 {
8247 gas_assert (i.imm_operands == 1);
8248 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8249 gas_assert (!i.tm.opcode_modifier.immext);
8250
8251 /* Turn on Imm8 again so that output_imm will generate it. */
8252 i.types[0].bitfield.imm8 = 1;
8253
8254 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8255 i.op[0].imms->X_add_number
8256 |= register_number (i.op[reg_slot].regs) << 4;
8257 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8258 }
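	  /* Either way, the extra register lands in the high four bits of
	     the trailing imm8 (the VEX /is4 byte); e.g. for
	     `vblendvps %xmm4, %xmm3, %xmm2, %xmm1' that byte is 0x40.  */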
8259
8260 gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8261 i.vex.register_specifier = i.op[nds].regs;
8262 }
8263 else
8264 source = dest = 0;
8265
8266   /* i.reg_operands MUST be the number of real register operands;
8267      implicit registers do not count.  If there are 3 register
8268      operands, it must be an instruction with VexNDS.  For an
8269      instruction with VexNDD, the destination register is encoded
8270      in the VEX prefix.  If there are 4 register operands, it must
8271      be an instruction with a VEX prefix and 3 sources.  */
8272 if (i.mem_operands == 0
8273 && ((i.reg_operands == 2
8274 && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8275 || (i.reg_operands == 3
8276 && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8277 || (i.reg_operands == 4 && vex_3_sources)))
8278 {
8279 switch (i.operands)
8280 {
8281 case 2:
8282 source = 0;
8283 break;
8284 case 3:
8285 /* When there are 3 operands, one of them may be immediate,
8286 which may be the first or the last operand. Otherwise,
8287 	     the first operand must be the shift count register (%cl), or
8288 	     it is an instruction with VexNDS.  */
8289 gas_assert (i.imm_operands == 1
8290 || (i.imm_operands == 0
8291 && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8292 || (i.types[0].bitfield.instance == RegC
8293 && i.types[0].bitfield.byte))));
8294 if (operand_type_check (i.types[0], imm)
8295 || (i.types[0].bitfield.instance == RegC
8296 && i.types[0].bitfield.byte))
8297 source = 1;
8298 else
8299 source = 0;
8300 break;
8301 case 4:
8302 /* When there are 4 operands, the first two must be 8bit
8303 immediate operands. The source operand will be the 3rd
8304 one.
8305
8306 	     For instructions with VexNDS, if the first operand is an
8307 	     imm8, the source operand is the 2nd one.  If the last
8308 	     operand is an imm8, the source operand is the first one.  */
8309 gas_assert ((i.imm_operands == 2
8310 && i.types[0].bitfield.imm8
8311 && i.types[1].bitfield.imm8)
8312 || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8313 && i.imm_operands == 1
8314 && (i.types[0].bitfield.imm8
8315 || i.types[i.operands - 1].bitfield.imm8)));
8316 if (i.imm_operands == 2)
8317 source = 2;
8318 else
8319 {
8320 if (i.types[0].bitfield.imm8)
8321 source = 1;
8322 else
8323 source = 0;
8324 }
8325 break;
8326 case 5:
8327 gas_assert (!is_evex_encoding (&i.tm));
8328 gas_assert (i.imm_operands == 1 && vex_3_sources);
8329 break;
8330 default:
8331 abort ();
8332 }
8333
8334 if (!vex_3_sources)
8335 {
8336 dest = source + 1;
8337
8338 if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8339 {
8340 /* For instructions with VexNDS, the register-only source
8341 operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8342 register. It is encoded in VEX prefix. */
8343
8344 i386_operand_type op;
8345 unsigned int vvvv;
8346
8347 /* Swap two source operands if needed. */
8348 if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES)
8349 {
8350 vvvv = source;
8351 source = dest;
8352 }
8353 else
8354 vvvv = dest;
8355
8356 op = i.tm.operand_types[vvvv];
8357 if ((dest + 1) >= i.operands
8358 || ((op.bitfield.class != Reg
8359 || (!op.bitfield.dword && !op.bitfield.qword))
8360 && op.bitfield.class != RegSIMD
8361 && op.bitfield.class != RegMask))
8362 abort ();
8363 i.vex.register_specifier = i.op[vvvv].regs;
8364 dest++;
8365 }
8366 }
8367
8368 i.rm.mode = 3;
8369 /* One of the register operands will be encoded in the i.rm.reg
8370 field, the other in the combined i.rm.mode and i.rm.regmem
8371 fields. If no form of this instruction supports a memory
8372 destination operand, then we assume the source operand may
8373 sometimes be a memory operand and so we need to store the
8374 destination in the i.rm.reg field. */
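	  /* E.g. `mov %eax, %ebx' (89 C3) stores the source %eax in
	     i.rm.reg and the destination %ebx in i.rm.regmem, because mov
	     also has a memory-destination form.  */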
8375 if (!i.tm.opcode_modifier.regmem
8376 && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8377 {
8378 i.rm.reg = i.op[dest].regs->reg_num;
8379 i.rm.regmem = i.op[source].regs->reg_num;
8380 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8381 set_rex_vrex (i.op[source].regs, REX_B, false);
8382 }
8383 else
8384 {
8385 i.rm.reg = i.op[source].regs->reg_num;
8386 i.rm.regmem = i.op[dest].regs->reg_num;
8387 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8388 set_rex_vrex (i.op[source].regs, REX_R, false);
8389 }
8390 if (flag_code != CODE_64BIT && (i.rex & REX_R))
8391 {
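	      /* Outside 64-bit mode, %cr8 can only be reached via AMD's
		 documented alternative of a LOCK prefix on the mov;
		 e.g. `mov %eax, %cr8' becomes F0 0F 22 C0.  */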
8392 if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8393 abort ();
8394 i.rex &= ~REX_R;
8395 add_prefix (LOCK_PREFIX_OPCODE);
8396 }
8397 }
8398 else
8399 { /* If it's not 2 reg operands... */
8400 unsigned int mem;
8401
8402 if (i.mem_operands)
8403 {
8404 unsigned int fake_zero_displacement = 0;
8405 unsigned int op;
8406
8407 for (op = 0; op < i.operands; op++)
8408 if (i.flags[op] & Operand_Mem)
8409 break;
8410 gas_assert (op < i.operands);
8411
8412 if (i.tm.opcode_modifier.sib)
8413 {
8414 /* The index register of VSIB shouldn't be RegIZ. */
8415 if (i.tm.opcode_modifier.sib != SIBMEM
8416 && i.index_reg->reg_num == RegIZ)
8417 abort ();
8418
8419 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8420 if (!i.base_reg)
8421 {
8422 i.sib.base = NO_BASE_REGISTER;
8423 i.sib.scale = i.log2_scale_factor;
8424 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8425 i.types[op].bitfield.disp32 = 1;
8426 }
8427
8428 	      /* Since the mandatory SIB always has an index register, the
8429 		 code logic remains unchanged here.  A non-mandatory SIB
8430 		 without an index register is allowed and will be handled
8431 		 later.  */
8432 if (i.index_reg)
8433 {
8434 if (i.index_reg->reg_num == RegIZ)
8435 i.sib.index = NO_INDEX_REGISTER;
8436 else
8437 i.sib.index = i.index_reg->reg_num;
8438 set_rex_vrex (i.index_reg, REX_X, false);
8439 }
8440 }
8441
8442 default_seg = reg_ds;
8443
8444 if (i.base_reg == 0)
8445 {
8446 i.rm.mode = 0;
8447 if (!i.disp_operands)
8448 fake_zero_displacement = 1;
8449 if (i.index_reg == 0)
8450 {
8451 		  /* This check covers both VSIB and the mandatory
		     non-vector SIB.  */
8452 gas_assert (!i.tm.opcode_modifier.sib
8453 || i.tm.opcode_modifier.sib == SIBMEM);
8454 /* Operand is just <disp> */
8455 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8456 if (flag_code == CODE_64BIT)
8457 		  /* In 64-bit mode, 32-bit absolute addressing is replaced
8458 		     by RIP-relative addressing, so plain absolute addressing
8459 		     must be encoded via one of the redundant SIB forms.  */
8461 redundant SIB forms. */
8462 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8463 i.sib.base = NO_BASE_REGISTER;
8464 i.sib.index = NO_INDEX_REGISTER;
8465 i.types[op].bitfield.disp32 = 1;
8466 }
8467 else if ((flag_code == CODE_16BIT)
8468 ^ (i.prefix[ADDR_PREFIX] != 0))
8469 {
8470 i.rm.regmem = NO_BASE_REGISTER_16;
8471 i.types[op].bitfield.disp16 = 1;
8472 }
8473 else
8474 {
8475 i.rm.regmem = NO_BASE_REGISTER;
8476 i.types[op].bitfield.disp32 = 1;
8477 }
8478 }
8479 else if (!i.tm.opcode_modifier.sib)
8480 {
8481 /* !i.base_reg && i.index_reg */
8482 if (i.index_reg->reg_num == RegIZ)
8483 i.sib.index = NO_INDEX_REGISTER;
8484 else
8485 i.sib.index = i.index_reg->reg_num;
8486 i.sib.base = NO_BASE_REGISTER;
8487 i.sib.scale = i.log2_scale_factor;
8488 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8489 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8490 i.types[op].bitfield.disp32 = 1;
8491 if ((i.index_reg->reg_flags & RegRex) != 0)
8492 i.rex |= REX_X;
8493 }
8494 }
8495 /* RIP addressing for 64bit mode. */
8496 else if (i.base_reg->reg_num == RegIP)
8497 {
8498 gas_assert (!i.tm.opcode_modifier.sib);
8499 i.rm.regmem = NO_BASE_REGISTER;
8500 i.types[op].bitfield.disp8 = 0;
8501 i.types[op].bitfield.disp16 = 0;
8502 i.types[op].bitfield.disp32 = 1;
8503 i.types[op].bitfield.disp64 = 0;
8504 i.flags[op] |= Operand_PCrel;
8505 if (! i.disp_operands)
8506 fake_zero_displacement = 1;
8507 }
8508 else if (i.base_reg->reg_type.bitfield.word)
8509 {
8510 gas_assert (!i.tm.opcode_modifier.sib);
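	      /* 16-bit ModRM r/m values, for reference:
		 0 = (%bx,%si)  1 = (%bx,%di)  2 = (%bp,%si)  3 = (%bp,%di)
		 4 = (%si)  5 = (%di)  6 = (%bp) or disp16  7 = (%bx).  */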
8511 switch (i.base_reg->reg_num)
8512 {
8513 case 3: /* (%bx) */
8514 if (i.index_reg == 0)
8515 i.rm.regmem = 7;
8516 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
8517 i.rm.regmem = i.index_reg->reg_num - 6;
8518 break;
8519 case 5: /* (%bp) */
8520 default_seg = reg_ss;
8521 if (i.index_reg == 0)
8522 {
8523 i.rm.regmem = 6;
8524 if (operand_type_check (i.types[op], disp) == 0)
8525 {
8526 /* fake (%bp) into 0(%bp) */
8527 if (i.disp_encoding == disp_encoding_16bit)
8528 i.types[op].bitfield.disp16 = 1;
8529 else
8530 i.types[op].bitfield.disp8 = 1;
8531 fake_zero_displacement = 1;
8532 }
8533 }
8534 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
8535 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8536 break;
8537 default: /* (%si) -> 4 or (%di) -> 5 */
8538 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8539 }
8540 if (!fake_zero_displacement
8541 && !i.disp_operands
8542 && i.disp_encoding)
8543 {
8544 fake_zero_displacement = 1;
8545 if (i.disp_encoding == disp_encoding_8bit)
8546 i.types[op].bitfield.disp8 = 1;
8547 else
8548 i.types[op].bitfield.disp16 = 1;
8549 }
8550 i.rm.mode = mode_from_disp_size (i.types[op]);
8551 }
8552 else /* i.base_reg and 32/64 bit mode */
8553 {
8554 if (operand_type_check (i.types[op], disp))
8555 {
8556 i.types[op].bitfield.disp16 = 0;
8557 i.types[op].bitfield.disp64 = 0;
8558 i.types[op].bitfield.disp32 = 1;
8559 }
8560
8561 if (!i.tm.opcode_modifier.sib)
8562 i.rm.regmem = i.base_reg->reg_num;
8563 if ((i.base_reg->reg_flags & RegRex) != 0)
8564 i.rex |= REX_B;
8565 i.sib.base = i.base_reg->reg_num;
8566 /* x86-64 ignores REX prefix bit here to avoid decoder
8567 complications. */
8568 if (!(i.base_reg->reg_flags & RegRex)
8569 && (i.base_reg->reg_num == EBP_REG_NUM
8570 || i.base_reg->reg_num == ESP_REG_NUM))
8571 default_seg = reg_ss;
8572 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8573 {
8574 fake_zero_displacement = 1;
8575 if (i.disp_encoding == disp_encoding_32bit)
8576 i.types[op].bitfield.disp32 = 1;
8577 else
8578 i.types[op].bitfield.disp8 = 1;
8579 }
8580 i.sib.scale = i.log2_scale_factor;
8581 if (i.index_reg == 0)
8582 {
8583 /* Only check for VSIB. */
8584 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8585 && i.tm.opcode_modifier.sib != VECSIB256
8586 && i.tm.opcode_modifier.sib != VECSIB512);
8587
8588 /* <disp>(%esp) becomes two byte modrm with no index
8589 register. We've already stored the code for esp
8590 		     in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8591 Any base register besides %esp will not use the
8592 extra modrm byte. */
8593 i.sib.index = NO_INDEX_REGISTER;
8594 }
8595 else if (!i.tm.opcode_modifier.sib)
8596 {
8597 if (i.index_reg->reg_num == RegIZ)
8598 i.sib.index = NO_INDEX_REGISTER;
8599 else
8600 i.sib.index = i.index_reg->reg_num;
8601 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8602 if ((i.index_reg->reg_flags & RegRex) != 0)
8603 i.rex |= REX_X;
8604 }
8605
8606 if (i.disp_operands
8607 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8608 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8609 i.rm.mode = 0;
8610 else
8611 {
8612 if (!fake_zero_displacement
8613 && !i.disp_operands
8614 && i.disp_encoding)
8615 {
8616 fake_zero_displacement = 1;
8617 if (i.disp_encoding == disp_encoding_8bit)
8618 i.types[op].bitfield.disp8 = 1;
8619 else
8620 i.types[op].bitfield.disp32 = 1;
8621 }
8622 i.rm.mode = mode_from_disp_size (i.types[op]);
8623 }
8624 }
8625
8626 if (fake_zero_displacement)
8627 {
8628 /* Fakes a zero displacement assuming that i.types[op]
8629 holds the correct displacement size. */
8630 expressionS *exp;
8631
8632 gas_assert (i.op[op].disps == 0);
8633 exp = &disp_expressions[i.disp_operands++];
8634 i.op[op].disps = exp;
8635 exp->X_op = O_constant;
8636 exp->X_add_number = 0;
8637 exp->X_add_symbol = (symbolS *) 0;
8638 exp->X_op_symbol = (symbolS *) 0;
8639 }
8640
8641 mem = op;
8642 }
8643 else
8644 mem = ~0;
8645
8646 if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8647 {
8648 if (operand_type_check (i.types[0], imm))
8649 i.vex.register_specifier = NULL;
8650 else
8651 {
8652 /* VEX.vvvv encodes one of the sources when the first
8653 operand is not an immediate. */
8654 if (i.tm.opcode_modifier.vexw == VEXW0)
8655 i.vex.register_specifier = i.op[0].regs;
8656 else
8657 i.vex.register_specifier = i.op[1].regs;
8658 }
8659
8660 	  /* The destination is an XMM register encoded in ModRM.reg
8661 	     and the VEX.R bit.  */
8662 i.rm.reg = i.op[2].regs->reg_num;
8663 if ((i.op[2].regs->reg_flags & RegRex) != 0)
8664 i.rex |= REX_R;
8665
8666 	  /* ModRM.rm and VEX.B encode the other source.  */
8667 if (!i.mem_operands)
8668 {
8669 i.rm.mode = 3;
8670
8671 if (i.tm.opcode_modifier.vexw == VEXW0)
8672 i.rm.regmem = i.op[1].regs->reg_num;
8673 else
8674 i.rm.regmem = i.op[0].regs->reg_num;
8675
8676 if ((i.op[1].regs->reg_flags & RegRex) != 0)
8677 i.rex |= REX_B;
8678 }
8679 }
8680 else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8681 {
8682 i.vex.register_specifier = i.op[2].regs;
8683 if (!i.mem_operands)
8684 {
8685 i.rm.mode = 3;
8686 i.rm.regmem = i.op[1].regs->reg_num;
8687 if ((i.op[1].regs->reg_flags & RegRex) != 0)
8688 i.rex |= REX_B;
8689 }
8690 }
8691 /* Fill in i.rm.reg or i.rm.regmem field with register operand
8692 (if any) based on i.tm.extension_opcode. Again, we must be
8693 careful to make sure that segment/control/debug/test/MMX
8694 registers are coded into the i.rm.reg field. */
8695 else if (i.reg_operands)
8696 {
8697 unsigned int op;
8698 unsigned int vex_reg = ~0;
8699
8700 for (op = 0; op < i.operands; op++)
8701 if (i.types[op].bitfield.class == Reg
8702 || i.types[op].bitfield.class == RegBND
8703 || i.types[op].bitfield.class == RegMask
8704 || i.types[op].bitfield.class == SReg
8705 || i.types[op].bitfield.class == RegCR
8706 || i.types[op].bitfield.class == RegDR
8707 || i.types[op].bitfield.class == RegTR
8708 || i.types[op].bitfield.class == RegSIMD
8709 || i.types[op].bitfield.class == RegMMX)
8710 break;
8711
8712 if (vex_3_sources)
8713 op = dest;
8714 else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8715 {
8716 /* For instructions with VexNDS, the register-only
8717 source operand is encoded in VEX prefix. */
8718 gas_assert (mem != (unsigned int) ~0);
8719
8720 if (op > mem || i.tm.cpu_flags.bitfield.cpucmpccxadd)
8721 {
8722 vex_reg = op++;
8723 gas_assert (op < i.operands);
8724 }
8725 else
8726 {
8727 /* Check register-only source operand when two source
8728 operands are swapped. */
8729 if (!i.tm.operand_types[op].bitfield.baseindex
8730 && i.tm.operand_types[op + 1].bitfield.baseindex)
8731 {
8732 vex_reg = op;
8733 op += 2;
8734 gas_assert (mem == (vex_reg + 1)
8735 && op < i.operands);
8736 }
8737 else
8738 {
8739 vex_reg = op + 1;
8740 gas_assert (vex_reg < i.operands);
8741 }
8742 }
8743 }
8744 else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8745 {
8746 /* For instructions with VexNDD, the register destination
8747 is encoded in VEX prefix. */
8748 if (i.mem_operands == 0)
8749 {
8750 /* There is no memory operand. */
8751 gas_assert ((op + 2) == i.operands);
8752 vex_reg = op + 1;
8753 }
8754 else
8755 {
8756 /* There are only 2 non-immediate operands. */
8757 gas_assert (op < i.imm_operands + 2
8758 && i.operands == i.imm_operands + 2);
8759 vex_reg = i.imm_operands + 1;
8760 }
8761 }
8762 else
8763 gas_assert (op < i.operands);
8764
8765 if (vex_reg != (unsigned int) ~0)
8766 {
8767 i386_operand_type *type = &i.tm.operand_types[vex_reg];
8768
8769 if ((type->bitfield.class != Reg
8770 || (!type->bitfield.dword && !type->bitfield.qword))
8771 && type->bitfield.class != RegSIMD
8772 && type->bitfield.class != RegMask)
8773 abort ();
8774
8775 i.vex.register_specifier = i.op[vex_reg].regs;
8776 }
8777
8778 /* Don't set OP operand twice. */
8779 if (vex_reg != op)
8780 {
8781 /* If there is an extension opcode to put here, the
8782 register number must be put into the regmem field. */
8783 if (i.tm.extension_opcode != None)
8784 {
8785 i.rm.regmem = i.op[op].regs->reg_num;
8786 set_rex_vrex (i.op[op].regs, REX_B,
8787 i.tm.opcode_modifier.sse2avx);
8788 }
8789 else
8790 {
8791 i.rm.reg = i.op[op].regs->reg_num;
8792 set_rex_vrex (i.op[op].regs, REX_R,
8793 i.tm.opcode_modifier.sse2avx);
8794 }
8795 }
8796
8797 /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8798 must set it to 3 to indicate this is a register operand
8799 in the regmem field. */
8800 if (!i.mem_operands)
8801 i.rm.mode = 3;
8802 }
8803
8804 /* Fill in i.rm.reg field with extension opcode (if any). */
8805 if (i.tm.extension_opcode != None)
8806 i.rm.reg = i.tm.extension_opcode;
8807 }
8808 return default_seg;
8809 }
8810
8811 static INLINE void
8812 frag_opcode_byte (unsigned char byte)
8813 {
8814 if (now_seg != absolute_section)
8815 FRAG_APPEND_1_CHAR (byte);
8816 else
8817 ++abs_section_offset;
8818 }
8819
8820 static unsigned int
8821 flip_code16 (unsigned int code16)
8822 {
8823 gas_assert (i.tm.operands == 1);
8824
8825 return !(i.prefix[REX_PREFIX] & REX_W)
8826 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8827 : i.tm.operand_types[0].bitfield.disp16)
8828 ? CODE16 : 0;
8829 }
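/* E.g. a `data16' prefixed jump in 32-bit code takes a 16-bit
   displacement, so (provided the template has a Disp16 form) the CODE16
   state derived from flag_code gets flipped by the caller.  */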
8830
8831 static void
8832 output_branch (void)
8833 {
8834 char *p;
8835 int size;
8836 int code16;
8837 int prefix;
8838 relax_substateT subtype;
8839 symbolS *sym;
8840 offsetT off;
8841
8842 if (now_seg == absolute_section)
8843 {
8844 as_bad (_("relaxable branches not supported in absolute section"));
8845 return;
8846 }
8847
8848 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8849 size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8850
8851 prefix = 0;
8852 if (i.prefix[DATA_PREFIX] != 0)
8853 {
8854 prefix = 1;
8855 i.prefixes -= 1;
8856       code16 ^= flip_code16 (code16);
8857 }
8858 /* Pentium4 branch hints. */
8859 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8860 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8861 {
8862 prefix++;
8863 i.prefixes--;
8864 }
8865 if (i.prefix[REX_PREFIX] != 0)
8866 {
8867 prefix++;
8868 i.prefixes--;
8869 }
8870
8871 /* BND prefixed jump. */
8872 if (i.prefix[BND_PREFIX] != 0)
8873 {
8874 prefix++;
8875 i.prefixes--;
8876 }
8877
8878 if (i.prefixes != 0)
8879 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
8880
8881   /* It's always a symbol; end the frag & set up for relaxing.
8882 Make sure there is enough room in this frag for the largest
8883 instruction we may generate in md_convert_frag. This is 2
8884 bytes for the opcode and room for the prefix and largest
8885 displacement. */
8886 frag_grow (prefix + 2 + 4);
8887 /* Prefix and 1 opcode byte go in fr_fix. */
8888 p = frag_more (prefix + 1);
8889 if (i.prefix[DATA_PREFIX] != 0)
8890 *p++ = DATA_PREFIX_OPCODE;
8891 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8892 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8893 *p++ = i.prefix[SEG_PREFIX];
8894 if (i.prefix[BND_PREFIX] != 0)
8895 *p++ = BND_PREFIX_OPCODE;
8896 if (i.prefix[REX_PREFIX] != 0)
8897 *p++ = i.prefix[REX_PREFIX];
8898 *p = i.tm.base_opcode;
8899
8900 if ((unsigned char) *p == JUMP_PC_RELATIVE)
8901 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8902 else if (cpu_arch_flags.bitfield.cpui386)
8903 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8904 else
8905 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8906 subtype |= code16;
8907
8908 sym = i.op[0].disps->X_add_symbol;
8909 off = i.op[0].disps->X_add_number;
8910
8911 if (i.op[0].disps->X_op != O_constant
8912 && i.op[0].disps->X_op != O_symbol)
8913 {
8914 /* Handle complex expressions. */
8915 sym = make_expr_symbol (i.op[0].disps);
8916 off = 0;
8917 }
8918
8919 frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8920
8921 /* 1 possible extra opcode + 4 byte displacement go in var part.
8922 Pass reloc in fr_var. */
8923 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8924 }
8925
8926 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8927 /* Return TRUE iff PLT32 relocation should be used for branching to
8928 symbol S. */
8929
8930 static bool
8931 need_plt32_p (symbolS *s)
8932 {
8933 /* PLT32 relocation is ELF only. */
8934 if (!IS_ELF)
8935 return false;
8936
8937 #ifdef TE_SOLARIS
8938   /* Don't emit PLT32 relocation on Solaris: neither the native linker
8939      nor krtld supports it.  */
8940 return false;
8941 #endif
8942
8943   /* Since there is no need to prepare for a PLT branch on x86-64, we
8944      can generate R_X86_64_PLT32 instead of R_X86_64_PC32; the former
8945      can then be used as a marker for 32-bit PC-relative branches.  */
8946 if (!object_64bit)
8947 return false;
8948
8949 if (s == NULL)
8950 return false;
8951
8952   /* Weak or undefined symbols need a PLT32 relocation.  */
8953 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8954 return true;
8955
8956   /* A non-global symbol doesn't need a PLT32 relocation.  */
8957 if (! S_IS_EXTERNAL (s))
8958 return false;
8959
8960   /* Other global symbols need a PLT32 relocation.  NB: Symbols with
8961      non-default visibility are treated as normal global symbols, so
8962      that the PLT32 relocation can be used as a marker for 32-bit
8963      PC-relative branches.  This is useful for linker relaxation.  */
8964 return true;
8965 }
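/* For example, plain `call foo' against an undefined global foo thus gets
   R_X86_64_PLT32; the linker can later turn that into a direct PC32 branch
   if foo ends up defined locally.  */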
8966 #endif
8967
8968 static void
8969 output_jump (void)
8970 {
8971 char *p;
8972 int size;
8973 fixS *fixP;
8974 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8975
8976 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8977 {
8978 /* This is a loop or jecxz type instruction. */
8979 size = 1;
8980 if (i.prefix[ADDR_PREFIX] != 0)
8981 {
8982 frag_opcode_byte (ADDR_PREFIX_OPCODE);
8983 i.prefixes -= 1;
8984 }
8985 /* Pentium4 branch hints. */
8986 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8987 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8988 {
8989 frag_opcode_byte (i.prefix[SEG_PREFIX]);
8990 i.prefixes--;
8991 }
8992 }
8993 else
8994 {
8995 int code16;
8996
8997 code16 = 0;
8998 if (flag_code == CODE_16BIT)
8999 code16 = CODE16;
9000
9001 if (i.prefix[DATA_PREFIX] != 0)
9002 {
9003 frag_opcode_byte (DATA_PREFIX_OPCODE);
9004 i.prefixes -= 1;
9005 	  code16 ^= flip_code16 (code16);
9006 }
9007
9008 size = 4;
9009 if (code16)
9010 size = 2;
9011 }
9012
9013 /* BND prefixed jump. */
9014 if (i.prefix[BND_PREFIX] != 0)
9015 {
9016 frag_opcode_byte (i.prefix[BND_PREFIX]);
9017 i.prefixes -= 1;
9018 }
9019
9020 if (i.prefix[REX_PREFIX] != 0)
9021 {
9022 frag_opcode_byte (i.prefix[REX_PREFIX]);
9023 i.prefixes -= 1;
9024 }
9025
9026 if (i.prefixes != 0)
9027 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
9028
9029 if (now_seg == absolute_section)
9030 {
9031 abs_section_offset += i.opcode_length + size;
9032 return;
9033 }
9034
9035 p = frag_more (i.opcode_length + size);
9036 switch (i.opcode_length)
9037 {
9038 case 2:
9039 *p++ = i.tm.base_opcode >> 8;
9040 /* Fall through. */
9041 case 1:
9042 *p++ = i.tm.base_opcode;
9043 break;
9044 default:
9045 abort ();
9046 }
9047
9048 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9049 if (flag_code == CODE_64BIT && size == 4
9050 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
9051 && need_plt32_p (i.op[0].disps->X_add_symbol))
9052 jump_reloc = BFD_RELOC_X86_64_PLT32;
9053 #endif
9054
9055 jump_reloc = reloc (size, 1, 1, jump_reloc);
9056
9057 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9058 i.op[0].disps, 1, jump_reloc);
9059
9060 /* All jumps handled here are signed, but don't unconditionally use a
9061 signed limit check for 32 and 16 bit jumps as we want to allow wrap
9062 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
9063 respectively. */
9064 switch (size)
9065 {
9066 case 1:
9067 fixP->fx_signed = 1;
9068 break;
9069
9070 case 2:
9071 if (i.tm.base_opcode == 0xc7f8)
9072 fixP->fx_signed = 1;
9073 break;
9074
9075 case 4:
9076 if (flag_code == CODE_64BIT)
9077 fixP->fx_signed = 1;
9078 break;
9079 }
9080 }
9081
9082 static void
9083 output_interseg_jump (void)
9084 {
9085 char *p;
9086 int size;
9087 int prefix;
9088 int code16;
9089
9090 code16 = 0;
9091 if (flag_code == CODE_16BIT)
9092 code16 = CODE16;
9093
9094 prefix = 0;
9095 if (i.prefix[DATA_PREFIX] != 0)
9096 {
9097 prefix = 1;
9098 i.prefixes -= 1;
9099 code16 ^= CODE16;
9100 }
9101
9102 gas_assert (!i.prefix[REX_PREFIX]);
9103
9104 size = 4;
9105 if (code16)
9106 size = 2;
9107
9108 if (i.prefixes != 0)
9109 as_warn (_("skipping prefixes on `%s'"), i.tm.name);
9110
9111 if (now_seg == absolute_section)
9112 {
9113 abs_section_offset += prefix + 1 + 2 + size;
9114 return;
9115 }
9116
9117   /* 1 byte opcode; `size' bytes of offset; 2 byte segment.  */
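  /* E.g. in 32-bit mode `ljmp $0x8, $0x1000' assembles to
     EA 00 10 00 00 08 00: opcode, 4-byte offset, 2-byte selector.  */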
9118 p = frag_more (prefix + 1 + 2 + size);
9119
9120 if (i.prefix[DATA_PREFIX] != 0)
9121 *p++ = DATA_PREFIX_OPCODE;
9122
9123 if (i.prefix[REX_PREFIX] != 0)
9124 *p++ = i.prefix[REX_PREFIX];
9125
9126 *p++ = i.tm.base_opcode;
9127 if (i.op[1].imms->X_op == O_constant)
9128 {
9129 offsetT n = i.op[1].imms->X_add_number;
9130
9131 if (size == 2
9132 && !fits_in_unsigned_word (n)
9133 && !fits_in_signed_word (n))
9134 {
9135 as_bad (_("16-bit jump out of range"));
9136 return;
9137 }
9138 md_number_to_chars (p, n, size);
9139 }
9140 else
9141 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9142 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9143
9144 p += size;
9145 if (i.op[0].imms->X_op == O_constant)
9146 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9147 else
9148 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9149 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9150 }
9151
9152 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9153 void
9154 x86_cleanup (void)
9155 {
9156 char *p;
9157 asection *seg = now_seg;
9158 subsegT subseg = now_subseg;
9159 asection *sec;
9160 unsigned int alignment, align_size_1;
9161 unsigned int isa_1_descsz, feature_2_descsz, descsz;
9162 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9163 unsigned int padding;
9164
9165 if (!IS_ELF || !x86_used_note)
9166 return;
9167
9168 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9169
9170 /* The .note.gnu.property section layout:
9171
9172 Field Length Contents
9173 ---- ---- ----
9174 n_namsz 4 4
9175 n_descsz 4 The note descriptor size
9176 n_type 4 NT_GNU_PROPERTY_TYPE_0
9177 n_name 4 "GNU"
9178 n_desc n_descsz The program property array
9179 .... .... ....
9180 */
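  /* For ELFCLASS64, each property below is padded to 8 bytes:
     isa_1_descsz = (4 + 4 + 4 + 7) & ~7 = 16 and
     feature_2_descsz = (16 + 12 + 7) & ~7 = 32, so n_descsz is 32.  */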
9181
9182 /* Create the .note.gnu.property section. */
9183 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9184 bfd_set_section_flags (sec,
9185 (SEC_ALLOC
9186 | SEC_LOAD
9187 | SEC_DATA
9188 | SEC_HAS_CONTENTS
9189 | SEC_READONLY));
9190
9191 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9192 {
9193 align_size_1 = 7;
9194 alignment = 3;
9195 }
9196 else
9197 {
9198 align_size_1 = 3;
9199 alignment = 2;
9200 }
9201
9202 bfd_set_section_alignment (sec, alignment);
9203 elf_section_type (sec) = SHT_NOTE;
9204
9205 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9206 + 4-byte data */
9207 isa_1_descsz_raw = 4 + 4 + 4;
9208 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
9209 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9210
9211 feature_2_descsz_raw = isa_1_descsz;
9212 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9213 + 4-byte data */
9214 feature_2_descsz_raw += 4 + 4 + 4;
9215 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
9216 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9217 & ~align_size_1);
9218
9219 descsz = feature_2_descsz;
9220   /* Section size: n_namsz + n_descsz + n_type + n_name + n_desc.  */
9221 p = frag_more (4 + 4 + 4 + 4 + descsz);
9222
9223 /* Write n_namsz. */
9224 md_number_to_chars (p, (valueT) 4, 4);
9225
9226 /* Write n_descsz. */
9227 md_number_to_chars (p + 4, (valueT) descsz, 4);
9228
9229 /* Write n_type. */
9230 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9231
9232 /* Write n_name. */
9233 memcpy (p + 4 * 3, "GNU", 4);
9234
9235 /* Write 4-byte type. */
9236 md_number_to_chars (p + 4 * 4,
9237 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9238
9239 /* Write 4-byte data size. */
9240 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9241
9242 /* Write 4-byte data. */
9243 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9244
9245 /* Zero out paddings. */
9246 padding = isa_1_descsz - isa_1_descsz_raw;
9247 if (padding)
9248 memset (p + 4 * 7, 0, padding);
9249
9250 /* Write 4-byte type. */
9251 md_number_to_chars (p + isa_1_descsz + 4 * 4,
9252 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9253
9254 /* Write 4-byte data size. */
9255 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9256
9257 /* Write 4-byte data. */
9258 md_number_to_chars (p + isa_1_descsz + 4 * 6,
9259 (valueT) x86_feature_2_used, 4);
9260
9261 /* Zero out paddings. */
9262 padding = feature_2_descsz - feature_2_descsz_raw;
9263 if (padding)
9264 memset (p + isa_1_descsz + 4 * 7, 0, padding);
9265
9266 /* We probably can't restore the current segment, for there likely
9267 isn't one yet... */
9268 if (seg && subseg)
9269 subseg_set (seg, subseg);
9270 }
9271
9272 bool
9273 x86_support_sframe_p (void)
9274 {
9275 /* At this time, SFrame unwind is supported for AMD64 ABI only. */
9276 return (x86_elf_abi == X86_64_ABI);
9277 }
9278
9279 bool
9280 x86_sframe_ra_tracking_p (void)
9281 {
9282   /* On AMD64, the return address is always stored on the stack at a
9283      fixed offset from the CFA (provided via x86_sframe_cfa_ra_offset ()),
9284      so it is not tracked explicitly via an SFrame Frame Row Entry.  */
9285 return false;
9286 }
9287
9288 offsetT
9289 x86_sframe_cfa_ra_offset (void)
9290 {
9291 gas_assert (x86_elf_abi == X86_64_ABI);
9292 return (offsetT) -8;
9293 }
9294
9295 unsigned char
9296 x86_sframe_get_abi_arch (void)
9297 {
9298 unsigned char sframe_abi_arch = 0;
9299
9300 if (x86_support_sframe_p ())
9301 {
9302 gas_assert (!target_big_endian);
9303 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9304 }
9305
9306 return sframe_abi_arch;
9307 }
9308
9309 #endif
9310
9311 static unsigned int
9312 encoding_length (const fragS *start_frag, offsetT start_off,
9313 const char *frag_now_ptr)
9314 {
9315 unsigned int len = 0;
9316
9317 if (start_frag != frag_now)
9318 {
9319 const fragS *fr = start_frag;
9320
9321 do {
9322 len += fr->fr_fix;
9323 fr = fr->fr_next;
9324 } while (fr && fr != frag_now);
9325 }
9326
9327 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9328 }
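/* E.g. if an insn starts 3 bytes before the end of its first frag and ends
   4 bytes into frag_now, encoding_length () returns 7.  */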
9329
9330 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9331 be macro-fused with conditional jumps.
9332    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP-relative address,
9333    or is in one of the following forms:
9334
9335 cmp m, imm
9336 add m, imm
9337 sub m, imm
9338 test m, imm
9339 and m, imm
9340 inc m
9341 dec m
9342
9343    it cannot be fused.  */
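/* For example, `cmp %eax, %ebx' followed by `jne lab' may be macro-fused,
   while `cmpl $1, (%ebx)' (memory operand plus immediate) may not.  */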
9344
9345 static int
9346 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9347 {
9348 /* No RIP address. */
9349 if (i.base_reg && i.base_reg->reg_num == RegIP)
9350 return 0;
9351
9352 /* No opcodes outside of base encoding space. */
9353 if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9354 return 0;
9355
9356 /* add, sub without add/sub m, imm. */
9357 if (i.tm.base_opcode <= 5
9358 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9359 || ((i.tm.base_opcode | 3) == 0x83
9360 && (i.tm.extension_opcode == 0x5
9361 || i.tm.extension_opcode == 0x0)))
9362 {
9363 *mf_cmp_p = mf_cmp_alu_cmp;
9364 return !(i.mem_operands && i.imm_operands);
9365 }
9366
9367 /* and without and m, imm. */
9368 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9369 || ((i.tm.base_opcode | 3) == 0x83
9370 && i.tm.extension_opcode == 0x4))
9371 {
9372 *mf_cmp_p = mf_cmp_test_and;
9373 return !(i.mem_operands && i.imm_operands);
9374 }
9375
9376 /* test without test m imm. */
9377 if ((i.tm.base_opcode | 1) == 0x85
9378 || (i.tm.base_opcode | 1) == 0xa9
9379 || ((i.tm.base_opcode | 1) == 0xf7
9380 && i.tm.extension_opcode == 0))
9381 {
9382 *mf_cmp_p = mf_cmp_test_and;
9383 return !(i.mem_operands && i.imm_operands);
9384 }
9385
9386 /* cmp without cmp m, imm. */
9387 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9388 || ((i.tm.base_opcode | 3) == 0x83
9389 && (i.tm.extension_opcode == 0x7)))
9390 {
9391 *mf_cmp_p = mf_cmp_alu_cmp;
9392 return !(i.mem_operands && i.imm_operands);
9393 }
9394
9395 /* inc, dec without inc/dec m. */
9396 if ((i.tm.cpu_flags.bitfield.cpuno64
9397 && (i.tm.base_opcode | 0xf) == 0x4f)
9398 || ((i.tm.base_opcode | 1) == 0xff
9399 && i.tm.extension_opcode <= 0x1))
9400 {
9401 *mf_cmp_p = mf_cmp_incdec;
9402 return !i.mem_operands;
9403 }
9404
9405 return 0;
9406 }
9407
9408 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9409
9410 static int
9411 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9412 {
9413   /* NB: This doesn't work with COND_JUMP86 without i386.  */
9414 if (!align_branch_power
9415 || now_seg == absolute_section
9416 || !cpu_arch_flags.bitfield.cpui386
9417 || !(align_branch & align_branch_fused_bit))
9418 return 0;
9419
9420 if (maybe_fused_with_jcc_p (mf_cmp_p))
9421 {
9422 if (last_insn.kind == last_insn_other
9423 || last_insn.seg != now_seg)
9424 return 1;
9425 if (flag_debug)
9426 as_warn_where (last_insn.file, last_insn.line,
9427 _("`%s` skips -malign-branch-boundary on `%s`"),
9428 last_insn.name, i.tm.name);
9429 }
9430
9431 return 0;
9432 }
9433
9434 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9435
9436 static int
9437 add_branch_prefix_frag_p (void)
9438 {
9439   /* NB: This doesn't work with COND_JUMP86 without i386.  Don't add a
9440      prefix to PadLock instructions, since they include prefixes in
9441      their opcodes.  */
9441 if (!align_branch_power
9442 || !align_branch_prefix_size
9443 || now_seg == absolute_section
9444 || i.tm.cpu_flags.bitfield.cpupadlock
9445 || !cpu_arch_flags.bitfield.cpui386)
9446 return 0;
9447
9448   /* Don't add a prefix if the insn itself is a prefix, or if there is
9449      no operand, in which case a segment prefix may have special
9450      meaning.  */
9450 if (!i.operands || i.tm.opcode_modifier.isprefix)
9451 return 0;
9452
9453 if (last_insn.kind == last_insn_other
9454 || last_insn.seg != now_seg)
9455 return 1;
9456
9457 if (flag_debug)
9458 as_warn_where (last_insn.file, last_insn.line,
9459 _("`%s` skips -malign-branch-boundary on `%s`"),
9460 last_insn.name, i.tm.name);
9461
9462 return 0;
9463 }
9464
9465 /* Return 1 if a BRANCH_PADDING frag should be generated. */
9466
9467 static int
9468 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9469 enum mf_jcc_kind *mf_jcc_p)
9470 {
9471 int add_padding;
9472
9473   /* NB: This doesn't work with COND_JUMP86 without i386.  */
9474 if (!align_branch_power
9475 || now_seg == absolute_section
9476 || !cpu_arch_flags.bitfield.cpui386
9477 || i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9478 return 0;
9479
9480 add_padding = 0;
9481
9482 /* Check for jcc and direct jmp. */
9483 if (i.tm.opcode_modifier.jump == JUMP)
9484 {
9485 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9486 {
9487 *branch_p = align_branch_jmp;
9488 add_padding = align_branch & align_branch_jmp_bit;
9489 }
9490 else
9491 {
9492 	  /* Because J<cc> and JN<cc> share the same group in the
9493 	     macro-fusible table, ignore the lowest bit.  */
9494 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9495 *branch_p = align_branch_jcc;
9496 if ((align_branch & align_branch_jcc_bit))
9497 add_padding = 1;
9498 }
9499 }
9500 else if ((i.tm.base_opcode | 1) == 0xc3)
9501 {
9502 /* Near ret. */
9503 *branch_p = align_branch_ret;
9504 if ((align_branch & align_branch_ret_bit))
9505 add_padding = 1;
9506 }
9507 else
9508 {
9509 /* Check for indirect jmp, direct and indirect calls. */
9510 if (i.tm.base_opcode == 0xe8)
9511 {
9512 /* Direct call. */
9513 *branch_p = align_branch_call;
9514 if ((align_branch & align_branch_call_bit))
9515 add_padding = 1;
9516 }
9517 else if (i.tm.base_opcode == 0xff
9518 && (i.tm.extension_opcode == 2
9519 || i.tm.extension_opcode == 4))
9520 {
9521 /* Indirect call and jmp. */
9522 *branch_p = align_branch_indirect;
9523 if ((align_branch & align_branch_indirect_bit))
9524 add_padding = 1;
9525 }
9526
9527 if (add_padding
9528 && i.disp_operands
9529 && tls_get_addr
9530 && (i.op[0].disps->X_op == O_symbol
9531 || (i.op[0].disps->X_op == O_subtract
9532 && i.op[0].disps->X_op_symbol == GOT_symbol)))
9533 {
9534 symbolS *s = i.op[0].disps->X_add_symbol;
9535 /* No padding to call to global or undefined tls_get_addr. */
9536 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9537 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9538 return 0;
9539 }
9540 }
9541
9542 if (add_padding
9543 && last_insn.kind != last_insn_other
9544 && last_insn.seg == now_seg)
9545 {
9546 if (flag_debug)
9547 as_warn_where (last_insn.file, last_insn.line,
9548 _("`%s` skips -malign-branch-boundary on `%s`"),
9549 last_insn.name, i.tm.name);
9550 return 0;
9551 }
9552
9553 return add_padding;
9554 }
9555
9556 static void
9557 output_insn (void)
9558 {
9559 fragS *insn_start_frag;
9560 offsetT insn_start_off;
9561 fragS *fragP = NULL;
9562 enum align_branch_kind branch = align_branch_none;
9563   /* The initializer is arbitrary, just to avoid an uninitialized-use
9564      error.  It is actually either assigned in add_branch_padding_frag_p
9565      or never used.  */
9566 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9567
9568 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9569 if (IS_ELF && x86_used_note && now_seg != absolute_section)
9570 {
9571 if ((i.xstate & xstate_tmm) == xstate_tmm
9572 || i.tm.cpu_flags.bitfield.cpuamx_tile)
9573 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9574
9575 if (i.tm.cpu_flags.bitfield.cpu8087
9576 || i.tm.cpu_flags.bitfield.cpu287
9577 || i.tm.cpu_flags.bitfield.cpu387
9578 || i.tm.cpu_flags.bitfield.cpu687
9579 || i.tm.cpu_flags.bitfield.cpufisttp)
9580 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9581
9582 if ((i.xstate & xstate_mmx)
9583 || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9584 && !is_any_vex_encoding (&i.tm)
9585 && (i.tm.base_opcode == 0x77 /* emms */
9586 || i.tm.base_opcode == 0x0e /* femms */)))
9587 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9588
9589 if (i.index_reg)
9590 {
9591 if (i.index_reg->reg_type.bitfield.zmmword)
9592 i.xstate |= xstate_zmm;
9593 else if (i.index_reg->reg_type.bitfield.ymmword)
9594 i.xstate |= xstate_ymm;
9595 else if (i.index_reg->reg_type.bitfield.xmmword)
9596 i.xstate |= xstate_xmm;
9597 }
9598
9599 /* vzeroall / vzeroupper */
9600 if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9601 i.xstate |= xstate_ymm;
9602
9603 if ((i.xstate & xstate_xmm)
9604 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9605 || (i.tm.base_opcode == 0xae
9606 && (i.tm.cpu_flags.bitfield.cpusse
9607 || i.tm.cpu_flags.bitfield.cpuavx))
9608 || i.tm.cpu_flags.bitfield.cpuwidekl
9609 || i.tm.cpu_flags.bitfield.cpukl)
9610 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9611
9612 if ((i.xstate & xstate_ymm) == xstate_ymm)
9613 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9614 if ((i.xstate & xstate_zmm) == xstate_zmm)
9615 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9616 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9617 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9618 if (i.tm.cpu_flags.bitfield.cpufxsr)
9619 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9620 if (i.tm.cpu_flags.bitfield.cpuxsave)
9621 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9622 if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9623 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9624 if (i.tm.cpu_flags.bitfield.cpuxsavec)
9625 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9626
9627 if (x86_feature_2_used
9628 || i.tm.cpu_flags.bitfield.cpucmov
9629 || i.tm.cpu_flags.bitfield.cpusyscall
9630 || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9631 && i.tm.base_opcode == 0xc7
9632 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
9633 && i.tm.extension_opcode == 1) /* cmpxchg8b */)
9634 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9635 if (i.tm.cpu_flags.bitfield.cpusse3
9636 || i.tm.cpu_flags.bitfield.cpussse3
9637 || i.tm.cpu_flags.bitfield.cpusse4_1
9638 || i.tm.cpu_flags.bitfield.cpusse4_2
9639 || i.tm.cpu_flags.bitfield.cpucx16
9640 || i.tm.cpu_flags.bitfield.cpupopcnt
9641 /* LAHF-SAHF insns in 64-bit mode. */
9642 || (flag_code == CODE_64BIT
9643 && (i.tm.base_opcode | 1) == 0x9f
9644 && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
9645 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9646 if (i.tm.cpu_flags.bitfield.cpuavx
9647 || i.tm.cpu_flags.bitfield.cpuavx2
9648 	  /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
9649 	     XOP, FMA4, LWP, TBM, and AMX.  */
9650 || (i.tm.opcode_modifier.vex
9651 && !i.tm.cpu_flags.bitfield.cpuavx512f
9652 && !i.tm.cpu_flags.bitfield.cpuavx512bw
9653 && !i.tm.cpu_flags.bitfield.cpuavx512dq
9654 && !i.tm.cpu_flags.bitfield.cpuxop
9655 && !i.tm.cpu_flags.bitfield.cpufma4
9656 && !i.tm.cpu_flags.bitfield.cpulwp
9657 && !i.tm.cpu_flags.bitfield.cputbm
9658 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9659 || i.tm.cpu_flags.bitfield.cpuf16c
9660 || i.tm.cpu_flags.bitfield.cpufma
9661 || i.tm.cpu_flags.bitfield.cpulzcnt
9662 || i.tm.cpu_flags.bitfield.cpumovbe
9663 || i.tm.cpu_flags.bitfield.cpuxsaves
9664 || (x86_feature_2_used
9665 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9666 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9667 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9668 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9669 if (i.tm.cpu_flags.bitfield.cpuavx512f
9670 || i.tm.cpu_flags.bitfield.cpuavx512bw
9671 || i.tm.cpu_flags.bitfield.cpuavx512dq
9672 || i.tm.cpu_flags.bitfield.cpuavx512vl
9673 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9674 AVX512-4FMAPS, and AVX512-4VNNIW. */
9675 || (i.tm.opcode_modifier.evex
9676 && !i.tm.cpu_flags.bitfield.cpuavx512er
9677 && !i.tm.cpu_flags.bitfield.cpuavx512pf
9678 && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
9679 && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9680 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9681 }
9682 #endif
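/* For example, an input containing only SSE2 insns gets just the
   BASELINE marking (via the XMM feature bit above), while a single
   AVX2 insn raises the recorded ISA level to V3. The resulting
   .note.gnu.property section can be inspected with `readelf -n'. */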
9683
9684 /* Tie dwarf2 debug info to the address at the start of the insn.
9685 We can't do this after the insn has been output as the current
9686 frag may have been closed off. eg. by frag_var. */
9687 dwarf2_emit_insn (0);
9688
9689 insn_start_frag = frag_now;
9690 insn_start_off = frag_now_fix ();
9691
9692 if (add_branch_padding_frag_p (&branch, &mf_jcc))
9693 {
9694 char *p;
9695 /* Branch can be 8 bytes. Leave some room for prefixes. */
9696 unsigned int max_branch_padding_size = 14;
9697
9698 /* Align section to boundary. */
9699 record_alignment (now_seg, align_branch_power);
9700
9701 /* Make room for padding. */
9702 frag_grow (max_branch_padding_size);
9703
9704 /* Start of the padding. */
9705 p = frag_more (0);
9706
9707 fragP = frag_now;
9708
9709 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9710 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9711 NULL, 0, p);
9712
9713 fragP->tc_frag_data.mf_type = mf_jcc;
9714 fragP->tc_frag_data.branch_type = branch;
9715 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9716 }
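/* For example, with -malign-branch-boundary=32 a branch that would
   cross a 32-byte boundary gets a BRANCH_PADDING frag in front of it;
   relaxation later materializes the frag as prefixes or NOPs so the
   branch no longer crosses the boundary. */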
9717
9718 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9719 && !pre_386_16bit_warned)
9720 {
9721 as_warn (_("use .code16 to ensure correct addressing mode"));
9722 pre_386_16bit_warned = true;
9723 }
9724
9725 /* Output jumps. */
9726 if (i.tm.opcode_modifier.jump == JUMP)
9727 output_branch ();
9728 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9729 || i.tm.opcode_modifier.jump == JUMP_DWORD)
9730 output_jump ();
9731 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9732 output_interseg_jump ();
9733 else
9734 {
9735 /* Output normal instructions here. */
9736 char *p;
9737 unsigned char *q;
9738 unsigned int j;
9739 enum mf_cmp_kind mf_cmp;
9740
9741 if (avoid_fence
9742 && (i.tm.base_opcode == 0xaee8
9743 || i.tm.base_opcode == 0xaef0
9744 || i.tm.base_opcode == 0xaef8))
9745 {
9746 /* Encode lfence, mfence, and sfence as
9747 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
9748 if (flag_code == CODE_16BIT)
9749 as_bad (_("Cannot convert `%s' in 16-bit mode"), i.tm.name);
9750 else if (omit_lock_prefix)
9751 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9752 i.tm.name);
9753 else if (now_seg != absolute_section)
9754 {
9755 offsetT val = 0x240483f0ULL;
9756
9757 p = frag_more (5);
9758 md_number_to_chars (p, val, 5);
9759 }
9760 else
9761 abs_section_offset += 5;
9762 return;
9763 }
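/* For example, with -mfence-as-lock-add=yes, `sfence' (0f ae f8) is
   emitted as the five bytes above instead; the locked add of zero to
   the top of the stack leaves memory unchanged but still acts as a
   full memory barrier. */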
9764
9765 /* Some processors fail on the LOCK prefix. This option makes the
9766 assembler ignore the LOCK prefix and serves as a workaround. */
9767 if (omit_lock_prefix)
9768 {
9769 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9770 && i.tm.opcode_modifier.isprefix)
9771 return;
9772 i.prefix[LOCK_PREFIX] = 0;
9773 }
9774
9775 if (branch)
9776 /* Skip if this is a branch. */
9777 ;
9778 else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9779 {
9780 /* Make room for padding. */
9781 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9782 p = frag_more (0);
9783
9784 fragP = frag_now;
9785
9786 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9787 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9788 NULL, 0, p);
9789
9790 fragP->tc_frag_data.mf_type = mf_cmp;
9791 fragP->tc_frag_data.branch_type = align_branch_fused;
9792 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9793 }
9794 else if (add_branch_prefix_frag_p ())
9795 {
9796 unsigned int max_prefix_size = align_branch_prefix_size;
9797
9798 /* Make room for padding. */
9799 frag_grow (max_prefix_size);
9800 p = frag_more (0);
9801
9802 fragP = frag_now;
9803
9804 frag_var (rs_machine_dependent, max_prefix_size, 0,
9805 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9806 NULL, 0, p);
9807
9808 fragP->tc_frag_data.max_bytes = max_prefix_size;
9809 }
9810
9811 /* Since the VEX/EVEX prefix contains the implicit prefix, we
9812 don't need the explicit prefix. */
9813 if (!is_any_vex_encoding (&i.tm))
9814 {
9815 switch (i.tm.opcode_modifier.opcodeprefix)
9816 {
9817 case PREFIX_0X66:
9818 add_prefix (0x66);
9819 break;
9820 case PREFIX_0XF2:
9821 add_prefix (0xf2);
9822 break;
9823 case PREFIX_0XF3:
9824 if (!i.tm.cpu_flags.bitfield.cpupadlock
9825 || (i.prefix[REP_PREFIX] != 0xf3))
9826 add_prefix (0xf3);
9827 break;
9828 case PREFIX_NONE:
9829 switch (i.opcode_length)
9830 {
9831 case 2:
9832 break;
9833 case 1:
9834 /* Check for pseudo prefixes. */
9835 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9836 break;
9837 as_bad_where (insn_start_frag->fr_file,
9838 insn_start_frag->fr_line,
9839 _("pseudo prefix without instruction"));
9840 return;
9841 default:
9842 abort ();
9843 }
9844 break;
9845 default:
9846 abort ();
9847 }
9848
9849 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9850 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9851 R_X86_64_GOTTPOFF relocation so that linker can safely
9852 perform IE->LE optimization. A dummy REX_OPCODE prefix
9853 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9854 relocation for GDesc -> IE/LE optimization. */
9855 if (x86_elf_abi == X86_64_X32_ABI
9856 && i.operands == 2
9857 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9858 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9859 && i.prefix[REX_PREFIX] == 0)
9860 add_prefix (REX_OPCODE);
9861 #endif
9862
9863 /* The prefix bytes. */
9864 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9865 if (*q)
9866 frag_opcode_byte (*q);
9867 }
9868 else
9869 {
9870 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9871 if (*q)
9872 switch (j)
9873 {
9874 case SEG_PREFIX:
9875 case ADDR_PREFIX:
9876 frag_opcode_byte (*q);
9877 break;
9878 default:
9879 /* There should be no other prefixes for instructions
9880 with a VEX prefix. */
9881 abort ();
9882 }
9883
9884 /* For EVEX instructions i.vrex should become 0 after
9885 build_evex_prefix. For VEX instructions the upper 16 registers
9886 aren't available, so VREX should be 0. */
9887 if (i.vrex)
9888 abort ();
9889 /* Now the VEX prefix. */
9890 if (now_seg != absolute_section)
9891 {
9892 p = frag_more (i.vex.length);
9893 for (j = 0; j < i.vex.length; j++)
9894 p[j] = i.vex.bytes[j];
9895 }
9896 else
9897 abs_section_offset += i.vex.length;
9898 }
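/* E.g. legacy `addpd' is 66 0f 58: the 0x66 prefix and 0f escape are
   emitted explicitly above, whereas the VEX form folds both into the
   VEX prefix's pp and map fields, leaving only segment and address
   size prefixes to be emitted separately. */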
9899
9900 /* Now the opcode; be careful about word order here! */
9901 j = i.opcode_length;
9902 if (!i.vex.length)
9903 switch (i.tm.opcode_modifier.opcodespace)
9904 {
9905 case SPACE_BASE:
9906 break;
9907 case SPACE_0F:
9908 ++j;
9909 break;
9910 case SPACE_0F38:
9911 case SPACE_0F3A:
9912 j += 2;
9913 break;
9914 default:
9915 abort ();
9916 }
9917
9918 if (now_seg == absolute_section)
9919 abs_section_offset += j;
9920 else if (j == 1)
9921 {
9922 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9923 }
9924 else
9925 {
9926 p = frag_more (j);
9927 if (!i.vex.length
9928 && i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9929 {
9930 *p++ = 0x0f;
9931 if (i.tm.opcode_modifier.opcodespace != SPACE_0F)
9932 *p++ = i.tm.opcode_modifier.opcodespace == SPACE_0F38
9933 ? 0x38 : 0x3a;
9934 }
9935
9936 switch (i.opcode_length)
9937 {
9938 case 2:
9939 /* Put out high byte first: can't use md_number_to_chars! */
9940 *p++ = (i.tm.base_opcode >> 8) & 0xff;
9941 /* Fall through. */
9942 case 1:
9943 *p = i.tm.base_opcode & 0xff;
9944 break;
9945 default:
9946 abort ();
9947 break;
9948 }
9949
9950 }
9951
9952 /* Now the modrm byte and sib byte (if present). */
9953 if (i.tm.opcode_modifier.modrm)
9954 {
9955 frag_opcode_byte ((i.rm.regmem << 0)
9956 | (i.rm.reg << 3)
9957 | (i.rm.mode << 6));
9958 /* If i.rm.regmem == ESP (4)
9959 && i.rm.mode != (Register mode)
9960 && not 16 bit
9961 ==> need second modrm byte. */
9962 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9963 && i.rm.mode != 3
9964 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9965 frag_opcode_byte ((i.sib.base << 0)
9966 | (i.sib.index << 3)
9967 | (i.sib.scale << 6));
9968 }
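/* The bytes just emitted follow the usual layout:
   ModRM: mode(7:6) reg(5:3) regmem(2:0)
   SIB:   scale(7:6) index(5:3) base(2:0)
   E.g. (%eax,%ebx,4) gives regmem = 4 (the SIB escape) with
   SIB = 10 011 000b (scale 4, index %ebx, base %eax). */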
9969
9970 if (i.disp_operands)
9971 output_disp (insn_start_frag, insn_start_off);
9972
9973 if (i.imm_operands)
9974 output_imm (insn_start_frag, insn_start_off);
9975
9976 /*
9977 * frag_now_fix () returning plain abs_section_offset when we're in the
9978 * absolute section, and abs_section_offset not getting updated as data
9979 * gets added to the frag breaks the logic below.
9980 */
9981 if (now_seg != absolute_section)
9982 {
9983 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9984 if (j > 15)
9985 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
9986 j);
9987 else if (fragP)
9988 {
9989 /* NB: Don't add prefix with GOTPC relocation since
9990 output_disp() above depends on the fixed encoding
9991 length. Can't add prefix with TLS relocation since
9992 it breaks TLS linker optimization. */
9993 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
9994 /* Prefix count on the current instruction. */
9995 unsigned int count = i.vex.length;
9996 unsigned int k;
9997 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
9998 /* REX byte is encoded in VEX/EVEX prefix. */
9999 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
10000 count++;
10001
10002 /* Count prefixes for extended opcode maps. */
10003 if (!i.vex.length)
10004 switch (i.tm.opcode_modifier.opcodespace)
10005 {
10006 case SPACE_BASE:
10007 break;
10008 case SPACE_0F:
10009 count++;
10010 break;
10011 case SPACE_0F38:
10012 case SPACE_0F3A:
10013 count += 2;
10014 break;
10015 default:
10016 abort ();
10017 }
10018
10019 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
10020 == BRANCH_PREFIX)
10021 {
10022 /* Set the maximum prefix size in BRANCH_PREFIX
10023 frag. */
10024 if (fragP->tc_frag_data.max_bytes > max)
10025 fragP->tc_frag_data.max_bytes = max;
10026 if (fragP->tc_frag_data.max_bytes > count)
10027 fragP->tc_frag_data.max_bytes -= count;
10028 else
10029 fragP->tc_frag_data.max_bytes = 0;
10030 }
10031 else
10032 {
10033 /* Remember the maximum prefix size in FUSED_JCC_PADDING
10034 frag. */
10035 unsigned int max_prefix_size;
10036 if (align_branch_prefix_size > max)
10037 max_prefix_size = max;
10038 else
10039 max_prefix_size = align_branch_prefix_size;
10040 if (max_prefix_size > count)
10041 fragP->tc_frag_data.max_prefix_length
10042 = max_prefix_size - count;
10043 }
10044
10045 /* Use existing segment prefix if possible. Use CS
10046 segment prefix in 64-bit mode. In 32-bit mode, use SS
10047 segment prefix with ESP/EBP base register and use DS
10048 segment prefix without ESP/EBP base register. */
10049 if (i.prefix[SEG_PREFIX])
10050 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
10051 else if (flag_code == CODE_64BIT)
10052 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
10053 else if (i.base_reg
10054 && (i.base_reg->reg_num == 4
10055 || i.base_reg->reg_num == 5))
10056 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
10057 else
10058 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
10059 }
10060 }
10061 }
10062
10063 /* NB: Don't work with COND_JUMP86 without i386. */
10064 if (align_branch_power
10065 && now_seg != absolute_section
10066 && cpu_arch_flags.bitfield.cpui386)
10067 {
10068 /* Terminate each frag so that we can add prefix and check for
10069 fused jcc. */
10070 frag_wane (frag_now);
10071 frag_new (0);
10072 }
10073
10074 #ifdef DEBUG386
10075 if (flag_debug)
10076 {
10077 pi ("" /*line*/, &i);
10078 }
10079 #endif /* DEBUG386 */
10080 }
10081
10082 /* Return the size of the displacement operand N. */
10083
10084 static int
10085 disp_size (unsigned int n)
10086 {
10087 int size = 4;
10088
10089 if (i.types[n].bitfield.disp64)
10090 size = 8;
10091 else if (i.types[n].bitfield.disp8)
10092 size = 1;
10093 else if (i.types[n].bitfield.disp16)
10094 size = 2;
10095 return size;
10096 }
10097
10098 /* Return the size of the immediate operand N. */
10099
10100 static int
10101 imm_size (unsigned int n)
10102 {
10103 int size = 4;
10104 if (i.types[n].bitfield.imm64)
10105 size = 8;
10106 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
10107 size = 1;
10108 else if (i.types[n].bitfield.imm16)
10109 size = 2;
10110 return size;
10111 }
10112
10113 static void
10114 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
10115 {
10116 char *p;
10117 unsigned int n;
10118
10119 for (n = 0; n < i.operands; n++)
10120 {
10121 if (operand_type_check (i.types[n], disp))
10122 {
10123 int size = disp_size (n);
10124
10125 if (now_seg == absolute_section)
10126 abs_section_offset += size;
10127 else if (i.op[n].disps->X_op == O_constant)
10128 {
10129 offsetT val = i.op[n].disps->X_add_number;
10130
10131 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10132 size);
10133 p = frag_more (size);
10134 md_number_to_chars (p, val, size);
10135 }
10136 else
10137 {
10138 enum bfd_reloc_code_real reloc_type;
10139 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10140 bool sign = (flag_code == CODE_64BIT && size == 4
10141 && (!want_disp32 (&i.tm)
10142 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10143 && !i.types[n].bitfield.baseindex)))
10144 || pcrel;
10145 fixS *fixP;
10146
10147 /* We can't have 8 bit displacement here. */
10148 gas_assert (!i.types[n].bitfield.disp8);
10149
10150 /* The PC relative address is computed relative
10151 to the instruction boundary, so in case immediate
10152 fields follow, we need to adjust the value. */
10153 if (pcrel && i.imm_operands)
10154 {
10155 unsigned int n1;
10156 int sz = 0;
10157
10158 for (n1 = 0; n1 < i.operands; n1++)
10159 if (operand_type_check (i.types[n1], imm))
10160 {
10161 /* Only one immediate is allowed for PC
10162 relative address. */
10163 gas_assert (sz == 0);
10164 sz = imm_size (n1);
10165 i.op[n].disps->X_add_number -= sz;
10166 }
10167 /* We should find the immediate. */
10168 gas_assert (sz != 0);
10169 }
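/* E.g. for `testl $1, foo(%rip)' the 4-byte immediate follows the
   displacement, so 4 is subtracted here to keep the fixup relative
   to the end of the whole instruction, which is what the CPU uses
   as the PC. */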
10170
10171 p = frag_more (size);
10172 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10173 if (GOT_symbol
10174 && GOT_symbol == i.op[n].disps->X_add_symbol
10175 && (((reloc_type == BFD_RELOC_32
10176 || reloc_type == BFD_RELOC_X86_64_32S
10177 || (reloc_type == BFD_RELOC_64
10178 && object_64bit))
10179 && (i.op[n].disps->X_op == O_symbol
10180 || (i.op[n].disps->X_op == O_add
10181 && ((symbol_get_value_expression
10182 (i.op[n].disps->X_op_symbol)->X_op)
10183 == O_subtract))))
10184 || reloc_type == BFD_RELOC_32_PCREL))
10185 {
10186 if (!object_64bit)
10187 {
10188 reloc_type = BFD_RELOC_386_GOTPC;
10189 i.has_gotpc_tls_reloc = true;
10190 i.op[n].disps->X_add_number +=
10191 encoding_length (insn_start_frag, insn_start_off, p);
10192 }
10193 else if (reloc_type == BFD_RELOC_64)
10194 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10195 else
10196 /* Don't do the adjustment for x86-64, as there
10197 the pcrel addressing is relative to the _next_
10198 insn, and that is taken care of in other code. */
10199 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10200 }
10201 else if (align_branch_power)
10202 {
10203 switch (reloc_type)
10204 {
10205 case BFD_RELOC_386_TLS_GD:
10206 case BFD_RELOC_386_TLS_LDM:
10207 case BFD_RELOC_386_TLS_IE:
10208 case BFD_RELOC_386_TLS_IE_32:
10209 case BFD_RELOC_386_TLS_GOTIE:
10210 case BFD_RELOC_386_TLS_GOTDESC:
10211 case BFD_RELOC_386_TLS_DESC_CALL:
10212 case BFD_RELOC_X86_64_TLSGD:
10213 case BFD_RELOC_X86_64_TLSLD:
10214 case BFD_RELOC_X86_64_GOTTPOFF:
10215 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10216 case BFD_RELOC_X86_64_TLSDESC_CALL:
10217 i.has_gotpc_tls_reloc = true;
10218 default:
10219 break;
10220 }
10221 }
10222 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10223 size, i.op[n].disps, pcrel,
10224 reloc_type);
10225
10226 if (flag_code == CODE_64BIT && size == 4 && pcrel
10227 && !i.prefix[ADDR_PREFIX])
10228 fixP->fx_signed = 1;
10229
10230 /* Check for "call/jmp *mem", "mov mem, %reg",
10231 "test %reg, mem" and "binop mem, %reg" where binop
10232 is one of adc, add, and, cmp, or, sbb, sub, xor
10233 instructions without data prefix. Always generate
10234 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
10235 if (i.prefix[DATA_PREFIX] == 0
10236 && (generate_relax_relocations
10237 || (!object_64bit
10238 && i.rm.mode == 0
10239 && i.rm.regmem == 5))
10240 && (i.rm.mode == 2
10241 || (i.rm.mode == 0 && i.rm.regmem == 5))
10242 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
10243 && ((i.operands == 1
10244 && i.tm.base_opcode == 0xff
10245 && (i.rm.reg == 2 || i.rm.reg == 4))
10246 || (i.operands == 2
10247 && (i.tm.base_opcode == 0x8b
10248 || i.tm.base_opcode == 0x85
10249 || (i.tm.base_opcode & ~0x38) == 0x03))))
10250 {
10251 if (object_64bit)
10252 {
10253 fixP->fx_tcbit = i.rex != 0;
10254 if (i.base_reg
10255 && (i.base_reg->reg_num == RegIP))
10256 fixP->fx_tcbit2 = 1;
10257 }
10258 else
10259 fixP->fx_tcbit2 = 1;
10260 }
10261 }
10262 }
10263 }
10264 }
10265
10266 static void
10267 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10268 {
10269 char *p;
10270 unsigned int n;
10271
10272 for (n = 0; n < i.operands; n++)
10273 {
10274 if (operand_type_check (i.types[n], imm))
10275 {
10276 int size = imm_size (n);
10277
10278 if (now_seg == absolute_section)
10279 abs_section_offset += size;
10280 else if (i.op[n].imms->X_op == O_constant)
10281 {
10282 offsetT val;
10283
10284 val = offset_in_range (i.op[n].imms->X_add_number,
10285 size);
10286 p = frag_more (size);
10287 md_number_to_chars (p, val, size);
10288 }
10289 else
10290 {
10291 /* Not absolute_section.
10292 Need a 32-bit fixup (don't support 8bit
10293 non-absolute imms). Try to support other
10294 sizes ... */
10295 enum bfd_reloc_code_real reloc_type;
10296 int sign;
10297
10298 if (i.types[n].bitfield.imm32s
10299 && (i.suffix == QWORD_MNEM_SUFFIX
10300 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10301 sign = 1;
10302 else
10303 sign = 0;
10304
10305 p = frag_more (size);
10306 reloc_type = reloc (size, 0, sign, i.reloc[n]);
10307
10308 /* This is tough to explain. We end up with this one if we
10309 * have operands that look like
10310 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
10311 * obtain the absolute address of the GOT, and it is strongly
10312 * preferable from a performance point of view to avoid using
10313 * a runtime relocation for this. The actual sequence of
10314 * instructions often look something like:
10315 *
10316 * call .L66
10317 * .L66:
10318 * popl %ebx
10319 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10320 *
10321 * The call and pop essentially return the absolute address
10322 * of the label .L66 and store it in %ebx. The linker itself
10323 * will ultimately change the first operand of the addl so
10324 * that %ebx points to the GOT, but to keep things simple, the
10325 * .o file must have this operand set so that it generates not
10326 * the absolute address of .L66, but the absolute address of
10327 * itself. This allows the linker to simply treat a GOTPC
10328 * relocation as asking for a pcrel offset to the GOT to be
10329 * added in, and the addend of the relocation is stored in the
10330 * operand field for the instruction itself.
10331 *
10332 * Our job here is to fix the operand so that it would add
10333 * the correct offset so that %ebx would point to itself. The
10334 * thing that is tricky is that .-.L66 will point to the
10335 * beginning of the instruction, so we need to further modify
10336 * the operand so that it will point to itself. There are
10337 * other cases where you have something like:
10338 *
10339 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10340 *
10341 * and here no correction would be required. Internally in
10342 * the assembler we treat operands of this form as not being
10343 * pcrel since the '.' is explicitly mentioned, and I wonder
10344 * whether it would simplify matters to do it this way. Who
10345 * knows. In earlier versions of the PIC patches, the
10346 * pcrel_adjust field was used to store the correction, but
10347 * since the expression is not pcrel, I felt it would be
10348 * confusing to do it this way. */
10349
10350 if ((reloc_type == BFD_RELOC_32
10351 || reloc_type == BFD_RELOC_X86_64_32S
10352 || reloc_type == BFD_RELOC_64)
10353 && GOT_symbol
10354 && GOT_symbol == i.op[n].imms->X_add_symbol
10355 && (i.op[n].imms->X_op == O_symbol
10356 || (i.op[n].imms->X_op == O_add
10357 && ((symbol_get_value_expression
10358 (i.op[n].imms->X_op_symbol)->X_op)
10359 == O_subtract))))
10360 {
10361 if (!object_64bit)
10362 reloc_type = BFD_RELOC_386_GOTPC;
10363 else if (size == 4)
10364 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10365 else if (size == 8)
10366 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10367 i.has_gotpc_tls_reloc = true;
10368 i.op[n].imms->X_add_number +=
10369 encoding_length (insn_start_frag, insn_start_off, p);
10370 }
10371 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10372 i.op[n].imms, 0, reloc_type);
10373 }
10374 }
10375 }
10376 }
10377 \f
10378 /* x86_cons_fix_new is called via the expression parsing code when a
10379 reloc is needed. We use this hook to get the correct .got reloc. */
10380 static int cons_sign = -1;
10381
10382 void
10383 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10384 expressionS *exp, bfd_reloc_code_real_type r)
10385 {
10386 r = reloc (len, 0, cons_sign, r);
10387
10388 #ifdef TE_PE
10389 if (exp->X_op == O_secrel)
10390 {
10391 exp->X_op = O_symbol;
10392 r = BFD_RELOC_32_SECREL;
10393 }
10394 else if (exp->X_op == O_secidx)
10395 r = BFD_RELOC_16_SECIDX;
10396 #endif
10397
10398 fix_new_exp (frag, off, len, exp, 0, r);
10399 }
10400
10401 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10402 purpose of the `.dc.a' internal pseudo-op. */
10403
10404 int
10405 x86_address_bytes (void)
10406 {
10407 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10408 return 4;
10409 return stdoutput->arch_info->bits_per_address / 8;
10410 }
10411
10412 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10413 || defined (LEX_AT)) && !defined (TE_PE)
10414 # define lex_got(reloc, adjust, types) NULL
10415 #else
10416 /* Parse operands of the form
10417 <symbol>@GOTOFF+<nnn>
10418 and similar .plt or .got references.
10419
10420 If we find one, set up the correct relocation in RELOC and copy the
10421 input string, minus the `@GOTOFF' into a malloc'd buffer for
10422 parsing by the calling routine. Return this buffer, and if ADJUST
10423 is non-null set it to the length of the string we removed from the
10424 input line. Otherwise return NULL. */
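/* E.g. for input `foo@GOTOFF+4' in 32-bit mode this returns a buffer
   holding `foo +4' (the reloc token replaced by a blank), sets *REL
   to BFD_RELOC_386_GOTOFF and *ADJUST to 6, the length of `GOTOFF'. */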
10425 static char *
10426 lex_got (enum bfd_reloc_code_real *rel,
10427 int *adjust,
10428 i386_operand_type *types)
10429 {
10430 /* Some of the relocations depend on the size of the field to
10431 be relocated. But in our callers i386_immediate and i386_displacement
10432 we don't yet know the operand size (this will be set by insn
10433 matching). Hence we record the word32 relocation here,
10434 and adjust the reloc according to the real size in reloc(). */
10435 static const struct
10436 {
10437 const char *str;
10438 int len;
10439 const enum bfd_reloc_code_real rel[2];
10440 const i386_operand_type types64;
10441 bool need_GOT_symbol;
10442 }
10443 gotrel[] =
10444 {
10445
10446 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
10447 { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
10448 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
10449 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
10450 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
10451 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
10452 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
10453 { .imm64 = 1, .disp64 = 1 } }
10454
10455 #ifndef TE_PE
10456 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10457 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
10458 BFD_RELOC_SIZE32 },
10459 { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
10460 #endif
10461 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
10462 BFD_RELOC_X86_64_PLTOFF64 },
10463 { .bitfield = { .imm64 = 1 } }, true },
10464 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
10465 BFD_RELOC_X86_64_PLT32 },
10466 OPERAND_TYPE_IMM32_32S_DISP32, false },
10467 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
10468 BFD_RELOC_X86_64_GOTPLT64 },
10469 OPERAND_TYPE_IMM64_DISP64, true },
10470 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
10471 BFD_RELOC_X86_64_GOTOFF64 },
10472 OPERAND_TYPE_IMM64_DISP64, true },
10473 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10474 BFD_RELOC_X86_64_GOTPCREL },
10475 OPERAND_TYPE_IMM32_32S_DISP32, true },
10476 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
10477 BFD_RELOC_X86_64_TLSGD },
10478 OPERAND_TYPE_IMM32_32S_DISP32, true },
10479 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
10480 _dummy_first_bfd_reloc_code_real },
10481 OPERAND_TYPE_NONE, true },
10482 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
10483 BFD_RELOC_X86_64_TLSLD },
10484 OPERAND_TYPE_IMM32_32S_DISP32, true },
10485 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10486 BFD_RELOC_X86_64_GOTTPOFF },
10487 OPERAND_TYPE_IMM32_32S_DISP32, true },
10488 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
10489 BFD_RELOC_X86_64_TPOFF32 },
10490 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10491 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
10492 _dummy_first_bfd_reloc_code_real },
10493 OPERAND_TYPE_NONE, true },
10494 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
10495 BFD_RELOC_X86_64_DTPOFF32 },
10496 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10497 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10498 _dummy_first_bfd_reloc_code_real },
10499 OPERAND_TYPE_NONE, true },
10500 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10501 _dummy_first_bfd_reloc_code_real },
10502 OPERAND_TYPE_NONE, true },
10503 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
10504 BFD_RELOC_X86_64_GOT32 },
10505 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10506 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
10507 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10508 OPERAND_TYPE_IMM32_32S_DISP32, true },
10509 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
10510 BFD_RELOC_X86_64_TLSDESC_CALL },
10511 OPERAND_TYPE_IMM32_32S_DISP32, true },
10512 #else /* TE_PE */
10513 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10514 BFD_RELOC_32_SECREL },
10515 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10516 #endif
10517
10518 #undef OPERAND_TYPE_IMM32_32S_DISP32
10519 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
10520 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
10521 #undef OPERAND_TYPE_IMM64_DISP64
10522
10523 };
10524 char *cp;
10525 unsigned int j;
10526
10527 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10528 if (!IS_ELF)
10529 return NULL;
10530 #endif
10531
10532 for (cp = input_line_pointer; *cp != '@'; cp++)
10533 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10534 return NULL;
10535
10536 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10537 {
10538 int len = gotrel[j].len;
10539 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10540 {
10541 if (gotrel[j].rel[object_64bit] != 0)
10542 {
10543 int first, second;
10544 char *tmpbuf, *past_reloc;
10545
10546 *rel = gotrel[j].rel[object_64bit];
10547
10548 if (types)
10549 {
10550 if (flag_code != CODE_64BIT)
10551 {
10552 types->bitfield.imm32 = 1;
10553 types->bitfield.disp32 = 1;
10554 }
10555 else
10556 *types = gotrel[j].types64;
10557 }
10558
10559 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10560 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10561
10562 /* The length of the first part of our input line. */
10563 first = cp - input_line_pointer;
10564
10565 /* The second part goes from after the reloc token until
10566 (and including) an end_of_line char or comma. */
10567 past_reloc = cp + 1 + len;
10568 cp = past_reloc;
10569 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10570 ++cp;
10571 second = cp + 1 - past_reloc;
10572
10573 /* Allocate and copy string. The trailing NUL shouldn't
10574 be necessary, but be safe. */
10575 tmpbuf = XNEWVEC (char, first + second + 2);
10576 memcpy (tmpbuf, input_line_pointer, first);
10577 if (second != 0 && *past_reloc != ' ')
10578 /* Replace the relocation token with ' ', so that
10579 errors like foo@GOTOFF1 will be detected. */
10580 tmpbuf[first++] = ' ';
10581 else
10582 /* Increment length by 1 if the relocation token is
10583 removed. */
10584 len++;
10585 if (adjust)
10586 *adjust = len;
10587 memcpy (tmpbuf + first, past_reloc, second);
10588 tmpbuf[first + second] = '\0';
10589 return tmpbuf;
10590 }
10591
10592 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10593 gotrel[j].str, 1 << (5 + object_64bit));
10594 return NULL;
10595 }
10596 }
10597
10598 /* Might be a symbol version string. Don't as_bad here. */
10599 return NULL;
10600 }
10601 #endif
10602
10603 bfd_reloc_code_real_type
10604 x86_cons (expressionS *exp, int size)
10605 {
10606 bfd_reloc_code_real_type got_reloc = NO_RELOC;
10607
10608 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10609 && !defined (LEX_AT)) \
10610 || defined (TE_PE)
10611 intel_syntax = -intel_syntax;
10612
10613 exp->X_md = 0;
10614 if (size == 4 || (object_64bit && size == 8))
10615 {
10616 /* Handle @GOTOFF and the like in an expression. */
10617 char *save;
10618 char *gotfree_input_line;
10619 int adjust = 0;
10620
10621 save = input_line_pointer;
10622 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10623 if (gotfree_input_line)
10624 input_line_pointer = gotfree_input_line;
10625
10626 expression (exp);
10627
10628 if (gotfree_input_line)
10629 {
10630 /* expression () has merrily parsed up to the end of line,
10631 or a comma - in the wrong buffer. Transfer how far
10632 input_line_pointer has moved to the right buffer. */
10633 input_line_pointer = (save
10634 + (input_line_pointer - gotfree_input_line)
10635 + adjust);
10636 free (gotfree_input_line);
10637 if (exp->X_op == O_constant
10638 || exp->X_op == O_absent
10639 || exp->X_op == O_illegal
10640 || exp->X_op == O_register
10641 || exp->X_op == O_big)
10642 {
10643 char c = *input_line_pointer;
10644 *input_line_pointer = 0;
10645 as_bad (_("missing or invalid expression `%s'"), save);
10646 *input_line_pointer = c;
10647 }
10648 else if ((got_reloc == BFD_RELOC_386_PLT32
10649 || got_reloc == BFD_RELOC_X86_64_PLT32)
10650 && exp->X_op != O_symbol)
10651 {
10652 char c = *input_line_pointer;
10653 *input_line_pointer = 0;
10654 as_bad (_("invalid PLT expression `%s'"), save);
10655 *input_line_pointer = c;
10656 }
10657 }
10658 }
10659 else
10660 expression (exp);
10661
10662 intel_syntax = -intel_syntax;
10663
10664 if (intel_syntax)
10665 i386_intel_simplify (exp);
10666 #else
10667 expression (exp);
10668 #endif
10669
10670 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
10671 if (size == 4 && exp->X_op == O_constant && !object_64bit)
10672 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10673
10674 return got_reloc;
10675 }
10676
10677 static void
10678 signed_cons (int size)
10679 {
10680 if (object_64bit)
10681 cons_sign = 1;
10682 cons (size);
10683 cons_sign = -1;
10684 }
10685
10686 #ifdef TE_PE
10687 static void
10688 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10689 {
10690 expressionS exp;
10691
10692 do
10693 {
10694 expression (&exp);
10695 if (exp.X_op == O_symbol)
10696 exp.X_op = O_secrel;
10697
10698 emit_expr (&exp, 4);
10699 }
10700 while (*input_line_pointer++ == ',');
10701
10702 input_line_pointer--;
10703 demand_empty_rest_of_line ();
10704 }
10705
10706 static void
10707 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
10708 {
10709 expressionS exp;
10710
10711 do
10712 {
10713 expression (&exp);
10714 if (exp.X_op == O_symbol)
10715 exp.X_op = O_secidx;
10716
10717 emit_expr (&exp, 2);
10718 }
10719 while (*input_line_pointer++ == ',');
10720
10721 input_line_pointer--;
10722 demand_empty_rest_of_line ();
10723 }
10724 #endif
10725
10726 /* Handle Rounding Control / SAE specifiers. */
10727
10728 static char *
10729 RC_SAE_specifier (const char *pstr)
10730 {
10731 unsigned int j;
10732
10733 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10734 {
10735 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10736 {
10737 if (i.rounding.type != rc_none)
10738 {
10739 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
10740 return NULL;
10741 }
10742
10743 i.rounding.type = RC_NamesTable[j].type;
10744
10745 return (char *)(pstr + RC_NamesTable[j].len);
10746 }
10747 }
10748
10749 return NULL;
10750 }
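/* This recognizes the `rn-sae', `rd-sae', `ru-sae', `rz-sae' and
   `sae' keywords. It is reached both from RC_SAE_immediate below for
   the AT&T operand form, e.g. `vaddpd {rz-sae}, %zmm3, %zmm2, %zmm1',
   and from check_VecOperations for Intel syntax. */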
10751
10752 /* Handle Vector operations. */
10753
10754 static char *
10755 check_VecOperations (char *op_string)
10756 {
10757 const reg_entry *mask;
10758 const char *saved;
10759 char *end_op;
10760
10761 while (*op_string)
10762 {
10763 saved = op_string;
10764 if (*op_string == '{')
10765 {
10766 op_string++;
10767
10768 /* Check broadcasts. */
10769 if (startswith (op_string, "1to"))
10770 {
10771 unsigned int bcst_type;
10772
10773 if (i.broadcast.type)
10774 goto duplicated_vec_op;
10775
10776 op_string += 3;
10777 if (*op_string == '8')
10778 bcst_type = 8;
10779 else if (*op_string == '4')
10780 bcst_type = 4;
10781 else if (*op_string == '2')
10782 bcst_type = 2;
10783 else if (*op_string == '1'
10784 && *(op_string+1) == '6')
10785 {
10786 bcst_type = 16;
10787 op_string++;
10788 }
10789 else if (*op_string == '3'
10790 && *(op_string+1) == '2')
10791 {
10792 bcst_type = 32;
10793 op_string++;
10794 }
10795 else
10796 {
10797 as_bad (_("Unsupported broadcast: `%s'"), saved);
10798 return NULL;
10799 }
10800 op_string++;
10801
10802 i.broadcast.type = bcst_type;
10803 i.broadcast.operand = this_operand;
10804 }
10805 /* Check masking operation. */
10806 else if ((mask = parse_register (op_string, &end_op)) != NULL)
10807 {
10808 if (mask == &bad_reg)
10809 return NULL;
10810
10811 /* k0 can't be used for write mask. */
10812 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10813 {
10814 as_bad (_("`%s%s' can't be used for write mask"),
10815 register_prefix, mask->reg_name);
10816 return NULL;
10817 }
10818
10819 if (!i.mask.reg)
10820 {
10821 i.mask.reg = mask;
10822 i.mask.operand = this_operand;
10823 }
10824 else if (i.mask.reg->reg_num)
10825 goto duplicated_vec_op;
10826 else
10827 {
10828 i.mask.reg = mask;
10829
10830 /* Only "{z}" is allowed here. No need to check
10831 zeroing mask explicitly. */
10832 if (i.mask.operand != (unsigned int) this_operand)
10833 {
10834 as_bad (_("invalid write mask `%s'"), saved);
10835 return NULL;
10836 }
10837 }
10838
10839 op_string = end_op;
10840 }
10841 /* Check zeroing-flag for masking operation. */
10842 else if (*op_string == 'z')
10843 {
10844 if (!i.mask.reg)
10845 {
10846 i.mask.reg = reg_k0;
10847 i.mask.zeroing = 1;
10848 i.mask.operand = this_operand;
10849 }
10850 else
10851 {
10852 if (i.mask.zeroing)
10853 {
10854 duplicated_vec_op:
10855 as_bad (_("duplicated `%s'"), saved);
10856 return NULL;
10857 }
10858
10859 i.mask.zeroing = 1;
10860
10861 /* Only "{%k}" is allowed here. No need to check mask
10862 register explicitly. */
10863 if (i.mask.operand != (unsigned int) this_operand)
10864 {
10865 as_bad (_("invalid zeroing-masking `%s'"),
10866 saved);
10867 return NULL;
10868 }
10869 }
10870
10871 op_string++;
10872 }
10873 else if (intel_syntax
10874 && (op_string = RC_SAE_specifier (op_string)) != NULL)
10875 i.rounding.modifier = true;
10876 else
10877 goto unknown_vec_op;
10878
10879 if (*op_string != '}')
10880 {
10881 as_bad (_("missing `}' in `%s'"), saved);
10882 return NULL;
10883 }
10884 op_string++;
10885
10886 /* Strip whitespace since the addition of pseudo prefixes
10887 changed how the scrubber treats '{'. */
10888 if (is_space_char (*op_string))
10889 ++op_string;
10890
10891 continue;
10892 }
10893 unknown_vec_op:
10894 /* We don't know this one. */
10895 as_bad (_("unknown vector operation: `%s'"), saved);
10896 return NULL;
10897 }
10898
10899 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
10900 {
10901 as_bad (_("zeroing-masking only allowed with write mask"));
10902 return NULL;
10903 }
10904
10905 return op_string;
10906 }
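/* Taken together this parses AT&T operand decorations such as
   `(%rax){1to8}' (embedded broadcast), `%zmm0{%k1}' (write mask) and
   `%zmm0{%k1}{z}' (zeroing-masking); a plain `{z}' without a real
   mask register is rejected just above. */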
10907
10908 static int
10909 i386_immediate (char *imm_start)
10910 {
10911 char *save_input_line_pointer;
10912 char *gotfree_input_line;
10913 segT exp_seg = 0;
10914 expressionS *exp;
10915 i386_operand_type types;
10916
10917 operand_type_set (&types, ~0);
10918
10919 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10920 {
10921 as_bad (_("at most %d immediate operands are allowed"),
10922 MAX_IMMEDIATE_OPERANDS);
10923 return 0;
10924 }
10925
10926 exp = &im_expressions[i.imm_operands++];
10927 i.op[this_operand].imms = exp;
10928
10929 if (is_space_char (*imm_start))
10930 ++imm_start;
10931
10932 save_input_line_pointer = input_line_pointer;
10933 input_line_pointer = imm_start;
10934
10935 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10936 if (gotfree_input_line)
10937 input_line_pointer = gotfree_input_line;
10938
10939 exp_seg = expression (exp);
10940
10941 SKIP_WHITESPACE ();
10942 if (*input_line_pointer)
10943 as_bad (_("junk `%s' after expression"), input_line_pointer);
10944
10945 input_line_pointer = save_input_line_pointer;
10946 if (gotfree_input_line)
10947 {
10948 free (gotfree_input_line);
10949
10950 if (exp->X_op == O_constant)
10951 exp->X_op = O_illegal;
10952 }
10953
10954 if (exp_seg == reg_section)
10955 {
10956 as_bad (_("illegal immediate register operand %s"), imm_start);
10957 return 0;
10958 }
10959
10960 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10961 }
10962
10963 static int
10964 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10965 i386_operand_type types, const char *imm_start)
10966 {
10967 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10968 {
10969 if (imm_start)
10970 as_bad (_("missing or invalid immediate expression `%s'"),
10971 imm_start);
10972 return 0;
10973 }
10974 else if (exp->X_op == O_constant)
10975 {
10976 /* Size it properly later. */
10977 i.types[this_operand].bitfield.imm64 = 1;
10978
10979 /* If not 64bit, sign/zero extend val, to account for wraparound
10980 when !BFD64. */
10981 if (flag_code != CODE_64BIT)
10982 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10983 }
10984 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10985 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
10986 && exp_seg != absolute_section
10987 && exp_seg != text_section
10988 && exp_seg != data_section
10989 && exp_seg != bss_section
10990 && exp_seg != undefined_section
10991 && !bfd_is_com_section (exp_seg))
10992 {
10993 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
10994 return 0;
10995 }
10996 #endif
10997 else
10998 {
10999 /* This is an address. The size of the address will be
11000 determined later, depending on destination register,
11001 suffix, or the default for the section. */
11002 i.types[this_operand].bitfield.imm8 = 1;
11003 i.types[this_operand].bitfield.imm16 = 1;
11004 i.types[this_operand].bitfield.imm32 = 1;
11005 i.types[this_operand].bitfield.imm32s = 1;
11006 i.types[this_operand].bitfield.imm64 = 1;
11007 i.types[this_operand] = operand_type_and (i.types[this_operand],
11008 types);
11009 }
11010
11011 return 1;
11012 }
11013
11014 static char *
11015 i386_scale (char *scale)
11016 {
11017 offsetT val;
11018 char *save = input_line_pointer;
11019
11020 input_line_pointer = scale;
11021 val = get_absolute_expression ();
11022
11023 switch (val)
11024 {
11025 case 1:
11026 i.log2_scale_factor = 0;
11027 break;
11028 case 2:
11029 i.log2_scale_factor = 1;
11030 break;
11031 case 4:
11032 i.log2_scale_factor = 2;
11033 break;
11034 case 8:
11035 i.log2_scale_factor = 3;
11036 break;
11037 default:
11038 {
11039 char sep = *input_line_pointer;
11040
11041 *input_line_pointer = '\0';
11042 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
11043 scale);
11044 *input_line_pointer = sep;
11045 input_line_pointer = save;
11046 return NULL;
11047 }
11048 }
11049 if (i.log2_scale_factor != 0 && i.index_reg == 0)
11050 {
11051 as_warn (_("scale factor of %d without an index register"),
11052 1 << i.log2_scale_factor);
11053 i.log2_scale_factor = 0;
11054 }
11055 scale = input_line_pointer;
11056 input_line_pointer = save;
11057 return scale;
11058 }
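/* E.g. for `(%eax,%ebx,4)' the scale string "4" arrives here and is
   recorded as log2_scale_factor == 2, matching the two-bit scale
   field of the SIB byte. */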
11059
11060 static int
11061 i386_displacement (char *disp_start, char *disp_end)
11062 {
11063 expressionS *exp;
11064 segT exp_seg = 0;
11065 char *save_input_line_pointer;
11066 char *gotfree_input_line;
11067 int override;
11068 i386_operand_type bigdisp, types = anydisp;
11069 int ret;
11070
11071 if (i.disp_operands == MAX_MEMORY_OPERANDS)
11072 {
11073 as_bad (_("at most %d displacement operands are allowed"),
11074 MAX_MEMORY_OPERANDS);
11075 return 0;
11076 }
11077
11078 operand_type_set (&bigdisp, 0);
11079 if (i.jumpabsolute
11080 || i.types[this_operand].bitfield.baseindex
11081 || (current_templates->start->opcode_modifier.jump != JUMP
11082 && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
11083 {
11084 i386_addressing_mode ();
11085 override = (i.prefix[ADDR_PREFIX] != 0);
11086 if (flag_code == CODE_64BIT)
11087 {
11088 bigdisp.bitfield.disp32 = 1;
11089 if (!override)
11090 bigdisp.bitfield.disp64 = 1;
11091 }
11092 else if ((flag_code == CODE_16BIT) ^ override)
11093 bigdisp.bitfield.disp16 = 1;
11094 else
11095 bigdisp.bitfield.disp32 = 1;
11096 }
11097 else
11098 {
11099 /* For PC-relative branches, the width of the displacement may be
11100 dependent upon data size, but is never dependent upon address size.
11101 Also make sure to not unintentionally match against a non-PC-relative
11102 branch template. */
11103 static templates aux_templates;
11104 const insn_template *t = current_templates->start;
11105 bool has_intel64 = false;
11106
11107 aux_templates.start = t;
11108 while (++t < current_templates->end)
11109 {
11110 if (t->opcode_modifier.jump
11111 != current_templates->start->opcode_modifier.jump)
11112 break;
11113 if (t->opcode_modifier.isa64 >= INTEL64)
11114 has_intel64 = true;
11115 }
11116 if (t < current_templates->end)
11117 {
11118 aux_templates.end = t;
11119 current_templates = &aux_templates;
11120 }
11121
11122 override = (i.prefix[DATA_PREFIX] != 0);
11123 if (flag_code == CODE_64BIT)
11124 {
11125 if ((override || i.suffix == WORD_MNEM_SUFFIX)
11126 && (!intel64 || !has_intel64))
11127 bigdisp.bitfield.disp16 = 1;
11128 else
11129 bigdisp.bitfield.disp32 = 1;
11130 }
11131 else
11132 {
11133 if (!override)
11134 override = (i.suffix == (flag_code != CODE_16BIT
11135 ? WORD_MNEM_SUFFIX
11136 : LONG_MNEM_SUFFIX));
11137 bigdisp.bitfield.disp32 = 1;
11138 if ((flag_code == CODE_16BIT) ^ override)
11139 {
11140 bigdisp.bitfield.disp32 = 0;
11141 bigdisp.bitfield.disp16 = 1;
11142 }
11143 }
11144 }
11145 i.types[this_operand] = operand_type_or (i.types[this_operand],
11146 bigdisp);
11147
11148 exp = &disp_expressions[i.disp_operands];
11149 i.op[this_operand].disps = exp;
11150 i.disp_operands++;
11151 save_input_line_pointer = input_line_pointer;
11152 input_line_pointer = disp_start;
11153 END_STRING_AND_SAVE (disp_end);
11154
11155 #ifndef GCC_ASM_O_HACK
11156 #define GCC_ASM_O_HACK 0
11157 #endif
11158 #if GCC_ASM_O_HACK
11159 END_STRING_AND_SAVE (disp_end + 1);
11160 if (i.types[this_operand].bitfield.baseindex
11161 && disp_end[-1] == '+')
11162 {
11163 /* This hack is to avoid a warning when using the "o"
11164 constraint within gcc asm statements.
11165 For instance:
11166
11167 #define _set_tssldt_desc(n,addr,limit,type) \
11168 __asm__ __volatile__ ( \
11169 "movw %w2,%0\n\t" \
11170 "movw %w1,2+%0\n\t" \
11171 "rorl $16,%1\n\t" \
11172 "movb %b1,4+%0\n\t" \
11173 "movb %4,5+%0\n\t" \
11174 "movb $0,6+%0\n\t" \
11175 "movb %h1,7+%0\n\t" \
11176 "rorl $16,%1" \
11177 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
11178
11179 This works great except that the output assembler ends
11180 up looking a bit weird if it turns out that there is
11181 no offset. You end up producing code that looks like:
11182
11183 #APP
11184 movw $235,(%eax)
11185 movw %dx,2+(%eax)
11186 rorl $16,%edx
11187 movb %dl,4+(%eax)
11188 movb $137,5+(%eax)
11189 movb $0,6+(%eax)
11190 movb %dh,7+(%eax)
11191 rorl $16,%edx
11192 #NO_APP
11193
11194 So here we provide the missing zero. */
11195
11196 *disp_end = '0';
11197 }
11198 #endif
11199 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11200 if (gotfree_input_line)
11201 input_line_pointer = gotfree_input_line;
11202
11203 exp_seg = expression (exp);
11204
11205 SKIP_WHITESPACE ();
11206 if (*input_line_pointer)
11207 as_bad (_("junk `%s' after expression"), input_line_pointer);
11208 #if GCC_ASM_O_HACK
11209 RESTORE_END_STRING (disp_end + 1);
11210 #endif
11211 input_line_pointer = save_input_line_pointer;
11212 if (gotfree_input_line)
11213 {
11214 free (gotfree_input_line);
11215
11216 if (exp->X_op == O_constant || exp->X_op == O_register)
11217 exp->X_op = O_illegal;
11218 }
11219
11220 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
11221
11222 RESTORE_END_STRING (disp_end);
11223
11224 return ret;
11225 }
11226
11227 static int
11228 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11229 i386_operand_type types, const char *disp_start)
11230 {
11231 int ret = 1;
11232
11233 /* We do this to make sure that the section symbol is in
11234 the symbol table. We will ultimately change the relocation
11235 to be relative to the beginning of the section. */
11236 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11237 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11238 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11239 {
11240 if (exp->X_op != O_symbol)
11241 goto inv_disp;
11242
11243 if (S_IS_LOCAL (exp->X_add_symbol)
11244 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11245 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11246 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11247 exp->X_op = O_subtract;
11248 exp->X_op_symbol = GOT_symbol;
11249 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11250 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11251 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11252 i.reloc[this_operand] = BFD_RELOC_64;
11253 else
11254 i.reloc[this_operand] = BFD_RELOC_32;
11255 }
11256
11257 else if (exp->X_op == O_absent
11258 || exp->X_op == O_illegal
11259 || exp->X_op == O_big)
11260 {
11261 inv_disp:
11262 as_bad (_("missing or invalid displacement expression `%s'"),
11263 disp_start);
11264 ret = 0;
11265 }
11266
11267 else if (exp->X_op == O_constant)
11268 {
11269 /* Sizing gets taken care of by optimize_disp().
11270
11271 If not 64bit, sign/zero extend val, to account for wraparound
11272 when !BFD64. */
11273 if (flag_code != CODE_64BIT)
11274 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11275 }
11276
11277 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11278 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11279 && exp_seg != absolute_section
11280 && exp_seg != text_section
11281 && exp_seg != data_section
11282 && exp_seg != bss_section
11283 && exp_seg != undefined_section
11284 && !bfd_is_com_section (exp_seg))
11285 {
11286 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11287 ret = 0;
11288 }
11289 #endif
11290
11291 else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
11292 i.types[this_operand].bitfield.disp8 = 1;
11293
11294 /* Check if this is a displacement only operand. */
11295 if (!i.types[this_operand].bitfield.baseindex)
11296 i.types[this_operand] =
11297 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
11298 operand_type_and (i.types[this_operand], types));
11299
11300 return ret;
11301 }
11302
11303 /* Return the active addressing mode, taking address override and
11304 registers forming the address into consideration. Update the
11305 address override prefix if necessary. */
11306
11307 static enum flag_code
11308 i386_addressing_mode (void)
11309 {
11310 enum flag_code addr_mode;
11311
11312 if (i.prefix[ADDR_PREFIX])
11313 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11314 else if (flag_code == CODE_16BIT
11315 && current_templates->start->cpu_flags.bitfield.cpumpx
11316 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11317 from md_assemble() by "is not a valid base/index expression"
11318 when there is a base and/or index. */
11319 && !i.types[this_operand].bitfield.baseindex)
11320 {
11321 /* MPX insn memory operands with neither base nor index must be forced
11322 to use 32-bit addressing in 16-bit mode. */
11323 addr_mode = CODE_32BIT;
11324 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11325 ++i.prefixes;
11326 gas_assert (!i.types[this_operand].bitfield.disp16);
11327 gas_assert (!i.types[this_operand].bitfield.disp32);
11328 }
11329 else
11330 {
11331 addr_mode = flag_code;
11332
11333 #if INFER_ADDR_PREFIX
11334 if (i.mem_operands == 0)
11335 {
11336 /* Infer address prefix from the first memory operand. */
11337 const reg_entry *addr_reg = i.base_reg;
11338
11339 if (addr_reg == NULL)
11340 addr_reg = i.index_reg;
11341
11342 if (addr_reg)
11343 {
11344 if (addr_reg->reg_type.bitfield.dword)
11345 addr_mode = CODE_32BIT;
11346 else if (flag_code != CODE_64BIT
11347 && addr_reg->reg_type.bitfield.word)
11348 addr_mode = CODE_16BIT;
11349
11350 if (addr_mode != flag_code)
11351 {
11352 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11353 i.prefixes += 1;
11354 /* Change the size of any displacement too. At most one
11355 of Disp16 or Disp32 is set.
11356 FIXME. There doesn't seem to be any real need for
11357 separate Disp16 and Disp32 flags. The same goes for
11358 Imm16 and Imm32. Removing them would probably clean
11359 up the code quite a lot. */
11360 if (flag_code != CODE_64BIT
11361 && (i.types[this_operand].bitfield.disp16
11362 || i.types[this_operand].bitfield.disp32))
11363 {
11364 static const i386_operand_type disp16_32 = {
11365 .bitfield = { .disp16 = 1, .disp32 = 1 }
11366 };
11367
11368 i.types[this_operand]
11369 = operand_type_xor (i.types[this_operand], disp16_32);
11370 }
11371 }
11372 }
11373 }
11374 #endif
11375 }
11376
11377 return addr_mode;
11378 }
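/* E.g. in .code32, `mov (%si),%ax' uses a 16-bit address register,
   so CODE_16BIT is returned, a 0x67 address size prefix is added,
   and any pending Disp32 operand type is flipped to Disp16. */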
11379
11380 /* Make sure the memory operand we've been dealt is valid.
11381 Return 1 on success, 0 on failure. */
11382
11383 static int
11384 i386_index_check (const char *operand_string)
11385 {
11386 const char *kind = "base/index";
11387 enum flag_code addr_mode = i386_addressing_mode ();
11388 const insn_template *t = current_templates->end - 1;
11389
11390 if (t->opcode_modifier.isstring)
11391 {
11392 /* Memory operands of string insns are special in that they only allow
11393 a single register (rDI, rSI, or rBX) as their memory address. */
11394 const reg_entry *expected_reg;
11395 static const char *di_si[][2] =
11396 {
11397 { "esi", "edi" },
11398 { "si", "di" },
11399 { "rsi", "rdi" }
11400 };
11401 static const char *bx[] = { "ebx", "bx", "rbx" };
11402
11403 kind = "string address";
11404
11405 if (t->opcode_modifier.prefixok == PrefixRep)
11406 {
11407 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
11408 int op = 0;
11409
11410 if (!t->operand_types[0].bitfield.baseindex
11411 || ((!i.mem_operands != !intel_syntax)
11412 && t->operand_types[1].bitfield.baseindex))
11413 op = 1;
11414 expected_reg
11415 = (const reg_entry *) str_hash_find (reg_hash,
11416 di_si[addr_mode][op == es_op]);
11417 }
11418 else
11419 expected_reg
11420 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
11421
11422 if (i.base_reg != expected_reg
11423 || i.index_reg
11424 || operand_type_check (i.types[this_operand], disp))
11425 {
11426 /* The second memory operand must have the same size as
11427 the first one. */
11428 if (i.mem_operands
11429 && i.base_reg
11430 && !((addr_mode == CODE_64BIT
11431 && i.base_reg->reg_type.bitfield.qword)
11432 || (addr_mode == CODE_32BIT
11433 ? i.base_reg->reg_type.bitfield.dword
11434 : i.base_reg->reg_type.bitfield.word)))
11435 goto bad_address;
11436
11437 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
11438 operand_string,
11439 intel_syntax ? '[' : '(',
11440 register_prefix,
11441 expected_reg->reg_name,
11442 intel_syntax ? ']' : ')');
11443 return 1;
11444 }
11445 else
11446 return 1;
11447
11448 bad_address:
11449 as_bad (_("`%s' is not a valid %s expression"),
11450 operand_string, kind);
11451 return 0;
11452 }
11453 else
11454 {
11455 t = current_templates->start;
11456
11457 if (addr_mode != CODE_16BIT)
11458 {
11459 /* 32-bit/64-bit checks. */
11460 if (i.disp_encoding == disp_encoding_16bit)
11461 {
11462 bad_disp:
11463 as_bad (_("invalid `%s' prefix"),
11464 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
11465 return 0;
11466 }
11467
11468 if ((i.base_reg
11469 && ((addr_mode == CODE_64BIT
11470 ? !i.base_reg->reg_type.bitfield.qword
11471 : !i.base_reg->reg_type.bitfield.dword)
11472 || (i.index_reg && i.base_reg->reg_num == RegIP)
11473 || i.base_reg->reg_num == RegIZ))
11474 || (i.index_reg
11475 && !i.index_reg->reg_type.bitfield.xmmword
11476 && !i.index_reg->reg_type.bitfield.ymmword
11477 && !i.index_reg->reg_type.bitfield.zmmword
11478 && ((addr_mode == CODE_64BIT
11479 ? !i.index_reg->reg_type.bitfield.qword
11480 : !i.index_reg->reg_type.bitfield.dword)
11481 || !i.index_reg->reg_type.bitfield.baseindex)))
11482 goto bad_address;
11483
11484 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
11485 if ((t->opcode_modifier.opcodeprefix == PREFIX_0XF3
11486 && t->opcode_modifier.opcodespace == SPACE_0F
11487 && t->base_opcode == 0x1b)
11488 || (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11489 && t->opcode_modifier.opcodespace == SPACE_0F
11490 && (t->base_opcode & ~1) == 0x1a)
11491 || t->opcode_modifier.sib == SIBMEM)
11492 {
11493 /* They cannot use RIP-relative addressing. */
11494 if (i.base_reg && i.base_reg->reg_num == RegIP)
11495 {
11496 as_bad (_("`%s' cannot be used here"), operand_string);
11497 return 0;
11498 }
11499
11500 /* bndldx and bndstx ignore their scale factor. */
11501 if (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11502 && t->opcode_modifier.opcodespace == SPACE_0F
11503 && (t->base_opcode & ~1) == 0x1a
11504 && i.log2_scale_factor)
11505 as_warn (_("register scaling is being ignored here"));
11506 }
11507 }
11508 else
11509 {
11510 /* 16-bit checks. */
11511 if (i.disp_encoding == disp_encoding_32bit)
11512 goto bad_disp;
11513
11514 if ((i.base_reg
11515 && (!i.base_reg->reg_type.bitfield.word
11516 || !i.base_reg->reg_type.bitfield.baseindex))
11517 || (i.index_reg
11518 && (!i.index_reg->reg_type.bitfield.word
11519 || !i.index_reg->reg_type.bitfield.baseindex
11520 || !(i.base_reg
11521 && i.base_reg->reg_num < 6
11522 && i.index_reg->reg_num >= 6
11523 && i.log2_scale_factor == 0))))
11524 goto bad_address;
11525 }
11526 }
11527 return 1;
11528 }
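/* E.g. `movsl %ds:(%esi), %es:(%edi)' passes the string-insn check
   in 32-bit mode, while a base register other than the expected
   rSI/rDI pair, say `movsl (%ebx), (%edi)', only draws the warning
   naming the expected register. */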
11529
11530 /* Handle vector immediates. */
11531
11532 static int
11533 RC_SAE_immediate (const char *imm_start)
11534 {
11535 const char *pstr = imm_start;
11536
11537 if (*pstr != '{')
11538 return 0;
11539
11540 pstr = RC_SAE_specifier (pstr + 1);
11541 if (pstr == NULL)
11542 return 0;
11543
11544 if (*pstr++ != '}')
11545 {
11546 as_bad (_("Missing '}': '%s'"), imm_start);
11547 return 0;
11548 }
11549 /* RC/SAE immediate string should contain nothing more. */
11550 if (*pstr != 0)
11551 {
11552 as_bad (_("Junk after '}': '%s'"), imm_start);
11553 return 0;
11554 }
11555
11556 /* Internally this doesn't count as an operand. */
11557 --i.operands;
11558
11559 return 1;
11560 }
11561
11562 static INLINE bool starts_memory_operand (char c)
11563 {
11564 return ISDIGIT (c)
11565 || is_identifier_char (c)
11566 || strchr ("([\"+-!~", c);
11567 }
11568
11569 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
11570 on error. */
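/* Typical AT&T operand forms handled below (an illustrative, not
   exhaustive, list):

       %eax                  register
       $42                   immediate
       *%eax                 absolute jump/call target
       %es:4(%ebp,%esi,2)    segment:disp(base,index,scale) memory ref
       {rn-sae}              RC/SAE pseudo-operand (see above)  */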
11571
11572 static int
11573 i386_att_operand (char *operand_string)
11574 {
11575 const reg_entry *r;
11576 char *end_op;
11577 char *op_string = operand_string;
11578
11579 if (is_space_char (*op_string))
11580 ++op_string;
11581
11582 /* We check for an absolute prefix (differentiating,
11583 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
11584 if (*op_string == ABSOLUTE_PREFIX)
11585 {
11586 ++op_string;
11587 if (is_space_char (*op_string))
11588 ++op_string;
11589 i.jumpabsolute = true;
11590 }
11591
11592 /* Check if operand is a register. */
11593 if ((r = parse_register (op_string, &end_op)) != NULL)
11594 {
11595 i386_operand_type temp;
11596
11597 if (r == &bad_reg)
11598 return 0;
11599
11600 /* Check for a segment override by searching for ':' after a
11601 segment register. */
11602 op_string = end_op;
11603 if (is_space_char (*op_string))
11604 ++op_string;
11605 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11606 {
11607 i.seg[i.mem_operands] = r;
11608
11609 /* Skip the ':' and whitespace. */
11610 ++op_string;
11611 if (is_space_char (*op_string))
11612 ++op_string;
11613
11614 /* Handle case of %es:*foo. */
11615 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
11616 {
11617 ++op_string;
11618 if (is_space_char (*op_string))
11619 ++op_string;
11620 i.jumpabsolute = true;
11621 }
11622
11623 if (!starts_memory_operand (*op_string))
11624 {
11625 as_bad (_("bad memory operand `%s'"), op_string);
11626 return 0;
11627 }
11628 goto do_memory_reference;
11629 }
11630
11631 /* Handle vector operations. */
11632 if (*op_string == '{')
11633 {
11634 op_string = check_VecOperations (op_string);
11635 if (op_string == NULL)
11636 return 0;
11637 }
11638
11639 if (*op_string)
11640 {
11641 as_bad (_("junk `%s' after register"), op_string);
11642 return 0;
11643 }
11644 temp = r->reg_type;
11645 temp.bitfield.baseindex = 0;
11646 i.types[this_operand] = operand_type_or (i.types[this_operand],
11647 temp);
11648 i.types[this_operand].bitfield.unspecified = 0;
11649 i.op[this_operand].regs = r;
11650 i.reg_operands++;
11651
11652 /* A GPR may follow an RC or SAE immediate only if a (vector) register
11653 operand was also present earlier on. */
11654 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
11655 && i.reg_operands == 1)
11656 {
11657 unsigned int j;
11658
11659 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
11660 if (i.rounding.type == RC_NamesTable[j].type)
11661 break;
11662 as_bad (_("`%s': misplaced `{%s}'"),
11663 current_templates->start->name, RC_NamesTable[j].name);
11664 return 0;
11665 }
11666 }
11667 else if (*op_string == REGISTER_PREFIX)
11668 {
11669 as_bad (_("bad register name `%s'"), op_string);
11670 return 0;
11671 }
11672 else if (*op_string == IMMEDIATE_PREFIX)
11673 {
11674 ++op_string;
11675 if (i.jumpabsolute)
11676 {
11677 as_bad (_("immediate operand illegal with absolute jump"));
11678 return 0;
11679 }
11680 if (!i386_immediate (op_string))
11681 return 0;
11682 if (i.rounding.type != rc_none)
11683 {
11684 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
11685 current_templates->start->name);
11686 return 0;
11687 }
11688 }
11689 else if (RC_SAE_immediate (operand_string))
11690 {
11691 /* If it is a RC or SAE immediate, do the necessary placement check:
11692 Only another immediate or a GPR may precede it. */
11693 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
11694 || (i.reg_operands == 1
11695 && i.op[0].regs->reg_type.bitfield.class != Reg))
11696 {
11697 as_bad (_("`%s': misplaced `%s'"),
11698 current_templates->start->name, operand_string);
11699 return 0;
11700 }
11701 }
11702 else if (starts_memory_operand (*op_string))
11703 {
11704 /* This is a memory reference of some sort. */
11705 char *base_string;
11706
11707 /* Start and end of displacement string expression (if found). */
11708 char *displacement_string_start;
11709 char *displacement_string_end;
11710
11711 do_memory_reference:
11712 /* Check for base index form. We detect the base index form by
11713 looking for a ')' at the end of the operand, searching
11714 for the '(' matching it, and finding a REGISTER_PREFIX or ','
11715 after the '('. */
11716 base_string = op_string + strlen (op_string);
11717
11718 /* Handle vector operations. */
11719 --base_string;
11720 if (is_space_char (*base_string))
11721 --base_string;
11722
11723 if (*base_string == '}')
11724 {
11725 char *vop_start = NULL;
11726
11727 while (base_string-- > op_string)
11728 {
11729 if (*base_string == '"')
11730 break;
11731 if (*base_string != '{')
11732 continue;
11733
11734 vop_start = base_string;
11735
11736 --base_string;
11737 if (is_space_char (*base_string))
11738 --base_string;
11739
11740 if (*base_string != '}')
11741 break;
11742
11743 vop_start = NULL;
11744 }
11745
11746 if (!vop_start)
11747 {
11748 as_bad (_("unbalanced curly braces"));
11749 return 0;
11750 }
11751
11752 if (check_VecOperations (vop_start) == NULL)
11753 return 0;
11754 }
11755
11756 /* If we only have a displacement, set up for it to be parsed later. */
11757 displacement_string_start = op_string;
11758 displacement_string_end = base_string + 1;
11759
11760 if (*base_string == ')')
11761 {
11762 char *temp_string;
11763 unsigned int parens_not_balanced = 0;
11764 bool in_quotes = false;
11765
11766 /* We've already checked that the numbers of left & right ()'s are
11767 equal, and that there's a matching set of double quotes. */
11768 end_op = base_string;
11769 for (temp_string = op_string; temp_string < end_op; temp_string++)
11770 {
11771 if (*temp_string == '\\' && temp_string[1] == '"')
11772 ++temp_string;
11773 else if (*temp_string == '"')
11774 in_quotes = !in_quotes;
11775 else if (!in_quotes)
11776 {
11777 if (*temp_string == '(' && !parens_not_balanced++)
11778 base_string = temp_string;
11779 if (*temp_string == ')')
11780 --parens_not_balanced;
11781 }
11782 }
11783
11784 temp_string = base_string;
11785
11786 /* Skip past '(' and whitespace. */
11787 gas_assert (*base_string == '(');
11788 ++base_string;
11789 if (is_space_char (*base_string))
11790 ++base_string;
11791
11792 if (*base_string == ','
11793 || ((i.base_reg = parse_register (base_string, &end_op))
11794 != NULL))
11795 {
11796 displacement_string_end = temp_string;
11797
11798 i.types[this_operand].bitfield.baseindex = 1;
11799
11800 if (i.base_reg)
11801 {
11802 if (i.base_reg == &bad_reg)
11803 return 0;
11804 base_string = end_op;
11805 if (is_space_char (*base_string))
11806 ++base_string;
11807 }
11808
11809 /* There may be an index reg or scale factor here. */
11810 if (*base_string == ',')
11811 {
11812 ++base_string;
11813 if (is_space_char (*base_string))
11814 ++base_string;
11815
11816 if ((i.index_reg = parse_register (base_string, &end_op))
11817 != NULL)
11818 {
11819 if (i.index_reg == &bad_reg)
11820 return 0;
11821 base_string = end_op;
11822 if (is_space_char (*base_string))
11823 ++base_string;
11824 if (*base_string == ',')
11825 {
11826 ++base_string;
11827 if (is_space_char (*base_string))
11828 ++base_string;
11829 }
11830 else if (*base_string != ')')
11831 {
11832 as_bad (_("expecting `,' or `)' "
11833 "after index register in `%s'"),
11834 operand_string);
11835 return 0;
11836 }
11837 }
11838 else if (*base_string == REGISTER_PREFIX)
11839 {
11840 end_op = strchr (base_string, ',');
11841 if (end_op)
11842 *end_op = '\0';
11843 as_bad (_("bad register name `%s'"), base_string);
11844 return 0;
11845 }
11846
11847 /* Check for scale factor. */
11848 if (*base_string != ')')
11849 {
11850 char *end_scale = i386_scale (base_string);
11851
11852 if (!end_scale)
11853 return 0;
11854
11855 base_string = end_scale;
11856 if (is_space_char (*base_string))
11857 ++base_string;
11858 if (*base_string != ')')
11859 {
11860 as_bad (_("expecting `)' "
11861 "after scale factor in `%s'"),
11862 operand_string);
11863 return 0;
11864 }
11865 }
11866 else if (!i.index_reg)
11867 {
11868 as_bad (_("expecting index register or scale factor "
11869 "after `,'; got '%c'"),
11870 *base_string);
11871 return 0;
11872 }
11873 }
11874 else if (*base_string != ')')
11875 {
11876 as_bad (_("expecting `,' or `)' "
11877 "after base register in `%s'"),
11878 operand_string);
11879 return 0;
11880 }
11881 }
11882 else if (*base_string == REGISTER_PREFIX)
11883 {
11884 end_op = strchr (base_string, ',');
11885 if (end_op)
11886 *end_op = '\0';
11887 as_bad (_("bad register name `%s'"), base_string);
11888 return 0;
11889 }
11890 }
11891
11892 /* If there's an expression beginning the operand, parse it,
11893 assuming displacement_string_start and
11894 displacement_string_end are meaningful. */
11895 if (displacement_string_start != displacement_string_end)
11896 {
11897 if (!i386_displacement (displacement_string_start,
11898 displacement_string_end))
11899 return 0;
11900 }
11901
11902 /* Special case for (%dx) while doing input/output op. */
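/* E.g. "inb (%dx), %al" (a sketch of the accepted syntax): the
   "(%dx)" looks like a memory reference but denotes the I/O port
   register, so retype it as a plain register operand below.  */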
11903 if (i.base_reg
11904 && i.base_reg->reg_type.bitfield.instance == RegD
11905 && i.base_reg->reg_type.bitfield.word
11906 && i.index_reg == 0
11907 && i.log2_scale_factor == 0
11908 && i.seg[i.mem_operands] == 0
11909 && !operand_type_check (i.types[this_operand], disp))
11910 {
11911 i.types[this_operand] = i.base_reg->reg_type;
11912 i.input_output_operand = true;
11913 return 1;
11914 }
11915
11916 if (i386_index_check (operand_string) == 0)
11917 return 0;
11918 i.flags[this_operand] |= Operand_Mem;
11919 i.mem_operands++;
11920 }
11921 else
11922 {
11923 /* It's not a memory operand; argh! */
11924 as_bad (_("invalid char %s beginning operand %d `%s'"),
11925 output_invalid (*op_string),
11926 this_operand + 1,
11927 op_string);
11928 return 0;
11929 }
11930 return 1; /* Normal return. */
11931 }
11932 \f
11933 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11934 that an rs_machine_dependent frag may reach. */
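/* For instance: an unconditional jump relaxes from "eb cb" to
   "e9 cd" (at most 4 variable bytes for the 32-bit displacement),
   while a conditional jump relaxes from "7x cb" to "0f 8x cd",
   i.e. an extra opcode byte plus a 4-byte displacement, 5 bytes
   (see md_convert_frag below).  */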
11935
11936 unsigned int
11937 i386_frag_max_var (fragS *frag)
11938 {
11939 /* The only relaxable frags are for jumps.
11940 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
11941 gas_assert (frag->fr_type == rs_machine_dependent);
11942 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11943 }
11944
11945 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11946 static int
11947 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
11948 {
11949 /* STT_GNU_IFUNC symbol must go through PLT. */
11950 if ((symbol_get_bfdsym (fr_symbol)->flags
11951 & BSF_GNU_INDIRECT_FUNCTION) != 0)
11952 return 0;
11953
11954 if (!S_IS_EXTERNAL (fr_symbol))
11955 /* Symbol may be weak or local. */
11956 return !S_IS_WEAK (fr_symbol);
11957
11958 /* Global symbols with non-default visibility can't be preempted. */
11959 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11960 return 1;
11961
11962 if (fr_var != NO_RELOC)
11963 switch ((enum bfd_reloc_code_real) fr_var)
11964 {
11965 case BFD_RELOC_386_PLT32:
11966 case BFD_RELOC_X86_64_PLT32:
11967 /* Symbol with PLT relocation may be preempted. */
11968 return 0;
11969 default:
11970 abort ();
11971 }
11972
11973 /* Global symbols with default visibility in a shared library may be
11974 preempted by another definition. */
11975 return !shared;
11976 }
11977 #endif
11978
11979 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11980 Note: this also works for Skylake and Cascadelake.
11981 ---------------------------------------------------------------------
11982 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
11983 | ------ | ----------- | ------- | -------- |
11984 | Jo | N | N | Y |
11985 | Jno | N | N | Y |
11986 | Jc/Jb | Y | N | Y |
11987 | Jae/Jnb | Y | N | Y |
11988 | Je/Jz | Y | Y | Y |
11989 | Jne/Jnz | Y | Y | Y |
11990 | Jna/Jbe | Y | N | Y |
11991 | Ja/Jnbe | Y | N | Y |
11992 | Js | N | N | Y |
11993 | Jns | N | N | Y |
11994 | Jp/Jpe | N | N | Y |
11995 | Jnp/Jpo | N | N | Y |
11996 | Jl/Jnge | Y | Y | Y |
11997 | Jge/Jnl | Y | Y | Y |
11998 | Jle/Jng | Y | Y | Y |
11999 | Jg/Jnle | Y | Y | Y |
12000 --------------------------------------------------------------------- */
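/* Reading the table: "cmp %eax, %ebx; je .L1" is fusible
   (ADD/SUB/CMP with Je/Jz is Y), "inc %eax; jc .L1" is not
   (INC/DEC with Jc is N), and TEST/AND fuse with every Jcc.  */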
12001 static int
12002 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
12003 {
12004 if (mf_cmp == mf_cmp_alu_cmp)
12005 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
12006 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
12007 if (mf_cmp == mf_cmp_incdec)
12008 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
12009 || mf_jcc == mf_jcc_jle);
12010 if (mf_cmp == mf_cmp_test_and)
12011 return 1;
12012 return 0;
12013 }
12014
12015 /* Return the next non-empty frag. */
12016
12017 static fragS *
12018 i386_next_non_empty_frag (fragS *fragP)
12019 {
12020 /* There may be a frag with a ".fill 0" when there is no room in
12021 the current frag for frag_grow in output_insn. */
12022 for (fragP = fragP->fr_next;
12023 (fragP != NULL
12024 && fragP->fr_type == rs_fill
12025 && fragP->fr_fix == 0);
12026 fragP = fragP->fr_next)
12027 ;
12028 return fragP;
12029 }
12030
12031 /* Return the next jcc frag after BRANCH_PADDING. */
12032
12033 static fragS *
12034 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
12035 {
12036 fragS *branch_fragP;
12037 if (!pad_fragP)
12038 return NULL;
12039
12040 if (pad_fragP->fr_type == rs_machine_dependent
12041 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
12042 == BRANCH_PADDING))
12043 {
12044 branch_fragP = i386_next_non_empty_frag (pad_fragP);
12045 if (branch_fragP->fr_type != rs_machine_dependent)
12046 return NULL;
12047 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
12048 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
12049 pad_fragP->tc_frag_data.mf_type))
12050 return branch_fragP;
12051 }
12052
12053 return NULL;
12054 }
12055
12056 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
12057
12058 static void
12059 i386_classify_machine_dependent_frag (fragS *fragP)
12060 {
12061 fragS *cmp_fragP;
12062 fragS *pad_fragP;
12063 fragS *branch_fragP;
12064 fragS *next_fragP;
12065 unsigned int max_prefix_length;
12066
12067 if (fragP->tc_frag_data.classified)
12068 return;
12069
12070 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
12071 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
12072 for (next_fragP = fragP;
12073 next_fragP != NULL;
12074 next_fragP = next_fragP->fr_next)
12075 {
12076 next_fragP->tc_frag_data.classified = 1;
12077 if (next_fragP->fr_type == rs_machine_dependent)
12078 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
12079 {
12080 case BRANCH_PADDING:
12081 /* The BRANCH_PADDING frag must be followed by a branch
12082 frag. */
12083 branch_fragP = i386_next_non_empty_frag (next_fragP);
12084 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12085 break;
12086 case FUSED_JCC_PADDING:
12087 /* Check if this is a fused jcc:
12088 FUSED_JCC_PADDING
12089 CMP like instruction
12090 BRANCH_PADDING
12091 COND_JUMP
12092 */
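/* E.g. with -malign-branch-boundary=32, "cmp %eax, %ebx" directly
   followed by "je .L1" is emitted as exactly this frag sequence;
   the BRANCH_PADDING frag is then merged below so the fused pair
   gets padded as a single unit.  */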
12093 cmp_fragP = i386_next_non_empty_frag (next_fragP);
12094 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
12095 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
12096 if (branch_fragP)
12097 {
12098 /* The BRANCH_PADDING frag is merged with the
12099 FUSED_JCC_PADDING frag. */
12100 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12101 /* CMP like instruction size. */
12102 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
12103 frag_wane (pad_fragP);
12104 /* Skip to branch_fragP. */
12105 next_fragP = branch_fragP;
12106 }
12107 else if (next_fragP->tc_frag_data.max_prefix_length)
12108 {
12109 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
12110 a fused jcc. */
12111 next_fragP->fr_subtype
12112 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
12113 next_fragP->tc_frag_data.max_bytes
12114 = next_fragP->tc_frag_data.max_prefix_length;
12115 /* This will be updated in the BRANCH_PREFIX scan. */
12116 next_fragP->tc_frag_data.max_prefix_length = 0;
12117 }
12118 else
12119 frag_wane (next_fragP);
12120 break;
12121 }
12122 }
12123
12124 /* Stop if there is no BRANCH_PREFIX. */
12125 if (!align_branch_prefix_size)
12126 return;
12127
12128 /* Scan for BRANCH_PREFIX. */
12129 for (; fragP != NULL; fragP = fragP->fr_next)
12130 {
12131 if (fragP->fr_type != rs_machine_dependent
12132 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12133 != BRANCH_PREFIX))
12134 continue;
12135
12136 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12137 COND_JUMP_PREFIX. */
12138 max_prefix_length = 0;
12139 for (next_fragP = fragP;
12140 next_fragP != NULL;
12141 next_fragP = next_fragP->fr_next)
12142 {
12143 if (next_fragP->fr_type == rs_fill)
12144 /* Skip rs_fill frags. */
12145 continue;
12146 else if (next_fragP->fr_type != rs_machine_dependent)
12147 /* Stop for all other frags. */
12148 break;
12149
12150 /* rs_machine_dependent frags. */
12151 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12152 == BRANCH_PREFIX)
12153 {
12154 /* Count BRANCH_PREFIX frags. */
12155 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12156 {
12157 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12158 frag_wane (next_fragP);
12159 }
12160 else
12161 max_prefix_length
12162 += next_fragP->tc_frag_data.max_bytes;
12163 }
12164 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12165 == BRANCH_PADDING)
12166 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12167 == FUSED_JCC_PADDING))
12168 {
12169 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
12170 fragP->tc_frag_data.u.padding_fragP = next_fragP;
12171 break;
12172 }
12173 else
12174 /* Stop for other rs_machine_dependent frags. */
12175 break;
12176 }
12177
12178 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12179
12180 /* Skip to the next frag. */
12181 fragP = next_fragP;
12182 }
12183 }
12184
12185 /* Compute padding size for
12186
12187 FUSED_JCC_PADDING
12188 CMP like instruction
12189 BRANCH_PADDING
12190 COND_JUMP/UNCOND_JUMP
12191
12192 or
12193
12194 BRANCH_PADDING
12195 COND_JUMP/UNCOND_JUMP
12196 */
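/* A worked example: with align_branch_power == 5 (a 32-byte
   boundary), a 6-byte fused cmp+jcc starting at offset 28 within
   the current 32-byte window would end at offset 34, i.e. cross
   the boundary, so padding_size = 32 - 28 = 4 bytes are needed to
   push it fully into the next window.  */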
12197
12198 static int
12199 i386_branch_padding_size (fragS *fragP, offsetT address)
12200 {
12201 unsigned int offset, size, padding_size;
12202 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12203
12204 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
12205 if (!address)
12206 address = fragP->fr_address;
12207 address += fragP->fr_fix;
12208
12209 /* CMP like instruction size. */
12210 size = fragP->tc_frag_data.cmp_size;
12211
12212 /* The base size of the branch frag. */
12213 size += branch_fragP->fr_fix;
12214
12215 /* Add opcode and displacement bytes for the rs_machine_dependent
12216 branch frag. */
12217 if (branch_fragP->fr_type == rs_machine_dependent)
12218 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12219
12220 /* Check if branch is within boundary and doesn't end at the last
12221 byte. */
12222 offset = address & ((1U << align_branch_power) - 1);
12223 if ((offset + size) >= (1U << align_branch_power))
12224 /* Padding needed to avoid crossing boundary. */
12225 padding_size = (1U << align_branch_power) - offset;
12226 else
12227 /* No padding needed. */
12228 padding_size = 0;
12229
12230 /* The return value may be saved in tc_frag_data.length which is
12231 an unsigned byte. */
12232 if (!fits_in_unsigned_byte (padding_size))
12233 abort ();
12234
12235 return padding_size;
12236 }
12237
12238 /* i386_generic_table_relax_frag()
12239
12240 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12241 grow/shrink padding to align branch frags. Hand others to
12242 relax_frag(). */
12243
12244 long
12245 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12246 {
12247 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12248 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12249 {
12250 long padding_size = i386_branch_padding_size (fragP, 0);
12251 long grow = padding_size - fragP->tc_frag_data.length;
12252
12253 /* When the BRANCH_PREFIX frag is used, the computed address
12254 must match the actual address and there should be no padding. */
12255 if (fragP->tc_frag_data.padding_address
12256 && (fragP->tc_frag_data.padding_address != fragP->fr_address
12257 || padding_size))
12258 abort ();
12259
12260 /* Update the padding size. */
12261 if (grow)
12262 fragP->tc_frag_data.length = padding_size;
12263
12264 return grow;
12265 }
12266 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12267 {
12268 fragS *padding_fragP, *next_fragP;
12269 long padding_size, left_size, last_size;
12270
12271 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12272 if (!padding_fragP)
12273 /* Use the padding set by the leading BRANCH_PREFIX frag. */
12274 return (fragP->tc_frag_data.length
12275 - fragP->tc_frag_data.last_length);
12276
12277 /* Compute the relative address of the padding frag the very
12278 first time, when the BRANCH_PREFIX frag sizes are zero. */
12279 if (!fragP->tc_frag_data.padding_address)
12280 fragP->tc_frag_data.padding_address
12281 = padding_fragP->fr_address - (fragP->fr_address - stretch);
12282
12283 /* First update the last length from the previous iteration. */
12284 left_size = fragP->tc_frag_data.prefix_length;
12285 for (next_fragP = fragP;
12286 next_fragP != padding_fragP;
12287 next_fragP = next_fragP->fr_next)
12288 if (next_fragP->fr_type == rs_machine_dependent
12289 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12290 == BRANCH_PREFIX))
12291 {
12292 if (left_size)
12293 {
12294 int max = next_fragP->tc_frag_data.max_bytes;
12295 if (max)
12296 {
12297 int size;
12298 if (max > left_size)
12299 size = left_size;
12300 else
12301 size = max;
12302 left_size -= size;
12303 next_fragP->tc_frag_data.last_length = size;
12304 }
12305 }
12306 else
12307 next_fragP->tc_frag_data.last_length = 0;
12308 }
12309
12310 /* Check the padding size for the padding frag. */
12311 padding_size = i386_branch_padding_size
12312 (padding_fragP, (fragP->fr_address
12313 + fragP->tc_frag_data.padding_address));
12314
12315 last_size = fragP->tc_frag_data.prefix_length;
12316 /* Check if there is a change from the last iteration. */
12317 if (padding_size == last_size)
12318 {
12319 /* Update the expected address of the padding frag. */
12320 padding_fragP->tc_frag_data.padding_address
12321 = (fragP->fr_address + padding_size
12322 + fragP->tc_frag_data.padding_address);
12323 return 0;
12324 }
12325
12326 if (padding_size > fragP->tc_frag_data.max_prefix_length)
12327 {
12328 /* No padding if there is insufficient room. Clear the
12329 expected address of the padding frag. */
12330 padding_fragP->tc_frag_data.padding_address = 0;
12331 padding_size = 0;
12332 }
12333 else
12334 /* Store the expected address of the padding frag. */
12335 padding_fragP->tc_frag_data.padding_address
12336 = (fragP->fr_address + padding_size
12337 + fragP->tc_frag_data.padding_address);
12338
12339 fragP->tc_frag_data.prefix_length = padding_size;
12340
12341 /* Update the length for the current iteration. */
12342 left_size = padding_size;
12343 for (next_fragP = fragP;
12344 next_fragP != padding_fragP;
12345 next_fragP = next_fragP->fr_next)
12346 if (next_fragP->fr_type == rs_machine_dependent
12347 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12348 == BRANCH_PREFIX))
12349 {
12350 if (left_size)
12351 {
12352 int max = next_fragP->tc_frag_data.max_bytes;
12353 if (max)
12354 {
12355 int size;
12356 if (max > left_size)
12357 size = left_size;
12358 else
12359 size = max;
12360 left_size -= size;
12361 next_fragP->tc_frag_data.length = size;
12362 }
12363 }
12364 else
12365 next_fragP->tc_frag_data.length = 0;
12366 }
12367
12368 return (fragP->tc_frag_data.length
12369 - fragP->tc_frag_data.last_length);
12370 }
12371 return relax_frag (segment, fragP, stretch);
12372 }
12373
12374 /* md_estimate_size_before_relax()
12375
12376 Called just before relax() for rs_machine_dependent frags. The x86
12377 assembler uses these frags to handle variable size jump
12378 instructions.
12379
12380 Any symbol that is now undefined will not become defined.
12381 Return the correct fr_subtype in the frag.
12382 Return the initial "guess for variable size of frag" to caller.
12383 The guess is actually the growth beyond the fixed part. Whatever
12384 we do to grow the fixed or variable part contributes to our
12385 returned value. */
12386
12387 int
12388 md_estimate_size_before_relax (fragS *fragP, segT segment)
12389 {
12390 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12391 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12392 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12393 {
12394 i386_classify_machine_dependent_frag (fragP);
12395 return fragP->tc_frag_data.length;
12396 }
12397
12398 /* We've already got fragP->fr_subtype right; all we have to do is
12399 check for un-relaxable symbols. On an ELF system, we can't relax
12400 an externally visible symbol, because it may be overridden by a
12401 shared library. */
12402 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12403 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12404 || (IS_ELF
12405 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12406 fragP->fr_var))
12407 #endif
12408 #if defined (OBJ_COFF) && defined (TE_PE)
12409 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12410 && S_IS_WEAK (fragP->fr_symbol))
12411 #endif
12412 )
12413 {
12414 /* Symbol is undefined in this segment, or we need to keep a
12415 reloc so that weak symbols can be overridden. */
12416 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12417 enum bfd_reloc_code_real reloc_type;
12418 unsigned char *opcode;
12419 int old_fr_fix;
12420 fixS *fixP = NULL;
12421
12422 if (fragP->fr_var != NO_RELOC)
12423 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12424 else if (size == 2)
12425 reloc_type = BFD_RELOC_16_PCREL;
12426 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12427 else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
12428 && need_plt32_p (fragP->fr_symbol))
12429 reloc_type = BFD_RELOC_X86_64_PLT32;
12430 #endif
12431 else
12432 reloc_type = BFD_RELOC_32_PCREL;
12433
12434 old_fr_fix = fragP->fr_fix;
12435 opcode = (unsigned char *) fragP->fr_opcode;
12436
12437 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12438 {
12439 case UNCOND_JUMP:
12440 /* Make jmp (0xeb) a (d)word displacement jump. */
12441 opcode[0] = 0xe9;
12442 fragP->fr_fix += size;
12443 fixP = fix_new (fragP, old_fr_fix, size,
12444 fragP->fr_symbol,
12445 fragP->fr_offset, 1,
12446 reloc_type);
12447 break;
12448
12449 case COND_JUMP86:
12450 if (size == 2
12451 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12452 {
12453 /* Negate the condition, and branch past an
12454 unconditional jump. */
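/* For example, in 16-bit code "je target" (74 cb) becomes
   "75 03" (jne past the following jump) followed by
   "e9 cw" (jmp rel16 to target).  */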
12455 opcode[0] ^= 1;
12456 opcode[1] = 3;
12457 /* Insert an unconditional jump. */
12458 opcode[2] = 0xe9;
12459 /* We added two extra opcode bytes, and have a two byte
12460 offset. */
12461 fragP->fr_fix += 2 + 2;
12462 fix_new (fragP, old_fr_fix + 2, 2,
12463 fragP->fr_symbol,
12464 fragP->fr_offset, 1,
12465 reloc_type);
12466 break;
12467 }
12468 /* Fall through. */
12469
12470 case COND_JUMP:
12471 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12472 {
12473 fragP->fr_fix += 1;
12474 fixP = fix_new (fragP, old_fr_fix, 1,
12475 fragP->fr_symbol,
12476 fragP->fr_offset, 1,
12477 BFD_RELOC_8_PCREL);
12478 fixP->fx_signed = 1;
12479 break;
12480 }
12481
12482 /* This changes the byte-displacement jump 0x7N
12483 to the (d)word-displacement jump 0x0f,0x8N. */
12484 opcode[1] = opcode[0] + 0x10;
12485 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12486 /* We've added an opcode byte. */
12487 fragP->fr_fix += 1 + size;
12488 fixP = fix_new (fragP, old_fr_fix + 1, size,
12489 fragP->fr_symbol,
12490 fragP->fr_offset, 1,
12491 reloc_type);
12492 break;
12493
12494 default:
12495 BAD_CASE (fragP->fr_subtype);
12496 break;
12497 }
12498
12499 /* All jumps handled here are signed, but don't unconditionally use a
12500 signed limit check for 32 and 16 bit jumps as we want to allow wrap
12501 around at 4G (outside of 64-bit mode) and 64k. */
12502 if (size == 4 && flag_code == CODE_64BIT)
12503 fixP->fx_signed = 1;
12504
12505 frag_wane (fragP);
12506 return fragP->fr_fix - old_fr_fix;
12507 }
12508
12509 /* Guess size depending on current relax state. Initially the relax
12510 state will correspond to a short jump and we return 1, because
12511 the variable part of the frag (the branch offset) is one byte
12512 long. However, we can relax a section more than once and in that
12513 case we must either set fr_subtype back to the unrelaxed state,
12514 or return the value for the appropriate branch. */
12515 return md_relax_table[fragP->fr_subtype].rlx_length;
12516 }
12517
12518 /* Called after relax() is finished.
12519
12520 In: Address of frag.
12521 fr_type == rs_machine_dependent.
12522 fr_subtype is what the address relaxed to.
12523
12524 Out: Any fixSs and constants are set up.
12525 Caller will turn frag into a ".space 0". */
12526
12527 void
12528 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12529 fragS *fragP)
12530 {
12531 unsigned char *opcode;
12532 unsigned char *where_to_put_displacement = NULL;
12533 offsetT target_address;
12534 offsetT opcode_address;
12535 unsigned int extension = 0;
12536 offsetT displacement_from_opcode_start;
12537
12538 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12539 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12540 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12541 {
12542 /* Generate nop padding. */
12543 unsigned int size = fragP->tc_frag_data.length;
12544 if (size)
12545 {
12546 if (size > fragP->tc_frag_data.max_bytes)
12547 abort ();
12548
12549 if (flag_debug)
12550 {
12551 const char *msg;
12552 const char *branch = "branch";
12553 const char *prefix = "";
12554 fragS *padding_fragP;
12555 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12556 == BRANCH_PREFIX)
12557 {
12558 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12559 switch (fragP->tc_frag_data.default_prefix)
12560 {
12561 default:
12562 abort ();
12563 break;
12564 case CS_PREFIX_OPCODE:
12565 prefix = " cs";
12566 break;
12567 case DS_PREFIX_OPCODE:
12568 prefix = " ds";
12569 break;
12570 case ES_PREFIX_OPCODE:
12571 prefix = " es";
12572 break;
12573 case FS_PREFIX_OPCODE:
12574 prefix = " fs";
12575 break;
12576 case GS_PREFIX_OPCODE:
12577 prefix = " gs";
12578 break;
12579 case SS_PREFIX_OPCODE:
12580 prefix = " ss";
12581 break;
12582 }
12583 if (padding_fragP)
12584 msg = _("%s:%u: add %d%s at 0x%llx to align "
12585 "%s within %d-byte boundary\n");
12586 else
12587 msg = _("%s:%u: add additional %d%s at 0x%llx to "
12588 "align %s within %d-byte boundary\n");
12589 }
12590 else
12591 {
12592 padding_fragP = fragP;
12593 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12594 "%s within %d-byte boundary\n");
12595 }
12596
12597 if (padding_fragP)
12598 switch (padding_fragP->tc_frag_data.branch_type)
12599 {
12600 case align_branch_jcc:
12601 branch = "jcc";
12602 break;
12603 case align_branch_fused:
12604 branch = "fused jcc";
12605 break;
12606 case align_branch_jmp:
12607 branch = "jmp";
12608 break;
12609 case align_branch_call:
12610 branch = "call";
12611 break;
12612 case align_branch_indirect:
12613 branch = "indiret branch";
12614 break;
12615 case align_branch_ret:
12616 branch = "ret";
12617 break;
12618 default:
12619 break;
12620 }
12621
12622 fprintf (stdout, msg,
12623 fragP->fr_file, fragP->fr_line, size, prefix,
12624 (long long) fragP->fr_address, branch,
12625 1 << align_branch_power);
12626 }
12627 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12628 memset (fragP->fr_opcode,
12629 fragP->tc_frag_data.default_prefix, size);
12630 else
12631 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12632 size, 0);
12633 fragP->fr_fix += size;
12634 }
12635 return;
12636 }
12637
12638 opcode = (unsigned char *) fragP->fr_opcode;
12639
12640 /* Address we want to reach in file space. */
12641 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12642
12643 /* Address opcode resides at in file space. */
12644 opcode_address = fragP->fr_address + fragP->fr_fix;
12645
12646 /* Displacement from opcode start to fill into instruction. */
12647 displacement_from_opcode_start = target_address - opcode_address;
12648
12649 if ((fragP->fr_subtype & BIG) == 0)
12650 {
12651 /* Don't have to change opcode. */
12652 extension = 1; /* 1 opcode + 1 displacement */
12653 where_to_put_displacement = &opcode[1];
12654 }
12655 else
12656 {
12657 if (no_cond_jump_promotion
12658 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12659 as_warn_where (fragP->fr_file, fragP->fr_line,
12660 _("long jump required"));
12661
12662 switch (fragP->fr_subtype)
12663 {
12664 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12665 extension = 4; /* 1 opcode + 4 displacement */
12666 opcode[0] = 0xe9;
12667 where_to_put_displacement = &opcode[1];
12668 break;
12669
12670 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12671 extension = 2; /* 1 opcode + 2 displacement */
12672 opcode[0] = 0xe9;
12673 where_to_put_displacement = &opcode[1];
12674 break;
12675
12676 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12677 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12678 extension = 5; /* 2 opcode + 4 displacement */
12679 opcode[1] = opcode[0] + 0x10;
12680 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12681 where_to_put_displacement = &opcode[2];
12682 break;
12683
12684 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12685 extension = 3; /* 2 opcode + 2 displacement */
12686 opcode[1] = opcode[0] + 0x10;
12687 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12688 where_to_put_displacement = &opcode[2];
12689 break;
12690
12691 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12692 extension = 4;
12693 opcode[0] ^= 1;
12694 opcode[1] = 3;
12695 opcode[2] = 0xe9;
12696 where_to_put_displacement = &opcode[3];
12697 break;
12698
12699 default:
12700 BAD_CASE (fragP->fr_subtype);
12701 break;
12702 }
12703 }
12704
12705 /* If the size is less than four we are sure that the operand fits,
12706 but if it's 4, then it could be that the displacement is larger
12707 than +/- 2GB. */
12708 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12709 && object_64bit
12710 && ((addressT) (displacement_from_opcode_start - extension
12711 + ((addressT) 1 << 31))
12712 > (((addressT) 2 << 31) - 1)))
12713 {
12714 as_bad_where (fragP->fr_file, fragP->fr_line,
12715 _("jump target out of range"));
12716 /* Make us emit 0. */
12717 displacement_from_opcode_start = extension;
12718 }
12719 /* Now put displacement after opcode. */
12720 md_number_to_chars ((char *) where_to_put_displacement,
12721 (valueT) (displacement_from_opcode_start - extension),
12722 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12723 fragP->fr_fix += extension;
12724 }
12725 \f
12726 /* Apply a fixup (fixP) to segment data, once it has been determined
12727 by our caller that we have all the info we need to fix it up.
12728
12729 Parameter valP is the pointer to the value of the bits.
12730
12731 On the 386, immediates, displacements, and data pointers are all in
12732 the same (little-endian) format, so we don't need to care about which
12733 we are handling. */
12734
12735 void
12736 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12737 {
12738 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12739 valueT value = *valP;
12740
12741 #if !defined (TE_Mach)
12742 if (fixP->fx_pcrel)
12743 {
12744 switch (fixP->fx_r_type)
12745 {
12746 default:
12747 break;
12748
12749 case BFD_RELOC_64:
12750 fixP->fx_r_type = BFD_RELOC_64_PCREL;
12751 break;
12752 case BFD_RELOC_32:
12753 case BFD_RELOC_X86_64_32S:
12754 fixP->fx_r_type = BFD_RELOC_32_PCREL;
12755 break;
12756 case BFD_RELOC_16:
12757 fixP->fx_r_type = BFD_RELOC_16_PCREL;
12758 break;
12759 case BFD_RELOC_8:
12760 fixP->fx_r_type = BFD_RELOC_8_PCREL;
12761 break;
12762 }
12763 }
12764
12765 if (fixP->fx_addsy != NULL
12766 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12767 || fixP->fx_r_type == BFD_RELOC_64_PCREL
12768 || fixP->fx_r_type == BFD_RELOC_16_PCREL
12769 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12770 && !use_rela_relocations)
12771 {
12772 /* This is a hack. There should be a better way to handle this.
12773 This compensates for the fact that bfd_install_relocation will
12774 subtract the current location (for partial_inplace, PC relative
12775 relocations); see more below. */
12776 #ifndef OBJ_AOUT
12777 if (IS_ELF
12778 #ifdef TE_PE
12779 || OUTPUT_FLAVOR == bfd_target_coff_flavour
12780 #endif
12781 )
12782 value += fixP->fx_where + fixP->fx_frag->fr_address;
12783 #endif
12784 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12785 if (IS_ELF)
12786 {
12787 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12788
12789 if ((sym_seg == seg
12790 || (symbol_section_p (fixP->fx_addsy)
12791 && sym_seg != absolute_section))
12792 && !generic_force_reloc (fixP))
12793 {
12794 /* Yes, we add the values in twice. This is because
12795 bfd_install_relocation subtracts them out again. I think
12796 bfd_install_relocation is broken, but I don't dare change
12797 it. FIXME. */
12798 value += fixP->fx_where + fixP->fx_frag->fr_address;
12799 }
12800 }
12801 #endif
12802 #if defined (OBJ_COFF) && defined (TE_PE)
12803 /* For some reason, the PE format does not store a
12804 section address offset for a PC relative symbol. */
12805 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12806 || S_IS_WEAK (fixP->fx_addsy))
12807 value += md_pcrel_from (fixP);
12808 #endif
12809 }
12810 #if defined (OBJ_COFF) && defined (TE_PE)
12811 if (fixP->fx_addsy != NULL
12812 && S_IS_WEAK (fixP->fx_addsy)
12813 /* PR 16858: Do not modify weak function references. */
12814 && ! fixP->fx_pcrel)
12815 {
12816 #if !defined (TE_PEP)
12817 /* For x86 PE weak function symbols are neither PC-relative
12818 nor do they set S_IS_FUNCTION. So the only reliable way
12819 to detect them is to check the flags of their containing
12820 section. */
12821 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12822 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12823 ;
12824 else
12825 #endif
12826 value -= S_GET_VALUE (fixP->fx_addsy);
12827 }
12828 #endif
12829
12830 /* Fix a few things - the dynamic linker expects certain values here,
12831 and we must not disappoint it. */
12832 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12833 if (IS_ELF && fixP->fx_addsy)
12834 switch (fixP->fx_r_type)
12835 {
12836 case BFD_RELOC_386_PLT32:
12837 case BFD_RELOC_X86_64_PLT32:
12838 /* Make the jump instruction point to the address of the operand.
12839 At runtime we merely add the offset to the actual PLT entry.
12840 NB: Subtract the offset size only for jump instructions. */
12841 if (fixP->fx_pcrel)
12842 value = -4;
12843 break;
12844
12845 case BFD_RELOC_386_TLS_GD:
12846 case BFD_RELOC_386_TLS_LDM:
12847 case BFD_RELOC_386_TLS_IE_32:
12848 case BFD_RELOC_386_TLS_IE:
12849 case BFD_RELOC_386_TLS_GOTIE:
12850 case BFD_RELOC_386_TLS_GOTDESC:
12851 case BFD_RELOC_X86_64_TLSGD:
12852 case BFD_RELOC_X86_64_TLSLD:
12853 case BFD_RELOC_X86_64_GOTTPOFF:
12854 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12855 value = 0; /* Fully resolved at runtime. No addend. */
12856 /* Fallthrough */
12857 case BFD_RELOC_386_TLS_LE:
12858 case BFD_RELOC_386_TLS_LDO_32:
12859 case BFD_RELOC_386_TLS_LE_32:
12860 case BFD_RELOC_X86_64_DTPOFF32:
12861 case BFD_RELOC_X86_64_DTPOFF64:
12862 case BFD_RELOC_X86_64_TPOFF32:
12863 case BFD_RELOC_X86_64_TPOFF64:
12864 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12865 break;
12866
12867 case BFD_RELOC_386_TLS_DESC_CALL:
12868 case BFD_RELOC_X86_64_TLSDESC_CALL:
12869 value = 0; /* Fully resolved at runtime. No addend. */
12870 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12871 fixP->fx_done = 0;
12872 return;
12873
12874 case BFD_RELOC_VTABLE_INHERIT:
12875 case BFD_RELOC_VTABLE_ENTRY:
12876 fixP->fx_done = 0;
12877 return;
12878
12879 default:
12880 break;
12881 }
12882 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
12883
12884 /* If not 64-bit, massage the value to account for wraparound when !BFD64. */
12885 if (!object_64bit)
12886 value = extend_to_32bit_address (value);
12887
12888 *valP = value;
12889 #endif /* !defined (TE_Mach) */
12890
12891 /* Are we finished with this relocation now? */
12892 if (fixP->fx_addsy == NULL)
12893 {
12894 fixP->fx_done = 1;
12895 switch (fixP->fx_r_type)
12896 {
12897 case BFD_RELOC_X86_64_32S:
12898 fixP->fx_signed = 1;
12899 break;
12900
12901 default:
12902 break;
12903 }
12904 }
12905 #if defined (OBJ_COFF) && defined (TE_PE)
12906 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12907 {
12908 fixP->fx_done = 0;
12909 /* Remember value for tc_gen_reloc. */
12910 fixP->fx_addnumber = value;
12911 /* Clear out the frag for now. */
12912 value = 0;
12913 }
12914 #endif
12915 else if (use_rela_relocations)
12916 {
12917 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
12918 fixP->fx_no_overflow = 1;
12919 /* Remember value for tc_gen_reloc. */
12920 fixP->fx_addnumber = value;
12921 value = 0;
12922 }
12923
12924 md_number_to_chars (p, value, fixP->fx_size);
12925 }
12926 \f
12927 const char *
12928 md_atof (int type, char *litP, int *sizeP)
12929 {
12930 /* This outputs the LITTLENUMs in REVERSE order;
12931 in accord with the little-endian 386. */
12932 return ieee_md_atof (type, litP, sizeP, false);
12933 }
12934 \f
12935 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
12936
12937 static char *
12938 output_invalid (int c)
12939 {
12940 if (ISPRINT (c))
12941 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12942 "'%c'", c);
12943 else
12944 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12945 "(0x%x)", (unsigned char) c);
12946 return output_invalid_buf;
12947 }
12948
12949 /* Verify that @r can be used in the current context. */
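/* Illustrative examples of the checks below (a sketch, not
   exhaustive): %cr0 requires an i386 or newer architecture, %mm0
   requires MMX, %ymm0 requires AVX, %zmm0 and mask registers
   require AVX512F, %xmm16-%xmm31 additionally require 64-bit mode
   and force EVEX encoding, and REX registers such as %r8 require
   64-bit mode.  */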
12950
12951 static bool check_register (const reg_entry *r)
12952 {
12953 if (allow_pseudo_reg)
12954 return true;
12955
12956 if (operand_type_all_zero (&r->reg_type))
12957 return false;
12958
12959 if ((r->reg_type.bitfield.dword
12960 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12961 || r->reg_type.bitfield.class == RegCR
12962 || r->reg_type.bitfield.class == RegDR)
12963 && !cpu_arch_flags.bitfield.cpui386)
12964 return false;
12965
12966 if (r->reg_type.bitfield.class == RegTR
12967 && (flag_code == CODE_64BIT
12968 || !cpu_arch_flags.bitfield.cpui386
12969 || cpu_arch_isa_flags.bitfield.cpui586
12970 || cpu_arch_isa_flags.bitfield.cpui686))
12971 return false;
12972
12973 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12974 return false;
12975
12976 if (!cpu_arch_flags.bitfield.cpuavx512f)
12977 {
12978 if (r->reg_type.bitfield.zmmword
12979 || r->reg_type.bitfield.class == RegMask)
12980 return false;
12981
12982 if (!cpu_arch_flags.bitfield.cpuavx)
12983 {
12984 if (r->reg_type.bitfield.ymmword)
12985 return false;
12986
12987 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
12988 return false;
12989 }
12990 }
12991
12992 if (r->reg_type.bitfield.tmmword
12993 && (!cpu_arch_flags.bitfield.cpuamx_tile
12994 || flag_code != CODE_64BIT))
12995 return false;
12996
12997 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
12998 return false;
12999
13000 /* Don't allow the fake index register unless allow_index_reg is non-zero. */
13001 if (!allow_index_reg && r->reg_num == RegIZ)
13002 return false;
13003
13004 /* Upper 16 vector registers are only available with VREX in 64bit
13005 mode, and require EVEX encoding. */
13006 if (r->reg_flags & RegVRex)
13007 {
13008 if (!cpu_arch_flags.bitfield.cpuavx512f
13009 || flag_code != CODE_64BIT)
13010 return false;
13011
13012 if (i.vec_encoding == vex_encoding_default)
13013 i.vec_encoding = vex_encoding_evex;
13014 else if (i.vec_encoding != vex_encoding_evex)
13015 i.vec_encoding = vex_encoding_error;
13016 }
13017
13018 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
13019 && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
13020 && flag_code != CODE_64BIT)
13021 return false;
13022
13023 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
13024 && !intel_syntax)
13025 return false;
13026
13027 return true;
13028 }
13029
13030 /* REG_STRING starts *before* REGISTER_PREFIX. */
13031
13032 static const reg_entry *
13033 parse_real_register (char *reg_string, char **end_op)
13034 {
13035 char *s = reg_string;
13036 char *p;
13037 char reg_name_given[MAX_REG_NAME_SIZE + 1];
13038 const reg_entry *r;
13039
13040 /* Skip possible REGISTER_PREFIX and possible whitespace. */
13041 if (*s == REGISTER_PREFIX)
13042 ++s;
13043
13044 if (is_space_char (*s))
13045 ++s;
13046
13047 p = reg_name_given;
13048 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
13049 {
13050 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
13051 return (const reg_entry *) NULL;
13052 s++;
13053 }
13054
13055 /* For naked regs, make sure that we are not dealing with an identifier.
13056 This prevents confusing an identifier like `eax_var' with register
13057 `eax'. */
13058 if (allow_naked_reg && identifier_chars[(unsigned char) *s])
13059 return (const reg_entry *) NULL;
13060
13061 *end_op = s;
13062
13063 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
13064
13065 /* Handle floating point regs, allowing spaces in the (i) part. */
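/* E.g. "%st ( 2 )" is accepted and parses the same as "%st(2)".  */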
13066 if (r == reg_st0)
13067 {
13068 if (!cpu_arch_flags.bitfield.cpu8087
13069 && !cpu_arch_flags.bitfield.cpu287
13070 && !cpu_arch_flags.bitfield.cpu387
13071 && !allow_pseudo_reg)
13072 return (const reg_entry *) NULL;
13073
13074 if (is_space_char (*s))
13075 ++s;
13076 if (*s == '(')
13077 {
13078 ++s;
13079 if (is_space_char (*s))
13080 ++s;
13081 if (*s >= '0' && *s <= '7')
13082 {
13083 int fpr = *s - '0';
13084 ++s;
13085 if (is_space_char (*s))
13086 ++s;
13087 if (*s == ')')
13088 {
13089 *end_op = s + 1;
13090 know (r[fpr].reg_num == fpr);
13091 return r + fpr;
13092 }
13093 }
13094 /* We have "%st(" then garbage. */
13095 return (const reg_entry *) NULL;
13096 }
13097 }
13098
13099 return r && check_register (r) ? r : NULL;
13100 }
13101
13102 /* REG_STRING starts *before* REGISTER_PREFIX. */
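/* Unlike parse_real_register () this also resolves symbols whose
   value expressions live in reg_section, i.e. names equated to
   registers (presumably via something like "foo = %eax"), by
   following O_symbol chains below.  */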
13103
13104 static const reg_entry *
13105 parse_register (char *reg_string, char **end_op)
13106 {
13107 const reg_entry *r;
13108
13109 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
13110 r = parse_real_register (reg_string, end_op);
13111 else
13112 r = NULL;
13113 if (!r)
13114 {
13115 char *save = input_line_pointer;
13116 char c;
13117 symbolS *symbolP;
13118
13119 input_line_pointer = reg_string;
13120 c = get_symbol_name (&reg_string);
13121 symbolP = symbol_find (reg_string);
13122 while (symbolP && S_GET_SEGMENT (symbolP) != reg_section)
13123 {
13124 const expressionS *e = symbol_get_value_expression(symbolP);
13125
13126 if (e->X_op != O_symbol || e->X_add_number)
13127 break;
13128 symbolP = e->X_add_symbol;
13129 }
13130 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
13131 {
13132 const expressionS *e = symbol_get_value_expression (symbolP);
13133
13134 know (e->X_op == O_register);
13135 know (e->X_add_number >= 0
13136 && (valueT) e->X_add_number < i386_regtab_size);
13137 r = i386_regtab + e->X_add_number;
13138 if (!check_register (r))
13139 {
13140 as_bad (_("register '%s%s' cannot be used here"),
13141 register_prefix, r->reg_name);
13142 r = &bad_reg;
13143 }
13144 *end_op = input_line_pointer;
13145 }
13146 *input_line_pointer = c;
13147 input_line_pointer = save;
13148 }
13149 return r;
13150 }
13151
13152 int
13153 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13154 {
13155 const reg_entry *r = NULL;
13156 char *end = input_line_pointer;
13157
13158 *end = *nextcharP;
13159 if (*name == REGISTER_PREFIX || allow_naked_reg)
13160 r = parse_real_register (name, &input_line_pointer);
13161 if (r && end <= input_line_pointer)
13162 {
13163 *nextcharP = *input_line_pointer;
13164 *input_line_pointer = 0;
13165 if (r != &bad_reg)
13166 {
13167 e->X_op = O_register;
13168 e->X_add_number = r - i386_regtab;
13169 }
13170 else
13171 e->X_op = O_illegal;
13172 return 1;
13173 }
13174 input_line_pointer = end;
13175 *end = 0;
13176 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13177 }
13178
13179 void
13180 md_operand (expressionS *e)
13181 {
13182 char *end;
13183 const reg_entry *r;
13184
13185 switch (*input_line_pointer)
13186 {
13187 case REGISTER_PREFIX:
13188 r = parse_real_register (input_line_pointer, &end);
13189 if (r)
13190 {
13191 e->X_op = O_register;
13192 e->X_add_number = r - i386_regtab;
13193 input_line_pointer = end;
13194 }
13195 break;
13196
13197 case '[':
13198 gas_assert (intel_syntax);
13199 end = input_line_pointer++;
13200 expression (e);
13201 if (*input_line_pointer == ']')
13202 {
13203 ++input_line_pointer;
13204 e->X_op_symbol = make_expr_symbol (e);
13205 e->X_add_symbol = NULL;
13206 e->X_add_number = 0;
13207 e->X_op = O_index;
13208 }
13209 else
13210 {
13211 e->X_op = O_absent;
13212 input_line_pointer = end;
13213 }
13214 break;
13215 }
13216 }
13217
13218 \f
13219 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13220 const char *md_shortopts = "kVQ:sqnO::";
13221 #else
13222 const char *md_shortopts = "qnO::";
13223 #endif
13224
13225 #define OPTION_32 (OPTION_MD_BASE + 0)
13226 #define OPTION_64 (OPTION_MD_BASE + 1)
13227 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
13228 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13229 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
13230 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
13231 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
13232 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
13233 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
13234 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
13235 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
13236 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
13237 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
13238 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
13239 #define OPTION_X32 (OPTION_MD_BASE + 14)
13240 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
13241 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
13242 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
13243 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
13244 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
13245 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
13246 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
13247 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
13248 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
13249 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
13250 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
13251 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
13252 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
13253 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
13254 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
13255 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
13256 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
13257 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
13258 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
13259 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
13260
13261 struct option md_longopts[] =
13262 {
13263 {"32", no_argument, NULL, OPTION_32},
13264 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13265 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13266 {"64", no_argument, NULL, OPTION_64},
13267 #endif
13268 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13269 {"x32", no_argument, NULL, OPTION_X32},
13270 {"mshared", no_argument, NULL, OPTION_MSHARED},
13271 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
13272 #endif
13273 {"divide", no_argument, NULL, OPTION_DIVIDE},
13274 {"march", required_argument, NULL, OPTION_MARCH},
13275 {"mtune", required_argument, NULL, OPTION_MTUNE},
13276 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
13277 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
13278 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
13279 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
13280 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
13281 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
13282 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
13283 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
13284 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
13285 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
13286 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
13287 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
13288 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
13289 # if defined (TE_PE) || defined (TE_PEP)
13290 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
13291 #endif
13292 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
13293 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
13294 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
13295 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
13296 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
13297 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
13298 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
13299 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
13300 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
13301 {"mlfence-before-indirect-branch", required_argument, NULL,
13302 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
13303 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
13304 {"mamd64", no_argument, NULL, OPTION_MAMD64},
13305 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
13306 {NULL, no_argument, NULL, 0}
13307 };
13308 size_t md_longopts_size = sizeof (md_longopts);
13309
int
md_parse_option (int c, const char *arg)
{
  unsigned int j;
  char *arch, *next, *saved, *type;

  switch (c)
    {
    case 'n':
      optimize_align_code = 0;
      break;

    case 'q':
      quiet_warnings = 1;
      break;

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
	 should be emitted or not.  FIXME: Not implemented.  */
    case 'Q':
      if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
	return 0;
      break;

      /* -V: SVR4 argument to print version ID.  */
    case 'V':
      print_version_id ();
      break;

      /* -k: Ignore for FreeBSD compatibility.  */
    case 'k':
      break;

    case 's':
      /* -s: On i386 Solaris, this tells the native assembler to use
	 .stab instead of .stab.excl.  We always use .stab anyhow.  */
      break;

    case OPTION_MSHARED:
      shared = 1;
      break;

    case OPTION_X86_USED_NOTE:
      if (strcasecmp (arg, "yes") == 0)
	x86_used_note = 1;
      else if (strcasecmp (arg, "no") == 0)
	x86_used_note = 0;
      else
	as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
      break;

#endif
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
    case OPTION_64:
      {
	const char **list, **l;

	list = bfd_target_list ();
	for (l = list; *l != NULL; l++)
	  if (startswith (*l, "elf64-x86-64")
	      || strcmp (*l, "coff-x86-64") == 0
	      || strcmp (*l, "pe-x86-64") == 0
	      || strcmp (*l, "pei-x86-64") == 0
	      || strcmp (*l, "mach-o-x86-64") == 0)
	    {
	      default_arch = "x86_64";
	      break;
	    }
	if (*l == NULL)
	  as_fatal (_("no compiled in support for x86_64"));
	free (list);
      }
      break;
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    case OPTION_X32:
      if (IS_ELF)
	{
	  const char **list, **l;

	  list = bfd_target_list ();
	  for (l = list; *l != NULL; l++)
	    if (startswith (*l, "elf32-x86-64"))
	      {
		default_arch = "x86_64:32";
		break;
	      }
	  if (*l == NULL)
	    as_fatal (_("no compiled in support for 32bit x86_64"));
	  free (list);
	}
      else
	as_fatal (_("32bit x86_64 is only supported for ELF"));
      break;
#endif

    case OPTION_32:
      default_arch = "i386";
      break;

    case OPTION_DIVIDE:
#ifdef SVR4_COMMENT_CHARS
      {
	char *n, *t;
	const char *s;

	n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
	t = n;
	for (s = i386_comment_chars; *s != '\0'; s++)
	  if (*s != '/')
	    *t++ = *s;
	*t = '\0';
	i386_comment_chars = n;
      }
#endif
      break;

    case OPTION_MARCH:
      saved = xstrdup (arg);
      arch = saved;
      /* Allow -march=+nosse.  */
      if (*arch == '+')
	arch++;
      do
	{
	  if (*arch == '.')
	    as_fatal (_("invalid -march= option: `%s'"), arg);
	  next = strchr (arch, '+');
	  if (next)
	    *next++ = '\0';
	  for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
	    {
	      if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
		  && strcmp (arch, cpu_arch[j].name) == 0)
		{
		  /* Processor.  */
		  if (! cpu_arch[j].enable.bitfield.cpui386)
		    continue;

		  cpu_arch_name = cpu_arch[j].name;
		  free (cpu_sub_arch_name);
		  cpu_sub_arch_name = NULL;
		  cpu_arch_flags = cpu_arch[j].enable;
		  cpu_arch_isa = cpu_arch[j].type;
		  cpu_arch_isa_flags = cpu_arch[j].enable;
		  if (!cpu_arch_tune_set)
		    {
		      cpu_arch_tune = cpu_arch_isa;
		      cpu_arch_tune_flags = cpu_arch_isa_flags;
		    }
		  break;
		}
	      else if (cpu_arch[j].type == PROCESSOR_NONE
		       && strcmp (arch, cpu_arch[j].name) == 0
		       && !cpu_flags_all_zero (&cpu_arch[j].enable))
		{
		  /* ISA extension.  */
		  i386_cpu_flags flags;

		  flags = cpu_flags_or (cpu_arch_flags,
					cpu_arch[j].enable);

		  if (!cpu_flags_equal (&flags, &cpu_arch_flags))
		    {
		      extend_cpu_sub_arch_name (arch);
		      cpu_arch_flags = flags;
		      cpu_arch_isa_flags = flags;
		    }
		  else
		    cpu_arch_isa_flags
		      = cpu_flags_or (cpu_arch_isa_flags,
				      cpu_arch[j].enable);
		  break;
		}
	    }

	  if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
	    {
	      /* Disable an ISA extension.  */
	      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
		if (cpu_arch[j].type == PROCESSOR_NONE
		    && strcmp (arch + 2, cpu_arch[j].name) == 0)
		  {
		    i386_cpu_flags flags;

		    flags = cpu_flags_and_not (cpu_arch_flags,
					       cpu_arch[j].disable);
		    if (!cpu_flags_equal (&flags, &cpu_arch_flags))
		      {
			extend_cpu_sub_arch_name (arch);
			cpu_arch_flags = flags;
			cpu_arch_isa_flags = flags;
		      }
		    break;
		  }
	    }

	  if (j >= ARRAY_SIZE (cpu_arch))
	    as_fatal (_("invalid -march= option: `%s'"), arg);

	  arch = next;
	}
      while (next != NULL);
      free (saved);
      break;

    case OPTION_MTUNE:
      if (*arg == '.')
	as_fatal (_("invalid -mtune= option: `%s'"), arg);
      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
	{
	  if (cpu_arch[j].type != PROCESSOR_NONE
	      && strcmp (arg, cpu_arch[j].name) == 0)
	    {
	      cpu_arch_tune_set = 1;
	      cpu_arch_tune = cpu_arch[j].type;
	      cpu_arch_tune_flags = cpu_arch[j].enable;
	      break;
	    }
	}
      if (j >= ARRAY_SIZE (cpu_arch))
	as_fatal (_("invalid -mtune= option: `%s'"), arg);
      break;

    case OPTION_MMNEMONIC:
      if (strcasecmp (arg, "att") == 0)
	intel_mnemonic = 0;
      else if (strcasecmp (arg, "intel") == 0)
	intel_mnemonic = 1;
      else
	as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
      break;

    case OPTION_MSYNTAX:
      if (strcasecmp (arg, "att") == 0)
	intel_syntax = 0;
      else if (strcasecmp (arg, "intel") == 0)
	intel_syntax = 1;
      else
	as_fatal (_("invalid -msyntax= option: `%s'"), arg);
      break;

    case OPTION_MINDEX_REG:
      allow_index_reg = 1;
      break;

    case OPTION_MNAKED_REG:
      allow_naked_reg = 1;
      break;

    case OPTION_MSSE2AVX:
      sse2avx = 1;
      break;

    case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
      use_unaligned_vector_move = 1;
      break;

    case OPTION_MSSE_CHECK:
      if (strcasecmp (arg, "error") == 0)
	sse_check = check_error;
      else if (strcasecmp (arg, "warning") == 0)
	sse_check = check_warning;
      else if (strcasecmp (arg, "none") == 0)
	sse_check = check_none;
      else
	as_fatal (_("invalid -msse-check= option: `%s'"), arg);
      break;

    case OPTION_MOPERAND_CHECK:
      if (strcasecmp (arg, "error") == 0)
	operand_check = check_error;
      else if (strcasecmp (arg, "warning") == 0)
	operand_check = check_warning;
      else if (strcasecmp (arg, "none") == 0)
	operand_check = check_none;
      else
	as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
      break;

    case OPTION_MAVXSCALAR:
      if (strcasecmp (arg, "128") == 0)
	avxscalar = vex128;
      else if (strcasecmp (arg, "256") == 0)
	avxscalar = vex256;
      else
	as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
      break;

    case OPTION_MVEXWIG:
      if (strcmp (arg, "0") == 0)
	vexwig = vexw0;
      else if (strcmp (arg, "1") == 0)
	vexwig = vexw1;
      else
	as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
      break;

    case OPTION_MADD_BND_PREFIX:
      add_bnd_prefix = 1;
      break;

    case OPTION_MEVEXLIG:
      if (strcmp (arg, "128") == 0)
	evexlig = evexl128;
      else if (strcmp (arg, "256") == 0)
	evexlig = evexl256;
      else if (strcmp (arg, "512") == 0)
	evexlig = evexl512;
      else
	as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
      break;

    case OPTION_MEVEXRCIG:
      if (strcmp (arg, "rne") == 0)
	evexrcig = rne;
      else if (strcmp (arg, "rd") == 0)
	evexrcig = rd;
      else if (strcmp (arg, "ru") == 0)
	evexrcig = ru;
      else if (strcmp (arg, "rz") == 0)
	evexrcig = rz;
      else
	as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
      break;

    case OPTION_MEVEXWIG:
      if (strcmp (arg, "0") == 0)
	evexwig = evexw0;
      else if (strcmp (arg, "1") == 0)
	evexwig = evexw1;
      else
	as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
      break;

#if defined (TE_PE) || defined (TE_PEP)
    case OPTION_MBIG_OBJ:
      use_big_obj = 1;
      break;
#endif

    case OPTION_MOMIT_LOCK_PREFIX:
      if (strcasecmp (arg, "yes") == 0)
	omit_lock_prefix = 1;
      else if (strcasecmp (arg, "no") == 0)
	omit_lock_prefix = 0;
      else
	as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
      break;

    case OPTION_MFENCE_AS_LOCK_ADD:
      if (strcasecmp (arg, "yes") == 0)
	avoid_fence = 1;
      else if (strcasecmp (arg, "no") == 0)
	avoid_fence = 0;
      else
	as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
      break;

    case OPTION_MLFENCE_AFTER_LOAD:
      if (strcasecmp (arg, "yes") == 0)
	lfence_after_load = 1;
      else if (strcasecmp (arg, "no") == 0)
	lfence_after_load = 0;
      else
	as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
      break;

    case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
      if (strcasecmp (arg, "all") == 0)
	{
	  lfence_before_indirect_branch = lfence_branch_all;
	  if (lfence_before_ret == lfence_before_ret_none)
	    lfence_before_ret = lfence_before_ret_shl;
	}
      else if (strcasecmp (arg, "memory") == 0)
	lfence_before_indirect_branch = lfence_branch_memory;
      else if (strcasecmp (arg, "register") == 0)
	lfence_before_indirect_branch = lfence_branch_register;
      else if (strcasecmp (arg, "none") == 0)
	lfence_before_indirect_branch = lfence_branch_none;
      else
	as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
		  arg);
      break;

    case OPTION_MLFENCE_BEFORE_RET:
      if (strcasecmp (arg, "or") == 0)
	lfence_before_ret = lfence_before_ret_or;
      else if (strcasecmp (arg, "not") == 0)
	lfence_before_ret = lfence_before_ret_not;
      else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
	lfence_before_ret = lfence_before_ret_shl;
      else if (strcasecmp (arg, "none") == 0)
	lfence_before_ret = lfence_before_ret_none;
      else
	as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
		  arg);
      break;

    case OPTION_MRELAX_RELOCATIONS:
      if (strcasecmp (arg, "yes") == 0)
	generate_relax_relocations = 1;
      else if (strcasecmp (arg, "no") == 0)
	generate_relax_relocations = 0;
      else
	as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
      break;

    case OPTION_MALIGN_BRANCH_BOUNDARY:
      {
	char *end;
	long int align = strtoul (arg, &end, 0);
	if (*end == '\0')
	  {
	    if (align == 0)
	      {
		align_branch_power = 0;
		break;
	      }
	    else if (align >= 16)
	      {
		int align_power;
		for (align_power = 0;
		     (align & 1) == 0;
		     align >>= 1, align_power++)
		  continue;
		/* Limit alignment power to 31.  */
		if (align == 1 && align_power < 32)
		  {
		    align_branch_power = align_power;
		    break;
		  }
	      }
	  }
	as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
      }
      break;

    case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
      {
	char *end;
	int align = strtoul (arg, &end, 0);
	/* Some processors only support 5 prefixes.  */
	if (*end == '\0' && align >= 0 && align < 6)
	  {
	    align_branch_prefix_size = align;
	    break;
	  }
	as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
		  arg);
      }
      break;

    case OPTION_MALIGN_BRANCH:
      align_branch = 0;
      saved = xstrdup (arg);
      type = saved;
      do
	{
	  next = strchr (type, '+');
	  if (next)
	    *next++ = '\0';
	  if (strcasecmp (type, "jcc") == 0)
	    align_branch |= align_branch_jcc_bit;
	  else if (strcasecmp (type, "fused") == 0)
	    align_branch |= align_branch_fused_bit;
	  else if (strcasecmp (type, "jmp") == 0)
	    align_branch |= align_branch_jmp_bit;
	  else if (strcasecmp (type, "call") == 0)
	    align_branch |= align_branch_call_bit;
	  else if (strcasecmp (type, "ret") == 0)
	    align_branch |= align_branch_ret_bit;
	  else if (strcasecmp (type, "indirect") == 0)
	    align_branch |= align_branch_indirect_bit;
	  else
	    as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
	  type = next;
	}
      while (next != NULL);
      free (saved);
      break;

    case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
      align_branch_power = 5;
      align_branch_prefix_size = 5;
      align_branch = (align_branch_jcc_bit
		      | align_branch_fused_bit
		      | align_branch_jmp_bit);
      break;

    case OPTION_MAMD64:
      isa64 = amd64;
      break;

    case OPTION_MINTEL64:
      isa64 = intel64;
      break;

    case 'O':
      if (arg == NULL)
	{
	  optimize = 1;
	  /* Turn off -Os.  */
	  optimize_for_space = 0;
	}
      else if (*arg == 's')
	{
	  optimize_for_space = 1;
	  /* Turn on all encoding optimizations.  */
	  optimize = INT_MAX;
	}
      else
	{
	  optimize = atoi (arg);
	  /* Turn off -Os.  */
	  optimize_for_space = 0;
	}
      break;

    default:
      return 0;
    }
  return 1;
}

#define MESSAGE_TEMPLATE \
"                                                                                "

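/* Append the LEN bytes of NAME to the --help message buffer at P,
   separating entries with ", " and flushing MESSAGE to STREAM whenever
   the line tracked by *LEFT_P runs out of room.  Return the updated
   write position.  */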
static char *
output_message (FILE *stream, char *p, char *message, char *start,
		int *left_p, const char *name, int len)
{
  int size = sizeof (MESSAGE_TEMPLATE);
  int left = *left_p;

  /* Reserve 2 spaces for ", " or ",\0".  */
  left -= len + 2;

  /* Check if there is any room.  */
  if (left >= 0)
    {
      if (p != start)
	{
	  *p++ = ',';
	  *p++ = ' ';
	}
      p = mempcpy (p, name, len);
    }
  else
    {
      /* Output the current message now and start a new one.  */
      *p++ = ',';
      *p = '\0';
      fprintf (stream, "%s\n", message);
      p = start;
      left = size - (start - message) - len - 2;

      gas_assert (left >= 0);

      p = mempcpy (p, name, len);
    }

  *left_p = left;
  return p;
}

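/* List the names from cpu_arch on STREAM.  With EXT, show ISA extensions
   (and their "no"-prefixed forms) rather than processors; with CHECK,
   also list the "default", "push" and "pop" pseudo values and omit
   processors that cannot be selected with -march=.  */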
static void
show_arch (FILE *stream, int ext, int check)
{
  static char message[] = MESSAGE_TEMPLATE;
  char *start = message + 27;
  char *p;
  int size = sizeof (MESSAGE_TEMPLATE);
  int left;
  const char *name;
  int len;
  unsigned int j;

  p = start;
  left = size - (start - message);

  if (!ext && check)
    {
      p = output_message (stream, p, message, start, &left,
			  STRING_COMMA_LEN ("default"));
      p = output_message (stream, p, message, start, &left,
			  STRING_COMMA_LEN ("push"));
      p = output_message (stream, p, message, start, &left,
			  STRING_COMMA_LEN ("pop"));
    }

  for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
    {
      /* Should it be skipped?  */
      if (cpu_arch[j].skip)
	continue;

      name = cpu_arch[j].name;
      len = cpu_arch[j].len;
      if (cpu_arch[j].type == PROCESSOR_NONE)
	{
	  /* It is an extension.  Skip if we aren't asked to show it.  */
	  if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
	    continue;
	}
      else if (ext)
	{
	  /* It is a processor.  Skip if we only show extensions.  */
	  continue;
	}
      else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
	{
	  /* It is an impossible processor - skip.  */
	  continue;
	}

      p = output_message (stream, p, message, start, &left, name, len);
    }

  /* Display disabled extensions.  */
  if (ext)
    for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
      {
	char *str;

	if (cpu_arch[j].type != PROCESSOR_NONE
	    || !cpu_flags_all_zero (&cpu_arch[j].enable))
	  continue;
	str = xasprintf ("no%s", cpu_arch[j].name);
	p = output_message (stream, p, message, start, &left, str,
			    strlen (str));
	free (str);
      }

  *p = '\0';
  fprintf (stream, "%s\n", message);
}

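/* Print a description of the target specific command line options on
   STREAM, for --help.  */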
void
md_show_usage (FILE *stream)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -Qy, -Qn                ignored\n\
  -V                      print assembler version number\n\
  -k                      ignored\n"));
#endif
  fprintf (stream, _("\
  -n                      do not optimize code alignment\n\
  -O{012s}                attempt some code optimizations\n\
  -q                      quieten some warnings\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -s                      ignored\n"));
#endif
#ifdef BFD64
# if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  --32/--64/--x32         generate 32bit/64bit/x32 object\n"));
# elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
  fprintf (stream, _("\
  --32/--64               generate 32bit/64bit object\n"));
# endif
#endif
#ifdef SVR4_COMMENT_CHARS
  fprintf (stream, _("\
  --divide                do not treat `/' as a comment character\n"));
#else
  fprintf (stream, _("\
  --divide                ignored\n"));
#endif
  fprintf (stream, _("\
  -march=CPU[,+EXTENSION...]\n\
                          generate code for CPU and EXTENSION, CPU is one of:\n"));
  show_arch (stream, 0, 1);
  fprintf (stream, _("\
                          EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
  show_arch (stream, 1, 0);
  fprintf (stream, _("\
  -mtune=CPU              optimize for CPU, CPU is one of:\n"));
  show_arch (stream, 0, 0);
  fprintf (stream, _("\
  -msse2avx               encode SSE instructions with VEX prefix\n"));
  fprintf (stream, _("\
  -muse-unaligned-vector-move\n\
                          encode aligned vector move as unaligned vector move\n"));
  fprintf (stream, _("\
  -msse-check=[none|error|warning] (default: warning)\n\
                          check SSE instructions\n"));
  fprintf (stream, _("\
  -moperand-check=[none|error|warning] (default: warning)\n\
                          check operand combinations for validity\n"));
  fprintf (stream, _("\
  -mavxscalar=[128|256] (default: 128)\n\
                          encode scalar AVX instructions with specific vector\n\
                          length\n"));
  fprintf (stream, _("\
  -mvexwig=[0|1] (default: 0)\n\
                          encode VEX instructions with specific VEX.W value\n\
                          for VEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexlig=[128|256|512] (default: 128)\n\
                          encode scalar EVEX instructions with specific vector\n\
                          length\n"));
  fprintf (stream, _("\
  -mevexwig=[0|1] (default: 0)\n\
                          encode EVEX instructions with specific EVEX.W value\n\
                          for EVEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
                          encode EVEX instructions with specific EVEX.RC value\n\
                          for SAE-only ignored instructions\n"));
  fprintf (stream, _("\
  -mmnemonic=[att|intel] "));
  if (SYSV386_COMPAT)
    fprintf (stream, _("(default: att)\n"));
  else
    fprintf (stream, _("(default: intel)\n"));
  fprintf (stream, _("\
                          use AT&T/Intel mnemonic\n"));
  fprintf (stream, _("\
  -msyntax=[att|intel] (default: att)\n\
                          use AT&T/Intel syntax\n"));
  fprintf (stream, _("\
  -mindex-reg             support pseudo index registers\n"));
  fprintf (stream, _("\
  -mnaked-reg             don't require `%%' prefix for registers\n"));
  fprintf (stream, _("\
  -madd-bnd-prefix        add BND prefix for all valid branches\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -mshared                disable branch optimization for shared code\n"));
  fprintf (stream, _("\
  -mx86-used-note=[no|yes] "));
  if (DEFAULT_X86_USED_NOTE)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate x86 used ISA and feature properties\n"));
#endif
#if defined (TE_PE) || defined (TE_PEP)
  fprintf (stream, _("\
  -mbig-obj               generate big object files\n"));
#endif
  fprintf (stream, _("\
  -momit-lock-prefix=[no|yes] (default: no)\n\
                          strip all lock prefixes\n"));
  fprintf (stream, _("\
  -mfence-as-lock-add=[no|yes] (default: no)\n\
                          encode lfence, mfence and sfence as\n\
                           lock addl $0x0, (%%{re}sp)\n"));
  fprintf (stream, _("\
  -mrelax-relocations=[no|yes] "));
  if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate relax relocations\n"));
  fprintf (stream, _("\
  -malign-branch-boundary=NUM (default: 0)\n\
                          align branches within NUM byte boundary\n"));
  fprintf (stream, _("\
  -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
                          TYPE is combination of jcc, fused, jmp, call, ret,\n\
                           indirect\n\
                          specify types of branches to align\n"));
  fprintf (stream, _("\
  -malign-branch-prefix-size=NUM (default: 5)\n\
                          align branches with NUM prefixes per instruction\n"));
  fprintf (stream, _("\
  -mbranches-within-32B-boundaries\n\
                          align branches within 32 byte boundary\n"));
  fprintf (stream, _("\
  -mlfence-after-load=[no|yes] (default: no)\n\
                          generate lfence after load\n"));
  fprintf (stream, _("\
  -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
                          generate lfence before indirect near branch\n"));
  fprintf (stream, _("\
  -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
                          generate lfence before ret\n"));
  fprintf (stream, _("\
  -mamd64                 accept only AMD64 ISA [default]\n"));
  fprintf (stream, _("\
  -mintel64               accept only Intel64 ISA\n"));
}

#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
     || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))

/* Pick the target format to use.  */

const char *
i386_target_format (void)
{
  if (startswith (default_arch, "x86_64"))
    {
      update_code_flag (CODE_64BIT, 1);
      if (default_arch[6] == '\0')
	x86_elf_abi = X86_64_ABI;
      else
	x86_elf_abi = X86_64_X32_ABI;
    }
  else if (!strcmp (default_arch, "i386"))
    update_code_flag (CODE_32BIT, 1);
  else if (!strcmp (default_arch, "iamcu"))
    {
      update_code_flag (CODE_32BIT, 1);
      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
	{
	  static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
	  cpu_arch_name = "iamcu";
	  free (cpu_sub_arch_name);
	  cpu_sub_arch_name = NULL;
	  cpu_arch_flags = iamcu_flags;
	  cpu_arch_isa = PROCESSOR_IAMCU;
	  cpu_arch_isa_flags = iamcu_flags;
	  if (!cpu_arch_tune_set)
	    {
	      cpu_arch_tune = cpu_arch_isa;
	      cpu_arch_tune_flags = cpu_arch_isa_flags;
	    }
	}
      else if (cpu_arch_isa != PROCESSOR_IAMCU)
	as_fatal (_("Intel MCU doesn't support `%s' architecture"),
		  cpu_arch_name);
    }
  else
    as_fatal (_("unknown architecture"));

  if (cpu_flags_all_zero (&cpu_arch_isa_flags))
    cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
  if (cpu_flags_all_zero (&cpu_arch_tune_flags))
    cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;

  switch (OUTPUT_FLAVOR)
    {
#if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
    case bfd_target_aout_flavour:
      return AOUT_TARGET_FORMAT;
#endif
#if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
# if defined (TE_PE) || defined (TE_PEP)
    case bfd_target_coff_flavour:
      if (flag_code == CODE_64BIT)
	{
	  object_64bit = 1;
	  return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
	}
      return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
# elif defined (TE_GO32)
    case bfd_target_coff_flavour:
      return "coff-go32";
# else
    case bfd_target_coff_flavour:
      return "coff-i386";
# endif
#endif
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
    case bfd_target_elf_flavour:
      {
	const char *format;

	switch (x86_elf_abi)
	  {
	  default:
	    format = ELF_TARGET_FORMAT;
#ifndef TE_SOLARIS
	    tls_get_addr = "___tls_get_addr";
#endif
	    break;
	  case X86_64_ABI:
	    use_rela_relocations = 1;
	    object_64bit = 1;
#ifndef TE_SOLARIS
	    tls_get_addr = "__tls_get_addr";
#endif
	    format = ELF_TARGET_FORMAT64;
	    break;
	  case X86_64_X32_ABI:
	    use_rela_relocations = 1;
	    object_64bit = 1;
#ifndef TE_SOLARIS
	    tls_get_addr = "__tls_get_addr";
#endif
	    disallow_64bit_reloc = 1;
	    format = ELF_TARGET_FORMAT32;
	    break;
	  }
	if (cpu_arch_isa == PROCESSOR_IAMCU)
	  {
	    if (x86_elf_abi != I386_ABI)
	      as_fatal (_("Intel MCU is 32bit only"));
	    return ELF_TARGET_IAMCU_FORMAT;
	  }
	else
	  return format;
      }
#endif
#if defined (OBJ_MACH_O)
    case bfd_target_mach_o_flavour:
      if (flag_code == CODE_64BIT)
	{
	  use_rela_relocations = 1;
	  object_64bit = 1;
	  return "mach-o-x86-64";
	}
      else
	return "mach-o-i386";
#endif
    default:
      abort ();
      return NULL;
    }
}

#endif /* OBJ_MAYBE_ more than one */
\f
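/* Lazily create the _GLOBAL_OFFSET_TABLE_ symbol on first reference;
   all other undefined symbols are left for the linker to resolve.  */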
symbolS *
md_undefined_symbol (char *name)
{
  if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
      && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
      && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
      && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
    {
      if (!GOT_symbol)
	{
	  if (symbol_find (name))
	    as_bad (_("GOT already in symbol table"));
	  GOT_symbol = symbol_new (name, undefined_section,
				   &zero_address_frag, 0);
	}
      return GOT_symbol;
    }
  return 0;
}

/* Round up a section size to the appropriate boundary.  */

valueT
md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
{
#if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
  if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
    {
      /* For a.out, force the section size to be aligned.  If we don't do
	 this, BFD will align it for us, but it will not write out the
	 final bytes of the section.  This may be a bug in BFD, but it is
	 easier to fix it here since that is how the other a.out targets
	 work.  */
      int align;

      align = bfd_section_alignment (segment);
      size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
    }
#endif

  return size;
}

/* On the i386, PC-relative offsets are relative to the start of the
   next instruction.  That is, the address of the offset, plus its
   size, since the offset is always the last part of the insn.  */

long
md_pcrel_from (fixS *fixP)
{
  return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
}

#ifndef I386COFF

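/* Handle the .bss pseudo-op: switch to the .bss section, using an
   absolute expression as the subsegment number.  */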
static void
s_bss (int ignore ATTRIBUTE_UNUSED)
{
  int temp;

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (IS_ELF)
    obj_elf_section_change_hook ();
#endif
  temp = get_absolute_expression ();
  subseg_set (bss_section, (subsegT) temp);
  demand_empty_rest_of_line ();
}

#endif

/* Remember constant directive.  */

void
i386_cons_align (int ignore ATTRIBUTE_UNUSED)
{
  if (last_insn.kind != last_insn_directive
      && (bfd_section_flags (now_seg) & SEC_CODE))
    {
      last_insn.seg = now_seg;
      last_insn.kind = last_insn_directive;
      last_insn.name = "constant directive";
      last_insn.file = as_where (&last_insn.line);
      if (lfence_before_ret != lfence_before_ret_none)
	{
	  if (lfence_before_indirect_branch != lfence_branch_none)
	    as_warn (_("constant directive skips -mlfence-before-ret "
		       "and -mlfence-before-indirect-branch"));
	  else
	    as_warn (_("constant directive skips -mlfence-before-ret"));
	}
      else if (lfence_before_indirect_branch != lfence_branch_none)
	as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
    }
}

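/* Validate FIXP and convert GOT- and PLT-related fixups to their final
   relocation types.  Return zero if the fixup should be skipped instead
   of being turned into a relocation, non-zero otherwise.  */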
int
i386_validate_fix (fixS *fixp)
{
  if (fixp->fx_addsy && S_GET_SEGMENT (fixp->fx_addsy) == reg_section)
    {
      reloc_howto_type *howto;

      howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
      as_bad_where (fixp->fx_file, fixp->fx_line,
		    _("invalid %s relocation against register"),
		    howto ? howto->name : "<unknown>");
      return 0;
    }

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (fixp->fx_r_type == BFD_RELOC_SIZE32
      || fixp->fx_r_type == BFD_RELOC_SIZE64)
    return IS_ELF && fixp->fx_addsy
	   && (!S_IS_DEFINED (fixp->fx_addsy)
	       || S_IS_EXTERNAL (fixp->fx_addsy));
#endif

  if (fixp->fx_subsy)
    {
      if (fixp->fx_subsy == GOT_symbol)
	{
	  if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
	    {
	      if (!object_64bit)
		abort ();
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
	      if (fixp->fx_tcbit2)
		fixp->fx_r_type = (fixp->fx_tcbit
				   ? BFD_RELOC_X86_64_REX_GOTPCRELX
				   : BFD_RELOC_X86_64_GOTPCRELX);
	      else
#endif
		fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
	    }
	  else
	    {
	      if (!object_64bit)
		fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
	      else
		fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
	    }
	  fixp->fx_subsy = 0;
	}
    }
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  else
    {
      /* NB: Commit 292676c1 resolved PLT32 reloc against local symbol
	 to section.  Since PLT32 relocation must be against symbols,
	 turn such PLT32 relocation into PC32 relocation.  */
      if (fixp->fx_addsy
	  && (fixp->fx_r_type == BFD_RELOC_386_PLT32
	      || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
	  && symbol_section_p (fixp->fx_addsy))
	fixp->fx_r_type = BFD_RELOC_32_PCREL;
      if (!object_64bit)
	{
	  if (fixp->fx_r_type == BFD_RELOC_386_GOT32
	      && fixp->fx_tcbit2)
	    fixp->fx_r_type = BFD_RELOC_386_GOT32X;
	}
    }
#endif

  return 1;
}

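/* Translate the internal fixup FIXP into a BFD relocation for SECTION,
   selecting the relocation code and computing the addend.  */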
arelent *
tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
{
  arelent *rel;
  bfd_reloc_code_real_type code;

  switch (fixp->fx_r_type)
    {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      symbolS *sym;

    case BFD_RELOC_SIZE32:
    case BFD_RELOC_SIZE64:
      if (fixp->fx_addsy
	  && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
	  && (!fixp->fx_subsy
	      || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
	sym = fixp->fx_addsy;
      else if (fixp->fx_subsy
	       && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
	       && (!fixp->fx_addsy
		   || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
	sym = fixp->fx_subsy;
      else
	sym = NULL;
      if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
	{
	  /* Resolve size relocation against local symbol to size of
	     the symbol plus addend.  */
	  valueT value = S_GET_SIZE (sym);

	  if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
	    value = bfd_section_size (S_GET_SEGMENT (sym));
	  if (sym == fixp->fx_subsy)
	    {
	      value = -value;
	      if (fixp->fx_addsy)
		value += S_GET_VALUE (fixp->fx_addsy);
	    }
	  else if (fixp->fx_subsy)
	    value -= S_GET_VALUE (fixp->fx_subsy);
	  value += fixp->fx_offset;
	  if (fixp->fx_r_type == BFD_RELOC_SIZE32
	      && object_64bit
	      && !fits_in_unsigned_long (value))
	    as_bad_where (fixp->fx_file, fixp->fx_line,
			  _("symbol size computation overflow"));
	  fixp->fx_addsy = NULL;
	  fixp->fx_subsy = NULL;
	  md_apply_fix (fixp, (valueT *) &value, NULL);
	  return NULL;
	}
      if (!fixp->fx_addsy || fixp->fx_subsy)
	{
	  as_bad_where (fixp->fx_file, fixp->fx_line,
			_("unsupported expression involving @size"));
	  return NULL;
	}
#endif
      /* Fall through.  */

    case BFD_RELOC_X86_64_PLT32:
    case BFD_RELOC_X86_64_GOT32:
    case BFD_RELOC_X86_64_GOTPCREL:
    case BFD_RELOC_X86_64_GOTPCRELX:
    case BFD_RELOC_X86_64_REX_GOTPCRELX:
    case BFD_RELOC_386_PLT32:
    case BFD_RELOC_386_GOT32:
    case BFD_RELOC_386_GOT32X:
    case BFD_RELOC_386_GOTOFF:
    case BFD_RELOC_386_GOTPC:
    case BFD_RELOC_386_TLS_GD:
    case BFD_RELOC_386_TLS_LDM:
    case BFD_RELOC_386_TLS_LDO_32:
    case BFD_RELOC_386_TLS_IE_32:
    case BFD_RELOC_386_TLS_IE:
    case BFD_RELOC_386_TLS_GOTIE:
    case BFD_RELOC_386_TLS_LE_32:
    case BFD_RELOC_386_TLS_LE:
    case BFD_RELOC_386_TLS_GOTDESC:
    case BFD_RELOC_386_TLS_DESC_CALL:
    case BFD_RELOC_X86_64_TLSGD:
    case BFD_RELOC_X86_64_TLSLD:
    case BFD_RELOC_X86_64_DTPOFF32:
    case BFD_RELOC_X86_64_DTPOFF64:
    case BFD_RELOC_X86_64_GOTTPOFF:
    case BFD_RELOC_X86_64_TPOFF32:
    case BFD_RELOC_X86_64_TPOFF64:
    case BFD_RELOC_X86_64_GOTOFF64:
    case BFD_RELOC_X86_64_GOTPC32:
    case BFD_RELOC_X86_64_GOT64:
    case BFD_RELOC_X86_64_GOTPCREL64:
    case BFD_RELOC_X86_64_GOTPC64:
    case BFD_RELOC_X86_64_GOTPLT64:
    case BFD_RELOC_X86_64_PLTOFF64:
    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
    case BFD_RELOC_X86_64_TLSDESC_CALL:
    case BFD_RELOC_RVA:
    case BFD_RELOC_VTABLE_ENTRY:
    case BFD_RELOC_VTABLE_INHERIT:
#ifdef TE_PE
    case BFD_RELOC_32_SECREL:
    case BFD_RELOC_16_SECIDX:
#endif
      code = fixp->fx_r_type;
      break;
    case BFD_RELOC_X86_64_32S:
      if (!fixp->fx_pcrel)
	{
	  /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
	  code = fixp->fx_r_type;
	  break;
	}
      /* Fall through.  */
    default:
      if (fixp->fx_pcrel)
	{
	  switch (fixp->fx_size)
	    {
	    default:
	      as_bad_where (fixp->fx_file, fixp->fx_line,
			    _("can not do %d byte pc-relative relocation"),
			    fixp->fx_size);
	      code = BFD_RELOC_32_PCREL;
	      break;
	    case 1: code = BFD_RELOC_8_PCREL; break;
	    case 2: code = BFD_RELOC_16_PCREL; break;
	    case 4: code = BFD_RELOC_32_PCREL; break;
#ifdef BFD64
	    case 8: code = BFD_RELOC_64_PCREL; break;
#endif
	    }
	}
      else
	{
	  switch (fixp->fx_size)
	    {
	    default:
	      as_bad_where (fixp->fx_file, fixp->fx_line,
			    _("can not do %d byte relocation"),
			    fixp->fx_size);
	      code = BFD_RELOC_32;
	      break;
	    case 1: code = BFD_RELOC_8; break;
	    case 2: code = BFD_RELOC_16; break;
	    case 4: code = BFD_RELOC_32; break;
#ifdef BFD64
	    case 8: code = BFD_RELOC_64; break;
#endif
	    }
	}
      break;
    }

  if ((code == BFD_RELOC_32
       || code == BFD_RELOC_32_PCREL
       || code == BFD_RELOC_X86_64_32S)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      if (!object_64bit)
	code = BFD_RELOC_386_GOTPC;
      else
	code = BFD_RELOC_X86_64_GOTPC32;
    }
  if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      code = BFD_RELOC_X86_64_GOTPC64;
    }

  rel = XNEW (arelent);
  rel->sym_ptr_ptr = XNEW (asymbol *);
  *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);

  rel->address = fixp->fx_frag->fr_address + fixp->fx_where;

  if (!use_rela_relocations)
    {
      /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
	 vtable entry to be used in the relocation's section offset.  */
      if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
	rel->address = fixp->fx_offset;
#if defined (OBJ_COFF) && defined (TE_PE)
      else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
	rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
      else
#endif
	rel->addend = 0;
    }
  /* Use the rela in 64bit mode.  */
  else
    {
      if (disallow_64bit_reloc)
	switch (code)
	  {
	  case BFD_RELOC_X86_64_DTPOFF64:
	  case BFD_RELOC_X86_64_TPOFF64:
	  case BFD_RELOC_64_PCREL:
	  case BFD_RELOC_X86_64_GOTOFF64:
	  case BFD_RELOC_X86_64_GOT64:
	  case BFD_RELOC_X86_64_GOTPCREL64:
	  case BFD_RELOC_X86_64_GOTPC64:
	  case BFD_RELOC_X86_64_GOTPLT64:
	  case BFD_RELOC_X86_64_PLTOFF64:
	    as_bad_where (fixp->fx_file, fixp->fx_line,
			  _("cannot represent relocation type %s in x32 mode"),
			  bfd_get_reloc_code_name (code));
	    break;
	  default:
	    break;
	  }

      if (!fixp->fx_pcrel)
	rel->addend = fixp->fx_offset;
      else
	switch (code)
	  {
	  case BFD_RELOC_X86_64_PLT32:
	  case BFD_RELOC_X86_64_GOT32:
	  case BFD_RELOC_X86_64_GOTPCREL:
	  case BFD_RELOC_X86_64_GOTPCRELX:
	  case BFD_RELOC_X86_64_REX_GOTPCRELX:
	  case BFD_RELOC_X86_64_TLSGD:
	  case BFD_RELOC_X86_64_TLSLD:
	  case BFD_RELOC_X86_64_GOTTPOFF:
	  case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
	  case BFD_RELOC_X86_64_TLSDESC_CALL:
	    rel->addend = fixp->fx_offset - fixp->fx_size;
	    break;
	  default:
	    rel->addend = (section->vma
			   - fixp->fx_size
			   + fixp->fx_addnumber
			   + md_pcrel_from (fixp));
	    break;
	  }
    }

  rel->howto = bfd_reloc_type_lookup (stdoutput, code);
  if (rel->howto == NULL)
    {
      as_bad_where (fixp->fx_file, fixp->fx_line,
		    _("cannot represent relocation type %s"),
		    bfd_get_reloc_code_name (code));
      /* Set howto to a garbage value so that we can keep going.  */
      rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
      gas_assert (rel->howto != NULL);
    }

  return rel;
}

#include "tc-i386-intel.c"

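/* Parse a register name for CFI directives and convert it to its DWARF2
   register number, temporarily permitting naked and pseudo registers.  */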
void
tc_x86_parse_to_dw2regnum (expressionS *exp)
{
  int saved_naked_reg;
  char saved_register_dot;

  saved_naked_reg = allow_naked_reg;
  allow_naked_reg = 1;
  saved_register_dot = register_chars['.'];
  register_chars['.'] = '.';
  allow_pseudo_reg = 1;
  expression_and_evaluate (exp);
  allow_pseudo_reg = 0;
  register_chars['.'] = saved_register_dot;
  allow_naked_reg = saved_naked_reg;

  if (exp->X_op == O_register && exp->X_add_number >= 0)
    {
      if ((addressT) exp->X_add_number < i386_regtab_size)
	{
	  exp->X_op = O_constant;
	  exp->X_add_number = i386_regtab[exp->X_add_number]
			      .dw2_regnum[flag_code >> 1];
	}
      else
	exp->X_op = O_illegal;
    }
}

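/* Emit the initial CFI instructions of a frame, defining the CFA in
   terms of the stack pointer and recording where the return address is
   saved.  */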
void
tc_x86_frame_initial_instructions (void)
{
  static unsigned int sp_regno[2];

  if (!sp_regno[flag_code >> 1])
    {
      char *saved_input = input_line_pointer;
      char sp[][4] = {"esp", "rsp"};
      expressionS exp;

      input_line_pointer = sp[flag_code >> 1];
      tc_x86_parse_to_dw2regnum (&exp);
      gas_assert (exp.X_op == O_constant);
      sp_regno[flag_code >> 1] = exp.X_add_number;
      input_line_pointer = saved_input;
    }

  cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
  cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
}

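/* Return the size of a DWARF2 address in bytes: 4 for x32, otherwise
   the target's native address size.  */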
int
x86_dwarf2_addr_size (void)
{
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
  if (x86_elf_abi == X86_64_X32_ABI)
    return 4;
#endif
  return bfd_arch_bits_per_address (stdoutput) / 8;
}

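/* Map the .section type name "unwind" to SHT_X86_64_UNWIND in 64bit
   mode; return -1 for anything unrecognized.  */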
int
i386_elf_section_type (const char *str, size_t len)
{
  if (flag_code == CODE_64BIT
      && len == sizeof ("unwind") - 1
      && startswith (str, "unwind"))
    return SHT_X86_64_UNWIND;

  return -1;
}

#ifdef TE_SOLARIS
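/* On Solaris, give 64bit .eh_frame sections the SHT_X86_64_UNWIND
   section type.  */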
void
i386_solaris_fix_up_eh_frame (segT sec)
{
  if (flag_code == CODE_64BIT)
    elf_section_type (sec) = SHT_X86_64_UNWIND;
}
#endif

#ifdef TE_PE
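/* Emit a SIZE byte section relative (secrel) offset to SYMBOL for PE
   DWARF2 debug information.  */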
void
tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
{
  expressionS exp;

  exp.X_op = O_secrel;
  exp.X_add_symbol = symbol;
  exp.X_add_number = 0;
  emit_expr (&exp, size);
}
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */

bfd_vma
x86_64_section_letter (int letter, const char **ptr_msg)
{
  if (flag_code == CODE_64BIT)
    {
      if (letter == 'l')
	return SHF_X86_64_LARGE;

      *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
    }
  else
    *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
  return -1;
}

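/* Recognize the .section attribute word "large" as SHF_X86_64_LARGE in
   64bit mode.  */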
bfd_vma
x86_64_section_word (char *str, size_t len)
{
  if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
    return SHF_X86_64_LARGE;

  return -1;
}

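/* Handle the .largecomm directive: like .comm, but in 64bit mode place
   local symbols in .lbss and common symbols in the large common
   section.  */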
static void
handle_large_common (int small ATTRIBUTE_UNUSED)
{
  if (flag_code != CODE_64BIT)
    {
      s_comm_internal (0, elf_common_parse);
      as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
    }
  else
    {
      static segT lbss_section;
      asection *saved_com_section_ptr = elf_com_section_ptr;
      asection *saved_bss_section = bss_section;

      if (lbss_section == NULL)
	{
	  flagword applicable;
	  segT seg = now_seg;
	  subsegT subseg = now_subseg;

	  /* The .lbss section is for local .largecomm symbols.  */
	  lbss_section = subseg_new (".lbss", 0);
	  applicable = bfd_applicable_section_flags (stdoutput);
	  bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
	  seg_info (lbss_section)->bss = 1;

	  subseg_set (seg, subseg);
	}

      elf_com_section_ptr = &_bfd_elf_large_com_section;
      bss_section = lbss_section;

      s_comm_internal (0, elf_common_parse);

      elf_com_section_ptr = saved_com_section_ptr;
      bss_section = saved_bss_section;
    }
}
#endif /* OBJ_ELF || OBJ_MAYBE_ELF */