gas/config/tc-i386.c

   1 /* tc-i386.c -- Assemble code for the Intel 80386
   2    Copyright (C) 1989-2023 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to the Free
  18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
  19    02110-1301, USA.  */
  20
  21 /* Intel 80386 machine specific gas.
  22    Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
  23    x86_64 support by Jan Hubicka (jh@suse.cz)
  24    VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
  25    Bugs & suggestions are completely welcome.  This is free software.
  26    Please help us make it better.  */
  27
  28 #include "as.h"
  29 #include "safe-ctype.h"
  30 #include "subsegs.h"
  31 #include "dwarf2dbg.h"
  32 #include "dw2gencfi.h"
  33 #include "gen-sframe.h"
  34 #include "sframe.h"
  35 #include "elf/x86-64.h"
  36 #include "opcodes/i386-init.h"
  37 #include <limits.h>
  38
  39 #ifndef INFER_ADDR_PREFIX /* May be predefined by the build; else 1.  */
  40 #define INFER_ADDR_PREFIX 1
  41 #endif
  42
  43 #ifndef DEFAULT_ARCH /* Initial value of default_arch below.  */
  44 #define DEFAULT_ARCH "i386"
  45 #endif
  46
  47 #ifndef INLINE /* __inline__ for GCC 2 or later, otherwise empty.  */
  48 #if __GNUC__ >= 2
  49 #define INLINE __inline__
  50 #else
  51 #define INLINE
  52 #endif
  53 #endif
  54
  55 /* Prefixes will be emitted in the order defined below.
  56    WAIT_PREFIX must be the first prefix since FWAIT really is an
  57    instruction, and so must come before any prefixes.
  58    The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
  59    REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
  60 #define WAIT_PREFIX     0
  61 #define SEG_PREFIX      1
  62 #define ADDR_PREFIX     2
  63 #define DATA_PREFIX     3
  64 #define REP_PREFIX      4
  65 #define HLE_PREFIX      REP_PREFIX      /* same slot as REP_PREFIX */
  66 #define BND_PREFIX      REP_PREFIX      /* same slot as REP_PREFIX */
  67 #define LOCK_PREFIX     5
  68 #define REX_PREFIX      6       /* must come last.  */
  69 #define MAX_PREFIXES    7       /* max prefixes per opcode; sizes prefix[] */
  70
  71 /* We define the syntax here (modulo base,index,scale syntax).  */
  72 #define REGISTER_PREFIX '%'
  73 #define IMMEDIATE_PREFIX '$'
  74 #define ABSOLUTE_PREFIX '*'
  75
  76 /* These are the instruction mnemonic suffixes in AT&T syntax or
  77    memory operand size in Intel syntax.  */
  78 #define WORD_MNEM_SUFFIX  'w'
  79 #define BYTE_MNEM_SUFFIX  'b'
  80 #define SHORT_MNEM_SUFFIX 's'
  81 #define LONG_MNEM_SUFFIX  'l'
  82 #define QWORD_MNEM_SUFFIX  'q'
  83
  84 #define END_OF_INSN '\0' /* the NUL character */
  85
  86 #define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } } /* initializer for an operand type of class ClassNone */
  87
  88 /* This matches the C -> StaticRounding alias in the opcode table.  */
  89 #define commutative staticrounding
  90
  91 /*
  92   'templates' groups together the 'template' structures for opcodes
  93   of the same name.  This is only used for storing the insns in the grand
  94   ole hash table of insns.
  95   The templates themselves form the half-open range [START, END): they
  96   start at START and run up to, but do not include, END.
  97   */
  98 typedef struct
  99 {
 100   const insn_template *start;
 101   const insn_template *end;
 102 }
 103 templates;
 104
 105 /* 386 operand encoding byte (ModR/M), kept as separate fields:  see 386 book for details of this.  */
 106 typedef struct
 107 {
 108   unsigned int regmem;  /* codes register or memory operand */
 109   unsigned int reg;     /* codes register operand (or extended opcode) */
 110   unsigned int mode;    /* how to interpret regmem & reg */
 111 }
 112 modrm_byte;
 113
 114 /* x86-64 extension prefix.  */
 115 typedef int rex_byte;
 116
 117 /* 386 opcode byte (SIB) to code indirect addressing, kept as separate fields.  */
 118 typedef struct
 119 {
 120   unsigned base;
 121   unsigned index;
 122   unsigned scale;
 123 }
 124 sib_byte;
 125
 126 /* x86 arch names, types and features; element type of cpu_arch[].  */
 127 typedef struct
 128 {
 129   const char *name;             /* arch name */
 130   unsigned int len:8;           /* arch string length */
 131   bool skip:1;                  /* show_arch should skip this. */
 132   enum processor_type type;     /* arch type */
 133   i386_cpu_flags enable;                /* cpu feature enable flags */
 134   i386_cpu_flags disable;       /* cpu feature disable flags */
 135 }
 136 arch_entry;
 137
 138 static void update_code_flag (int, int); /* Prototypes of file-local (static) functions follow.  */
 139 static void set_code_flag (int);
 140 static void set_16bit_gcc_code_flag (int);
 141 static void set_intel_syntax (int);
 142 static void set_intel_mnemonic (int);
 143 static void set_allow_index_reg (int);
 144 static void set_check (int);
 145 static void set_cpu_arch (int);
 146 #ifdef TE_PE /* These two are only declared for PE targets.  */
 147 static void pe_directive_secrel (int);
 148 static void pe_directive_secidx (int);
 149 #endif
 150 static void signed_cons (int);
 151 static char *output_invalid (int c);
 152 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
 153                                     const char *);
 154 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
 155                                        const char *);
 156 static int i386_att_operand (char *);
 157 static int i386_intel_operand (char *, int);
 158 static int i386_intel_simplify (expressionS *);
 159 static int i386_intel_parse_name (const char *, expressionS *);
 160 static const reg_entry *parse_register (char *, char **);
 161 static const char *parse_insn (const char *, char *);
 162 static char *parse_operands (char *, const char *);
 163 static void swap_operands (void);
 164 static void swap_2_operands (unsigned int, unsigned int);
 165 static enum flag_code i386_addressing_mode (void);
 166 static void optimize_imm (void);
 167 static void optimize_disp (void);
 168 static const insn_template *match_template (char);
 169 static int check_string (void);
 170 static int process_suffix (void);
 171 static int check_byte_reg (void);
 172 static int check_long_reg (void);
 173 static int check_qword_reg (void);
 174 static int check_word_reg (void);
 175 static int finalize_imm (void);
 176 static int process_operands (void);
 177 static const reg_entry *build_modrm_byte (void);
 178 static void output_insn (void);
 179 static void output_imm (fragS *, offsetT);
 180 static void output_disp (fragS *, offsetT);
 181 #ifndef I386COFF /* s_bss is not declared when I386COFF is defined.  */
 182 static void s_bss (int);
 183 #endif
 184 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) /* ELF-only declarations and state.  */
 185 static void handle_large_common (int small ATTRIBUTE_UNUSED);
 186
 187 /* GNU_PROPERTY_X86_ISA_1_USED.  */
 188 static unsigned int x86_isa_1_used;
 189 /* GNU_PROPERTY_X86_FEATURE_2_USED.  */
 190 static unsigned int x86_feature_2_used;
 191 /* Generate x86 used ISA and feature properties.  Starts out as
     DEFAULT_X86_USED_NOTE.  */
 192 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
 193 #endif
 194
 195 static const char *default_arch = DEFAULT_ARCH; /* starts out as DEFAULT_ARCH */
 196
 197 /* parse_register() returns this when a register alias cannot be used.  */
 198 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
 199                                    { Dw2Inval, Dw2Inval } };
 200
 201 static const reg_entry *reg_eax; /* Pointers to individual reg_entry records; not initialised here.  */
 202 static const reg_entry *reg_ds;
 203 static const reg_entry *reg_es;
 204 static const reg_entry *reg_ss;
 205 static const reg_entry *reg_st0;
 206 static const reg_entry *reg_k0;
 207
 208 /* VEX prefix.  */
 209 typedef struct
 210 {
 211   /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
 212   unsigned char bytes[4];
 213   unsigned int length; /* presumably the number of bytes[] in use -- TODO confirm */
 214   /* Destination or source register specifier.  */
 215   const reg_entry *register_specifier;
 216 } vex_prefix;
 217
 218 /* 'md_assemble ()' gathers together information and puts it into an
 219    i386_insn.  */
 220
 221 union i386_op /* One operand: a displacement, an immediate or a register.  */
 222   {
 223     expressionS *disps;
 224     expressionS *imms;
 225     const reg_entry *regs;
 226   };
 227
 228 enum i386_error /* Codes recorded in the `error' member of struct _i386_insn.  */
 229   {
 230     no_error, /* Must be first.  */
 231     operand_size_mismatch,
 232     operand_type_mismatch,
 233     register_type_mismatch,
 234     number_of_operands_mismatch,
 235     invalid_instruction_suffix,
 236     bad_imm4,
 237     unsupported_with_intel_mnemonic,
 238     unsupported_syntax,
 239     unsupported,
 240     unsupported_on_arch,
 241     unsupported_64bit,
 242     invalid_sib_address,
 243     invalid_vsib_address,
 244     invalid_vector_register_set,
 245     invalid_tmm_register_set,
 246     invalid_dest_and_src_register_set,
 247     unsupported_vector_index_register,
 248     unsupported_broadcast,
 249     broadcast_needed,
 250     unsupported_masking,
 251     mask_not_on_destination,
 252     no_default_mask,
 253     unsupported_rc_sae,
 254     invalid_register_operand,
 255   };
 256
 257 struct _i386_insn
 258   {
 259     /* TM holds the template for the insn we're currently assembling.  */
 260     insn_template tm;
 261
 262     /* SUFFIX holds the instruction size suffix for byte, word, dword
 263        or qword, if given.  */
 264     char suffix;
 265
 266     /* OPCODE_LENGTH holds the number of base opcode bytes.  */
 267     unsigned char opcode_length;
 268
 269     /* OPERANDS gives the number of given operands.  */
 270     unsigned int operands;
 271
 272     /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
 273        of given register, displacement, memory operands and immediate
 274        operands.  */
 275     unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
 276
 277     /* TYPES [i] is the type (see above #defines) which tells us how to
 278        use OP[i] for the corresponding operand.  */
 279     i386_operand_type types[MAX_OPERANDS];
 280
 281     /* Displacement expression, immediate expression, or register for each
 282        operand.  */
 283     union i386_op op[MAX_OPERANDS];
 284
 285     /* Flags for operands (bit values Operand_PCrel and Operand_Mem).  */
 286     unsigned int flags[MAX_OPERANDS];
 287 #define Operand_PCrel 1
 288 #define Operand_Mem   2
 289
 290     /* Relocation type for each operand.  */
 291     enum bfd_reloc_code_real reloc[MAX_OPERANDS];
 292
 293     /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
 294        the base index byte below.  */
 295     const reg_entry *base_reg;
 296     const reg_entry *index_reg;
 297     unsigned int log2_scale_factor;
 298
 299     /* SEG gives the seg_entries of this insn.  They are zero unless
 300        explicit segment overrides are given.  */
 301     const reg_entry *seg[2];
 302
 303     /* PREFIX holds all the given prefix opcodes (usually null).
 304        PREFIXES is the number of prefix opcodes.  */
 305     unsigned int prefixes;
 306     unsigned char prefix[MAX_PREFIXES];
 307
 308     /* Register is in low 3 bits of opcode.  */
 309     bool short_form;
 310
 311     /* The operand to a branch insn indicates an absolute branch.  */
 312     bool jumpabsolute;
 313
 314     /* The operand to a branch insn indicates a far branch.  */
 315     bool far_branch;
 316
 317     /* There is a memory operand of (%dx) which should be only used
 318        with input/output instructions.  */
 319     bool input_output_operand;
 320
 321     /* Extended states.  The YMM value includes the XMM bit and the ZMM
     value includes the YMM (and hence XMM) bits.  */
 322     enum
 323       {
 324         /* Use MMX state.  */
 325         xstate_mmx = 1 << 0,
 326         /* Use XMM state.  */
 327         xstate_xmm = 1 << 1,
 328         /* Use YMM state.  */
 329         xstate_ymm = 1 << 2 | xstate_xmm,
 330         /* Use ZMM state.  */
 331         xstate_zmm = 1 << 3 | xstate_ymm,
 332         /* Use TMM state.  */
 333         xstate_tmm = 1 << 4,
 334         /* Use MASK state.  */
 335         xstate_mask = 1 << 5
 336       } xstate;
 337
 338     /* Has GOTPC or TLS relocation.  */
 339     bool has_gotpc_tls_reloc;
 340
 341     /* RM and SIB are the modrm byte and the sib byte where the
 342        addressing modes of this insn are encoded.  */
 343     modrm_byte rm;
 344     rex_byte rex;
 345     rex_byte vrex;
 346     sib_byte sib;
 347     vex_prefix vex;
 348
 349     /* Masking attributes.
 350
 351        The struct describes masking, applied to OPERAND in the instruction.
 352        REG is a pointer to the corresponding mask register.  ZEROING tells
 353        whether merging or zeroing mask is used.  */
 354     struct Mask_Operation
 355     {
 356       const reg_entry *reg;
 357       unsigned int zeroing;
 358       /* The operand where this operation is associated.  */
 359       unsigned int operand;
 360     } mask;
 361
 362     /* Rounding control and SAE attributes.  */
 363     struct RC_Operation
 364     {
 365       enum rc_type
 366         {
 367           rc_none = -1,
 368           rne,
 369           rd,
 370           ru,
 371           rz,
 372           saeonly
 373         } type;
 374       /* In Intel syntax the operand modifier form is supposed to be used, but
 375          we continue to accept the immediate forms as well.  */
 376       bool modifier;
 377     } rounding;
 378
 379     /* Broadcasting attributes.
 380
 381        The struct describes broadcasting, applied to OPERAND.  TYPE
 382        expresses the broadcast factor.  */
 383     struct Broadcast_Operation
 384     {
 385       /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
 386       unsigned int type;
 387
 388       /* Index of broadcasted operand.  */
 389       unsigned int operand;
 390
 391       /* Number of bytes to broadcast.  */
 392       unsigned int bytes;
 393     } broadcast;
 394
 395     /* Compressed disp8*N attribute.  */
 396     unsigned int memshift;
 397
 398     /* Prefer load or store in encoding.  */
 399     enum
 400       {
 401         dir_encoding_default = 0,
 402         dir_encoding_load,
 403         dir_encoding_store,
 404         dir_encoding_swap
 405       } dir_encoding;
 406
 407     /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
 408     enum
 409       {
 410         disp_encoding_default = 0,
 411         disp_encoding_8bit,
 412         disp_encoding_16bit,
 413         disp_encoding_32bit
 414       } disp_encoding;
 415
 416     /* Prefer the REX byte in encoding.  */
 417     bool rex_encoding;
 418
 419     /* Disable instruction size optimization.  */
 420     bool no_optimize;
 421
 422     /* How to encode vector instructions.  */
 423     enum
 424       {
 425         vex_encoding_default = 0,
 426         vex_encoding_vex,
 427         vex_encoding_vex3,
 428         vex_encoding_evex,
 429         vex_encoding_error
 430       } vec_encoding;
 431
 432     /* REP prefix.  */
 433     const char *rep_prefix;
 434
 435     /* HLE prefix.  */
 436     const char *hle_prefix;
 437
 438     /* Have BND prefix.  */
 439     const char *bnd_prefix;
 440
 441     /* Have NOTRACK prefix.  */
 442     const char *notrack_prefix;
 443
 444     /* Error code (one of enum i386_error).  */
 445     enum i386_error error;
 446   };
 447
 448 typedef struct _i386_insn i386_insn;
 449
 450 /* Links each RC type with the corresponding string that will be looked
 451    for in the asm source.  */
 452 struct RC_name
 453 {
 454   enum rc_type type;
 455   const char *name;
 456   unsigned int len; /* presumably the length of NAME, via STRING_COMMA_LEN -- confirm */
 457 };
 458
 459 static const struct RC_name RC_NamesTable[] = /* one entry per rc_type other than rc_none */
 460 {
 461   {  rne, STRING_COMMA_LEN ("rn-sae") },
 462   {  rd,  STRING_COMMA_LEN ("rd-sae") },
 463   {  ru,  STRING_COMMA_LEN ("ru-sae") },
 464   {  rz,  STRING_COMMA_LEN ("rz-sae") },
 465   {  saeonly,  STRING_COMMA_LEN ("sae") },
 466 };
 467
 468 /* Segment override prefix opcodes.  To be indexed by segment register
     number.  */
 469 static const unsigned char i386_seg_prefixes[] = {
 470   ES_PREFIX_OPCODE,
 471   CS_PREFIX_OPCODE,
 472   SS_PREFIX_OPCODE,
 473   DS_PREFIX_OPCODE,
 474   FS_PREFIX_OPCODE,
 475   GS_PREFIX_OPCODE
 476 };
 477
 478 /* List of chars besides those in app.c:symbol_chars that can start an
 479    operand.  Used to prevent the scrubber eating vital white-space.
     '@' and '?' are appended only when LEX_AT / LEX_QM are defined.  */
 480 const char extra_symbol_chars[] = "*%-([{}"
 481 #ifdef LEX_AT
 482         "@"
 483 #endif
 484 #ifdef LEX_QM
 485         "?"
 486 #endif
 487         ;
 488
 489 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF))     \
 490      && !defined (TE_GNU)                               \
 491      && !defined (TE_LINUX)                             \
 492      && !defined (TE_Haiku)                             \
 493      && !defined (TE_FreeBSD)                           \
 494      && !defined (TE_DragonFly)                         \
 495      && !defined (TE_NetBSD))
 496 /* This string holds the chars that always start a comment.  If the
 497    pre-processor is disabled, these aren't very useful.  The option
 498    --divide will remove '/' from this list.  */
 499 const char *i386_comment_chars = "#/";
 500 #define SVR4_COMMENT_CHARS 1
 501 #define PREFIX_SEPARATOR '\\'
 502
 503 #else /* Elsewhere only '#' starts a comment and '/' separates prefixes.  */
 504 const char *i386_comment_chars = "#";
 505 #define PREFIX_SEPARATOR '/'
 506 #endif
 507
 508 /* This array holds the chars that only start a comment at the beginning of
 509    a line.  If the line seems to have the form '# 123 filename'
 510    .line and .file directives will appear in the pre-processed output.
 511    Note that input_file.c hand checks for '#' at the beginning of the
 512    first line of the input file.  This is because the compiler outputs
 513    #NO_APP at the beginning of its output.
 514    Also note that comments started like this one will always work if
 515    '/' isn't otherwise defined.  */
 516 const char line_comment_chars[] = "#/";
 517
 518 const char line_separator_chars[] = ";";
 519
 520 /* Chars that can be used to separate the mantissa from the exponent in
 521    floating point nums.  */
 522 const char EXP_CHARS[] = "eE";
 523
 524 /* Chars that mean this number is a floating point constant
 525    As in 0f12.456
 526    or    0d1.2345e12.  */
 527 const char FLT_CHARS[] = "fFdDxXhHbB";
 528
 529 /* Tables for lexical analysis.  */
 530 static char mnemonic_chars[256];
 531 static char register_chars[256];
 532 static char operand_chars[256];
 533 static char identifier_chars[256];
 534
 535 /* Lexical macros.  */
 536 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
 537 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 538 #define is_register_char(x) (register_chars[(unsigned char) x])
 539 #define is_space_char(x) ((x) == ' ')
 540 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
 541
 542 /* All non-digit non-letter characters that may occur in an operand.  */
 543 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
 544
 545 /* md_assemble() always leaves the strings it's passed unaltered.  To
 546    effect this we maintain a stack of saved characters that we've smashed
 547    with '\0's (indicating end of strings for various sub-fields of the
 548    assembler instruction).  */
 549 static char save_stack[32]; /* NOTE(review): the macros below do not bounds-check this.  */
 550 static char *save_stack_p; /* written then advanced on save; stepped back then read on restore */
 551 #define END_STRING_AND_SAVE(s) \
 552         do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0) /* push *(s), then store NUL at S */
 553 #define RESTORE_END_STRING(s) \
 554         do { *(s) = *--save_stack_p; } while (0) /* pop the saved char back into *(s) */
 555
 556 /* The instruction we're assembling.  */
 557 static i386_insn i;
 558
 559 /* Possible templates for current insn.  */
 560 static const templates *current_templates;
 561
 562 /* Per instruction expressionS buffers: max displacements & immediates.  */
 563 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
 564 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
 565
 566 /* Current operand we are working on.  */
 567 static int this_operand = -1;
 568
 569 /* FLAG_CODE distinguishes the code modes we support (three enumerators
 570    below; NOTE(review): this comment used to say "four" -- confirm).  */
 571
 572 enum flag_code {
 573         CODE_32BIT,
 574         CODE_16BIT,
 575         CODE_64BIT };
 576
 577 static enum flag_code flag_code;
 578 static unsigned int object_64bit;
 579 static unsigned int disallow_64bit_reloc;
 580 static int use_rela_relocations = 0;
 581 /* __tls_get_addr/___tls_get_addr symbol for TLS.  */
 582 static const char *tls_get_addr;
 583
 584 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
 585      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
 586      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
 587
 588 /* The ELF ABI to use.  */
 589 enum x86_elf_abi
 590 {
 591   I386_ABI,
 592   X86_64_ABI,
 593   X86_64_X32_ABI
 594 };
 595
 596 static enum x86_elf_abi x86_elf_abi = I386_ABI; /* defaults to I386_ABI */
 597 #endif
 598
 599 #if defined (TE_PE) || defined (TE_PEP)
 600 /* Use big object file format.  */
 601 static int use_big_obj = 0;
 602 #endif
 603
 604 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
 605 /* 1 if generating code for a shared library.  */
 606 static int shared = 0;
 607
 608 unsigned int x86_sframe_cfa_sp_reg;
 609 /* The other CFA base register for SFrame unwind info.  */
 610 unsigned int x86_sframe_cfa_fp_reg;
 611 unsigned int x86_sframe_cfa_ra_reg;
 612
 613 #endif
 614
 615 /* 1 for intel syntax,
 616    0 if att syntax.  */
 617 static int intel_syntax = 0;
 618
 619 static enum x86_64_isa /* selected x86-64 ISA variant */
 620 {
 621   amd64 = 1,    /* AMD64 ISA.  */
 622   intel64       /* Intel64 ISA.  */
 623 } isa64;
 624
 625 /* 1 for intel mnemonic,
 626    0 if att mnemonic.  Defaults to the negation of SYSV386_COMPAT.  */
 627 static int intel_mnemonic = !SYSV386_COMPAT;
 628
 629 /* 1 if pseudo registers are permitted.  */
 630 static int allow_pseudo_reg = 0;
 631
 632 /* 1 if register prefix % not required.  */
 633 static int allow_naked_reg = 0;
 634
 635 /* 1 if the assembler should add BND prefix for all control-transferring
 636    instructions supporting it, even if this prefix wasn't specified
 637    explicitly.  */
 638 static int add_bnd_prefix = 0;
 639
 640 /* 1 if pseudo index register, eiz/riz, is allowed.  */
 641 static int allow_index_reg = 0;
 642
 643 /* 1 if the assembler should ignore LOCK prefix, even if it was
 644    specified explicitly.  */
 645 static int omit_lock_prefix = 0;
 646
 647 /* 1 if the assembler should encode lfence, mfence, and sfence as
 648    "lock addl $0, (%{re}sp)".  */
 649 static int avoid_fence = 0;
 650
 651 /* 1 if lfence should be inserted after every load.  */
 652 static int lfence_after_load = 0;
 653
 654 /* Non-zero if lfence should be inserted before indirect branch.  */
 655 static enum lfence_before_indirect_branch_kind
 656   {
 657     lfence_branch_none = 0,
 658     lfence_branch_register,
 659     lfence_branch_memory,
 660     lfence_branch_all
 661   }
 662 lfence_before_indirect_branch;
 663
 664 /* Non-zero if lfence should be inserted before ret.  */
 665 static enum lfence_before_ret_kind
 666   {
 667     lfence_before_ret_none = 0,
 668     lfence_before_ret_not,
 669     lfence_before_ret_or,
 670     lfence_before_ret_shl
 671   }
 672 lfence_before_ret;
 673
 674 /* Record of the previous instruction: whether it was a directive (e.g.
     .byte), a prefix, or something else.  */
 675 static struct
 676   {
 677     segT seg;
 678     const char *file;
 679     const char *name;
 680     unsigned int line;
 681     enum last_insn_kind
 682       {
 683         last_insn_other = 0,
 684         last_insn_directive,
 685         last_insn_prefix
 686       } kind;
 687   } last_insn;
 688
 689 /* 1 if the assembler should generate relax relocations.  */
 690
 691 static int generate_relax_relocations
 692   = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
 693
 694 static enum check_kind /* note: only operand_check gets an explicit initializer */
 695   {
 696     check_none = 0,
 697     check_warning,
 698     check_error
 699   }
 700 sse_check, operand_check = check_warning;
 701
 702 /* Non-zero if branches should be aligned within power of 2 boundary.  */
 703 static int align_branch_power = 0;
 704
 705 /* Types of branches to align.  The values are used as bit positions
     by enum align_branch_bit below.  */
 706 enum align_branch_kind
 707   {
 708     align_branch_none = 0,
 709     align_branch_jcc = 1,
 710     align_branch_fused = 2,
 711     align_branch_jmp = 3,
 712     align_branch_call = 4,
 713     align_branch_indirect = 5,
 714     align_branch_ret = 6
 715   };
 716
 717 /* Type bits of branches to align.  */
 718 enum align_branch_bit
 719   {
 720     align_branch_jcc_bit = 1 << align_branch_jcc,
 721     align_branch_fused_bit = 1 << align_branch_fused,
 722     align_branch_jmp_bit = 1 << align_branch_jmp,
 723     align_branch_call_bit = 1 << align_branch_call,
 724     align_branch_indirect_bit = 1 << align_branch_indirect,
 725     align_branch_ret_bit = 1 << align_branch_ret
 726   };
 727
 728 static unsigned int align_branch = (align_branch_jcc_bit /* default: jcc, fused and jmp */
 729                                     | align_branch_fused_bit
 730                                     | align_branch_jmp_bit);
 731
 732 /* Types of condition jump used by macro-fusion.  */
 733 enum mf_jcc_kind
 734   {
 735     mf_jcc_jo = 0,  /* base opcode 0x70  */
 736     mf_jcc_jc,      /* base opcode 0x72  */
 737     mf_jcc_je,      /* base opcode 0x74  */
 738     mf_jcc_jna,     /* base opcode 0x76  */
 739     mf_jcc_js,      /* base opcode 0x78  */
 740     mf_jcc_jp,      /* base opcode 0x7a  */
 741     mf_jcc_jl,      /* base opcode 0x7c  */
 742     mf_jcc_jle,     /* base opcode 0x7e  */
 743   };
 744
 745 /* Types of compare flag-modifying instructions used by macro-fusion.  */
 746 enum mf_cmp_kind
 747   {
 748     mf_cmp_test_and,  /* test/cmp -- NOTE(review): name suggests test/and; cmp is also listed below */
 749     mf_cmp_alu_cmp,  /* add/sub/cmp */
 750     mf_cmp_incdec  /* inc/dec */
 751   };
 752
 753 /* The maximum padding size for fused jcc.  CMP like instruction can
 754    be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
 755    prefixes.   */
 756 #define MAX_FUSED_JCC_PADDING_SIZE 20
 757
 758 /* The maximum number of prefixes added for an instruction.  */
 759 static unsigned int align_branch_prefix_size = 5;
 760
 761 /* Optimization level:
 762    1. Clear the REX_W bit with register operand if possible.
 763    2. Above plus use 128bit vector instruction to clear the full vector
 764       register.
 765  */
 766 static int optimize = 0;
 767
 768 /* Optimization level for space:
 769    1. Clear the REX_W bit with register operand if possible.
 770    2. Above plus use 128bit vector instruction to clear the full vector
 771       register.
 772    3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
 773       "testb $imm7,%r8".
 774  */
 775 static int optimize_for_space = 0;
 776
 777 /* Register prefix used for error message.  */
 778 static const char *register_prefix = "%";
 779
 780 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
 781    leave, push, and pop instructions so that gcc has the same stack
 782    frame as in 32 bit mode.  */
 783 static char stackop_size = '\0';
 784
 785 /* Non-zero to optimize code alignment.  */
 786 int optimize_align_code = 1;
 787
 788 /* Non-zero to quieten some warnings.  */
 789 static int quiet_warnings = 0;
 790
 791 /* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
 792 static bool pre_386_16bit_warned;
 793
 794 /* CPU name.  */
 795 static const char *cpu_arch_name = NULL;
 796 static char *cpu_sub_arch_name = NULL;
 797
 798 /* CPU feature flags.  */
 799 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 800
 801 /* Non-zero if we have selected a cpu we are generating instructions for.  */
 802 static int cpu_arch_tune_set = 0;
 803
 804 /* Cpu we are generating instructions for.  */
 805 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
 806
 807 /* CPU feature flags of cpu we are generating instructions for.  */
 808 static i386_cpu_flags cpu_arch_tune_flags;
 809
 810 /* CPU instruction set architecture used.  */
 811 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
 812
 813 /* CPU feature flags of instruction set architecture used.  */
 814 i386_cpu_flags cpu_arch_isa_flags;
 815
 816 /* If set, conditional jumps are not automatically promoted to handle
 817    larger than a byte offset.  */
 818 static bool no_cond_jump_promotion = false;
 819
 820 /* Encode SSE instructions with VEX prefix.  */
 821 static unsigned int sse2avx;
 822
 823 /* Encode aligned vector move as unaligned vector move.  */
 824 static unsigned int use_unaligned_vector_move;
 825
 826 /* Encode scalar AVX instructions with specific vector length.  */
 827 static enum
 828   {
 829     vex128 = 0,
 830     vex256
 831   } avxscalar;
 832
 833 /* Encode VEX WIG instructions with specific vex.w.  */
 834 static enum
 835   {
 836     vexw0 = 0,
 837     vexw1
 838   } vexwig;
 839
 840 /* Encode scalar EVEX LIG instructions with specific vector length.  */
 841 static enum
 842   {
 843     evexl128 = 0,
 844     evexl256,
 845     evexl512
 846   } evexlig;
 847
 848 /* Encode EVEX WIG instructions with specific evex.w.  */
 849 static enum
 850   {
 851     evexw0 = 0,
 852     evexw1
 853   } evexwig;
 854
 855 /* Value to encode in EVEX RC bits, for SAE-only instructions.
     Defaults to rne.  */
 856 static enum rc_type evexrcig = rne;
 857
 858 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
 859 static symbolS *GOT_symbol;
 860
 861 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
 862 unsigned int x86_dwarf2_return_column;
 863
 864 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
 865 int x86_cie_data_alignment;
 866
 867 /* Interface to relax_segment.
 868    There are 3 major relax states for 386 jump insns because the
 869    different types of jumps add different sizes to frags when we're
 870    figuring out what sort of jump to choose to reach a given label.
 871
 872    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
 873    branches which are handled by md_estimate_size_before_relax() and
 874    i386_generic_table_relax_frag().  */
 875
 876 /* Types.  */
 877 #define UNCOND_JUMP 0
 878 #define COND_JUMP 1
 879 #define COND_JUMP86 2
 880 #define BRANCH_PADDING 3
 881 #define BRANCH_PREFIX 4
 882 #define FUSED_JCC_PADDING 5
 883
 884 /* Sizes.  */
 885 #define CODE16  1
 886 #define SMALL   0
 887 #define SMALL16 (SMALL | CODE16)
 888 #define BIG     2
 889 #define BIG16   (BIG | CODE16)
 890
 891 #ifndef INLINE
 892 #ifdef __GNUC__
 893 #define INLINE __inline__
 894 #else
 895 #define INLINE
 896 #endif
 897 #endif
 898
 899 #define ENCODE_RELAX_STATE(type, size) \
 900   ((relax_substateT) (((type) << 2) | (size)))
 901 #define TYPE_FROM_RELAX_STATE(s) \
 902   ((s) >> 2)
 903 #define DISP_SIZE_FROM_RELAX_STATE(s) \
 904     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
 905
 906 /* This table is used by relax_frag to promote short jumps to long
 907    ones where necessary.  SMALL (short) jumps may be promoted to BIG
 908    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
 909    don't allow a short jump in a 32 bit code segment to be promoted to
 910    a 16 bit offset jump because it's slower (requires data size
 911    prefix), and doesn't work, unless the destination is in the bottom
 912    64k of the code segment (The top 16 bits of eip are zeroed).  */
 913
 914 const relax_typeS md_relax_table[] =
 915 {
 916   /* The fields are:
 917      1) most positive reach of this state,
 918      2) most negative reach of this state,
 919      3) how many bytes this mode will have in the variable part of the frag
 920      4) which index into the table to try if we can't fit into this one.  */
 921
 922   /* UNCOND_JUMP states.  */
 923   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
 924   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
 925   /* dword jmp adds 4 bytes to frag:
 926      0 extra opcode bytes, 4 displacement bytes.  */
 927   {0, 0, 4, 0},
 928   /* word jmp adds 2 bytes to frag:
 929      0 extra opcode bytes, 2 displacement bytes.  */
 930   {0, 0, 2, 0},
 931
 932   /* COND_JUMP states.  */
 933   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
 934   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 935   /* dword conditionals add 5 bytes to frag:
 936      1 extra opcode byte, 4 displacement bytes.  */
 937   {0, 0, 5, 0},
 938   /* word conditionals add 3 bytes to frag:
 939      1 extra opcode byte, 2 displacement bytes.  */
 940   {0, 0, 3, 0},
 941
 942   /* COND_JUMP86 states.  */
 943   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
 944   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 945   /* dword conditionals add 5 bytes to frag:
 946      1 extra opcode byte, 4 displacement bytes.  */
 947   {0, 0, 5, 0},
 948   /* word conditionals add 4 bytes to frag:
 949      1 displacement byte and a 3 byte long branch insn.  */
 950   {0, 0, 4, 0}
 951 };
 952
 953 #define ARCH(n, t, f, s) \
 954   { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
 955     CPU_NONE_FLAGS }
 956 #define SUBARCH(n, e, d, s) \
 957   { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
 958     CPU_ ## d ## _FLAGS }
 959
     /* Table of architecture and ISA extension names, built with the two
        helper macros above (both are #undef'ed right after the table).

        ARCH (n, t, f, s) yields an entry named by the stringified N with
        processor type PROCESSOR_<t>, flag set CPU_<f>_FLAGS and
        CPU_NONE_FLAGS as the second flag set.

        SUBARCH (n, e, d, s) yields an entry named by the stringified N
        with processor type PROCESSOR_NONE and the two flag sets
        CPU_<e>_FLAGS and CPU_<d>_FLAGS.

        NOTE(review): E and D look like the flags to enable and to disable
        for the extension (cf. the "Disable only!" entry, whose E is
        NONE), and S looks like a marker for alias names - confirm
        against the definition of arch_entry.  */
 960 static const arch_entry cpu_arch[] =
 961 {
 962   /* Do not replace the first two entries - i386_target_format() and
 963      set_cpu_arch() rely on them being there in this order.  */
 964   ARCH (generic32, GENERIC32, GENERIC32, false),
 965   ARCH (generic64, GENERIC64, GENERIC64, false),
 966   ARCH (i8086, UNKNOWN, NONE, false),
 967   ARCH (i186, UNKNOWN, 186, false),
 968   ARCH (i286, UNKNOWN, 286, false),
 969   ARCH (i386, I386, 386, false),
 970   ARCH (i486, I486, 486, false),
 971   ARCH (i586, PENTIUM, 586, false),
 972   ARCH (i686, PENTIUMPRO, 686, false),
 973   ARCH (pentium, PENTIUM, 586, false),
 974   ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
 975   ARCH (pentiumii, PENTIUMPRO, P2, false),
 976   ARCH (pentiumiii, PENTIUMPRO, P3, false),
 977   ARCH (pentium4, PENTIUM4, P4, false),
 978   ARCH (prescott, NOCONA, CORE, false),
 979   ARCH (nocona, NOCONA, NOCONA, false),
 980   ARCH (yonah, CORE, CORE, true),
 981   ARCH (core, CORE, CORE, false),
 982   ARCH (merom, CORE2, CORE2, true),
 983   ARCH (core2, CORE2, CORE2, false),
 984   ARCH (corei7, COREI7, COREI7, false),
 985   ARCH (iamcu, IAMCU, IAMCU, false),
 986   ARCH (k6, K6, K6, false),
 987   ARCH (k6_2, K6, K6_2, false),
 988   ARCH (athlon, ATHLON, ATHLON, false),
 989   ARCH (sledgehammer, K8, K8, true),
 990   ARCH (opteron, K8, K8, false),
 991   ARCH (k8, K8, K8, false),
 992   ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
 993   ARCH (bdver1, BD, BDVER1, false),
 994   ARCH (bdver2, BD, BDVER2, false),
 995   ARCH (bdver3, BD, BDVER3, false),
 996   ARCH (bdver4, BD, BDVER4, false),
 997   ARCH (znver1, ZNVER, ZNVER1, false),
 998   ARCH (znver2, ZNVER, ZNVER2, false),
 999   ARCH (znver3, ZNVER, ZNVER3, false),
1000   ARCH (znver4, ZNVER, ZNVER4, false),
1001   ARCH (btver1, BT, BTVER1, false),
1002   ARCH (btver2, BT, BTVER2, false),
1003
1004   SUBARCH (8087, 8087, ANY_8087, false),
1005   SUBARCH (87, NONE, ANY_8087, false), /* Disable only!  */
1006   SUBARCH (287, 287, ANY_287, false),
1007   SUBARCH (387, 387, ANY_387, false),
1008   SUBARCH (687, 687, ANY_687, false),
1009   SUBARCH (cmov, CMOV, CMOV, false),
1010   SUBARCH (fxsr, FXSR, ANY_FXSR, false),
1011   SUBARCH (mmx, MMX, ANY_MMX, false),
1012   SUBARCH (sse, SSE, ANY_SSE, false),
1013   SUBARCH (sse2, SSE2, ANY_SSE2, false),
1014   SUBARCH (sse3, SSE3, ANY_SSE3, false),
1015   SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
1016   SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
1017   SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
1018   SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
1019   SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
1020   SUBARCH (avx, AVX, ANY_AVX, false),
1021   SUBARCH (avx2, AVX2, ANY_AVX2, false),
1022   SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
1023   SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
1024   SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
1025   SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
1026   SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
1027   SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
1028   SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
1029   SUBARCH (vmx, VMX, ANY_VMX, false),
1030   SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
1031   SUBARCH (smx, SMX, SMX, false),
1032   SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
1033   SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
1034   SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
1035   SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
1036   SUBARCH (aes, AES, ANY_AES, false),
1037   SUBARCH (pclmul, PCLMUL, ANY_PCLMUL, false),
1038   SUBARCH (clmul, PCLMUL, ANY_PCLMUL, true),
1039   SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
1040   SUBARCH (rdrnd, RDRND, RDRND, false),
1041   SUBARCH (f16c, F16C, ANY_F16C, false),
1042   SUBARCH (bmi2, BMI2, BMI2, false),
1043   SUBARCH (fma, FMA, ANY_FMA, false),
1044   SUBARCH (fma4, FMA4, ANY_FMA4, false),
1045   SUBARCH (xop, XOP, ANY_XOP, false),
1046   SUBARCH (lwp, LWP, ANY_LWP, false),
1047   SUBARCH (movbe, MOVBE, MOVBE, false),
1048   SUBARCH (cx16, CX16, CX16, false),
1049   SUBARCH (ept, EPT, ANY_EPT, false),
1050   SUBARCH (lzcnt, LZCNT, LZCNT, false),
1051   SUBARCH (popcnt, POPCNT, POPCNT, false),
1052   SUBARCH (hle, HLE, HLE, false),
1053   SUBARCH (rtm, RTM, ANY_RTM, false),
1054   SUBARCH (tsx, TSX, TSX, false),
1055   SUBARCH (invpcid, INVPCID, INVPCID, false),
1056   SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
1057   SUBARCH (nop, NOP, NOP, false),
1058   SUBARCH (syscall, SYSCALL, SYSCALL, false),
1059   SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
1060   SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
1061   SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
1062   SUBARCH (padlock, PADLOCK, PADLOCK, false),
1063   SUBARCH (pacifica, SVME, ANY_SVME, true),
1064   SUBARCH (svme, SVME, ANY_SVME, false),
1065   SUBARCH (abm, ABM, ABM, false),
1066   SUBARCH (bmi, BMI, BMI, false),
1067   SUBARCH (tbm, TBM, TBM, false),
1068   SUBARCH (adx, ADX, ADX, false),
1069   SUBARCH (rdseed, RDSEED, RDSEED, false),
1070   SUBARCH (prfchw, PRFCHW, PRFCHW, false),
1071   SUBARCH (smap, SMAP, SMAP, false),
1072   SUBARCH (mpx, MPX, ANY_MPX, false),
1073   SUBARCH (sha, SHA, ANY_SHA, false),
1074   SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
1075   SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
1076   SUBARCH (se1, SE1, SE1, false),
1077   SUBARCH (clwb, CLWB, CLWB, false),
1078   SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
1079   SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
1080   SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
1081   SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
1082   SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
1083   SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
1084   SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
1085   SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
1086   SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
1087   SUBARCH (clzero, CLZERO, CLZERO, false),
1088   SUBARCH (mwaitx, MWAITX, MWAITX, false),
1089   SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
1090   SUBARCH (rdpid, RDPID, RDPID, false),
1091   SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
1092   SUBARCH (ibt, IBT, IBT, false),
1093   SUBARCH (shstk, SHSTK, SHSTK, false),
1094   SUBARCH (gfni, GFNI, ANY_GFNI, false),
1095   SUBARCH (vaes, VAES, ANY_VAES, false),
1096   SUBARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, false),
1097   SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
1098   SUBARCH (pconfig, PCONFIG, PCONFIG, false),
1099   SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
1100   SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
1101   SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
1102   SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
1103   SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
1104   SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
1105   SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
1106   SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
1107   SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
1108   SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
1109            ANY_AVX512_VP2INTERSECT, false),
1110   SUBARCH (tdx, TDX, TDX, false),
1111   SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
1112   SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
1113   SUBARCH (rdpru, RDPRU, RDPRU, false),
1114   SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
1115   SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
1116   SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
1117   SUBARCH (kl, KL, ANY_KL, false),
1118   SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
1119   SUBARCH (uintr, UINTR, UINTR, false),
1120   SUBARCH (hreset, HRESET, HRESET, false),
1121   SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
1122   SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
1123   SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
1124   SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
1125   SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
1126   SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
1127   SUBARCH (msrlist, MSRLIST, MSRLIST, false),
1128   SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
1129   SUBARCH (rao_int, RAO_INT, RAO_INT, false),
1130   SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
1131 };
1132
     /* The helper macros are only meant for the table above.  */
1133 #undef SUBARCH
1134 #undef ARCH
1135
1136 #ifdef I386COFF
1137 /* Like s_lcomm_internal in gas/read.c but the alignment string
1138    is allowed to be optional.  */
1139
1140 static symbolS *
1141 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1142 {
1143   addressT align = 0;
1144
1145   SKIP_WHITESPACE ();
1146
1147   if (needs_align
1148       && *input_line_pointer == ',')
1149     {
1150       align = parse_align (needs_align - 1);
1151
1152       if (align == (addressT) -1)
1153         return NULL;
1154     }
1155   else
1156     {
1157       if (size >= 8)
1158         align = 3;
1159       else if (size >= 4)
1160         align = 2;
1161       else if (size >= 2)
1162         align = 1;
1163       else
1164         align = 0;
1165     }
1166
1167   bss_alloc (symbolP, size, align);
1168   return symbolP;
1169 }
1170
1171 static void
1172 pe_lcomm (int needs_align)
1173 {
1174   s_comm_internal (needs_align * 2, pe_lcomm_internal);
1175 }
1176 #endif
1177
1178 const pseudo_typeS md_pseudo_table[] =
     /* Pseudo-op table for this target.  Each entry lists a directive
        name (without the leading dot), a handler function and an integer
        value; the all-zero entry ends the table.
        NOTE(review): the integer is presumably passed to the handler as
        its argument - confirm against pseudo_typeS.  Which entries exist
        depends on the object format and target macros tested below.  */
1179 {
1180 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1181   {"align", s_align_bytes, 0},
1182 #else
1183   {"align", s_align_ptwo, 0},
1184 #endif
1185   {"arch", set_cpu_arch, 0},
1186 #ifndef I386COFF
1187   {"bss", s_bss, 0},
1188 #else
1189   {"lcomm", pe_lcomm, 1},
1190 #endif
1191   {"ffloat", float_cons, 'f'},
1192   {"dfloat", float_cons, 'd'},
1193   {"tfloat", float_cons, 'x'},
1194   {"hfloat", float_cons, 'h'},
1195   {"bfloat16", float_cons, 'b'},
1196   {"value", cons, 2},
1197   {"slong", signed_cons, 4},
1198   {"noopt", s_ignore, 0},
1199   {"optim", s_ignore, 0},
1200   {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1201   {"code16", set_code_flag, CODE_16BIT},
1202   {"code32", set_code_flag, CODE_32BIT},
1203 #ifdef BFD64
1204   {"code64", set_code_flag, CODE_64BIT},
1205 #endif
1206   {"intel_syntax", set_intel_syntax, 1},
1207   {"att_syntax", set_intel_syntax, 0},
1208   {"intel_mnemonic", set_intel_mnemonic, 1},
1209   {"att_mnemonic", set_intel_mnemonic, 0},
1210   {"allow_index_reg", set_allow_index_reg, 1},
1211   {"disallow_index_reg", set_allow_index_reg, 0},
1212   {"sse_check", set_check, 0},
1213   {"operand_check", set_check, 1},
1214 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1215   {"largecomm", handle_large_common, 0},
1216 #else
1217   {"file", dwarf2_directive_file, 0},
1218   {"loc", dwarf2_directive_loc, 0},
1219   {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1220 #endif
1221 #ifdef TE_PE
1222   {"secrel32", pe_directive_secrel, 0},
1223   {"secidx", pe_directive_secidx, 0},
1224 #endif
1225   {0, 0, 0}
1226 };
1227
1228 /* For interface with expression ().  */
     /* Current position in the input line; pe_lcomm_internal in this file
        also reads it to look for an optional ",alignment" operand.  */
1229 extern char *input_line_pointer;
1230
1231 /* Hash table for instruction mnemonic lookup.  */
     /* NOTE(review): presumably filled in during assembler start-up -
        the code doing so is not in this part of the file.  */
1232 static htab_t op_hash;
1233
1234 /* Hash table for register lookup.  */
1235 static htab_t reg_hash;
1236 \f
1237   /* Various efficient no-op patterns for aligning code labels.
1238      Note: Don't try to assemble the instructions in the comments.
1239      0L and 0w are not legal.  */
     /* The number in each array name is the length of the pattern in
        bytes.  */
1240 static const unsigned char f32_1[] =
1241   {0x90};                               /* nop                  */
1242 static const unsigned char f32_2[] =
1243   {0x66,0x90};                          /* xchg %ax,%ax         */
1244 static const unsigned char f32_3[] =
1245   {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi    */
1246 static const unsigned char f32_4[] =
1247   {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi  */
1248 static const unsigned char f32_6[] =
1249   {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi   */
1250 static const unsigned char f32_7[] =
1251   {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1252 static const unsigned char f16_3[] =
1253   {0x8d,0x74,0x00};                     /* lea 0(%si),%si       */
1254 static const unsigned char f16_4[] =
1255   {0x8d,0xb4,0x00,0x00};                /* lea 0W(%si),%si      */
     /* Opcode bytes of the jumps that i386_generate_nops emits to skip
        over long runs of padding; the displacement is written
        separately.  */
1256 static const unsigned char jump_disp8[] =
1257   {0xeb};                               /* jmp disp8           */
1258 static const unsigned char jump32_disp32[] =
1259   {0xe9};                               /* jmp disp32          */
1260 static const unsigned char jump16_disp32[] =
1261   {0x66,0xe9};                          /* jmp disp32          */
1262 /* 32-bit NOPs patterns.  */
     /* Entry N - 1 is the N-byte pattern.  There is no 5-byte pattern, so
        that slot is NULL; i386_output_nops copes with NULL slots by using
        the next smaller pattern.  */
1263 static const unsigned char *const f32_patt[] = {
1264   f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1265 };
1266 /* 16-bit NOPs patterns.  */
     /* Entry N - 1 is the N-byte pattern; the one- and two-byte patterns
        are shared with f32_patt.  */
1267 static const unsigned char *const f16_patt[] = {
1268   f32_1, f32_2, f16_3, f16_4
1269 };
1270 /* nopl (%[re]ax) */
1271 static const unsigned char alt_3[] =
1272   {0x0f,0x1f,0x00};
1273 /* nopl 0(%[re]ax) */
1274 static const unsigned char alt_4[] =
1275   {0x0f,0x1f,0x40,0x00};
1276 /* nopl 0(%[re]ax,%[re]ax,1) */
1277 static const unsigned char alt_5[] =
1278   {0x0f,0x1f,0x44,0x00,0x00};
1279 /* nopw 0(%[re]ax,%[re]ax,1) */
1280 static const unsigned char alt_6[] =
1281   {0x66,0x0f,0x1f,0x44,0x00,0x00};
1282 /* nopl 0L(%[re]ax) */
1283 static const unsigned char alt_7[] =
1284   {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1285 /* nopl 0L(%[re]ax,%[re]ax,1) */
1286 static const unsigned char alt_8[] =
1287   {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1288 /* nopw 0L(%[re]ax,%[re]ax,1) */
1289 static const unsigned char alt_9[] =
1290   {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1291 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1292 static const unsigned char alt_10[] =
1293   {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1294 /* data16 nopw %cs:0L(%eax,%eax,1) */
1295 static const unsigned char alt_11[] =
1296   {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1297 /* 32-bit and 64-bit NOPs patterns.  */
     /* Entry N - 1 is the N-byte pattern, for N from 1 to 11; every slot
        is populated.  */
1298 static const unsigned char *const alt_patt[] = {
1299   f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1300   alt_9, alt_10, alt_11
1301 };
1302
/* Generate COUNT bytes of NOPs at WHERE from the pattern table PATT,
   using single NOP instructions of at most MAX_SINGLE_NOP_SIZE bytes.

   PATT[n - 1] is the n-byte pattern, or NULL if there is none.  The
   longest allowed pattern is emitted as often as it fits; whatever is
   left is covered by one shorter pattern, followed by a one-byte NOP if
   there is no pattern of exactly the remaining length.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
                  int count, int max_single_nop_size)
{
  const unsigned char *nop;
  int bulk;
  int tail;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
                max_single_nop_size);
      return;
    }

  /* Use the next smaller pattern if the requested one isn't
     available.  */
  nop = patt[max_single_nop_size - 1];
  if (nop == NULL)
    nop = patt[--max_single_nop_size - 1];

  /* Place the longer NOPs first.  */
  tail = count % max_single_nop_size;
  for (bulk = count - tail; bulk > 0; bulk -= max_single_nop_size)
    {
      memcpy (where, nop, max_single_nop_size);
      where += max_single_nop_size;
    }

  if (tail == 0)
    return;

  nop = patt[tail - 1];
  if (nop != NULL)
    {
      memcpy (where, nop, tail);
      return;
    }

  /* No pattern of this length: use the next smaller one plus a
     one-byte NOP.  */
  tail--;
  memcpy (where, patt[tail - 1], tail);
  where[tail] = *patt[0];
}
1354
1355 static INLINE int
1356 fits_in_imm7 (offsetT num)
1357 {
1358   return (num & 0x7f) == num;
1359 }
1360
1361 static INLINE int
1362 fits_in_imm31 (offsetT num)
1363 {
1364   return (num & 0x7fffffff) == num;
1365 }
1366
1367 /* Genenerate COUNT bytes of NOPs to WHERE with the maximum size of a
1368    single NOP instruction LIMIT.  */
1369
1370 void
1371 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1372 {
1373   const unsigned char *const *patt = NULL;
1374   int max_single_nop_size;
1375   /* Maximum number of NOPs before switching to jump over NOPs.  */
1376   int max_number_of_nops;
1377
1378   switch (fragP->fr_type)
1379     {
1380     case rs_fill_nop:
1381     case rs_align_code:
1382       break;
1383     case rs_machine_dependent:
1384       /* Allow NOP padding for jumps and calls.  */
1385       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1386           || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1387         break;
1388       /* Fall through.  */
1389     default:
1390       return;
1391     }
1392
1393   /* We need to decide which NOP sequence to use for 32bit and
1394      64bit. When -mtune= is used:
1395
1396      1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1397      PROCESSOR_GENERIC32, f32_patt will be used.
1398      2. For the rest, alt_patt will be used.
1399
1400      When -mtune= isn't used, alt_patt will be used if
1401      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
1402      be used.
1403
1404      When -march= or .arch is used, we can't use anything beyond
1405      cpu_arch_isa_flags.   */
1406
1407   if (flag_code == CODE_16BIT)
1408     {
1409       patt = f16_patt;
1410       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1411       /* Limit number of NOPs to 2 in 16-bit mode.  */
1412       max_number_of_nops = 2;
1413     }
1414   else
1415     {
1416       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1417         {
1418           /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
1419           switch (cpu_arch_tune)
1420             {
1421             case PROCESSOR_UNKNOWN:
1422               /* We use cpu_arch_isa_flags to check if we SHOULD
1423                  optimize with nops.  */
1424               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1425                 patt = alt_patt;
1426               else
1427                 patt = f32_patt;
1428               break;
1429             case PROCESSOR_PENTIUM4:
1430             case PROCESSOR_NOCONA:
1431             case PROCESSOR_CORE:
1432             case PROCESSOR_CORE2:
1433             case PROCESSOR_COREI7:
1434             case PROCESSOR_GENERIC64:
1435             case PROCESSOR_K6:
1436             case PROCESSOR_ATHLON:
1437             case PROCESSOR_K8:
1438             case PROCESSOR_AMDFAM10:
1439             case PROCESSOR_BD:
1440             case PROCESSOR_ZNVER:
1441             case PROCESSOR_BT:
1442               patt = alt_patt;
1443               break;
1444             case PROCESSOR_I386:
1445             case PROCESSOR_I486:
1446             case PROCESSOR_PENTIUM:
1447             case PROCESSOR_PENTIUMPRO:
1448             case PROCESSOR_IAMCU:
1449             case PROCESSOR_GENERIC32:
1450               patt = f32_patt;
1451               break;
1452             case PROCESSOR_NONE:
1453               abort ();
1454             }
1455         }
1456       else
1457         {
1458           switch (fragP->tc_frag_data.tune)
1459             {
1460             case PROCESSOR_UNKNOWN:
1461               /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1462                  PROCESSOR_UNKNOWN.  */
1463               abort ();
1464               break;
1465
1466             case PROCESSOR_I386:
1467             case PROCESSOR_I486:
1468             case PROCESSOR_PENTIUM:
1469             case PROCESSOR_IAMCU:
1470             case PROCESSOR_K6:
1471             case PROCESSOR_ATHLON:
1472             case PROCESSOR_K8:
1473             case PROCESSOR_AMDFAM10:
1474             case PROCESSOR_BD:
1475             case PROCESSOR_ZNVER:
1476             case PROCESSOR_BT:
1477             case PROCESSOR_GENERIC32:
1478               /* We use cpu_arch_isa_flags to check if we CAN optimize
1479                  with nops.  */
1480               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1481                 patt = alt_patt;
1482               else
1483                 patt = f32_patt;
1484               break;
1485             case PROCESSOR_PENTIUMPRO:
1486             case PROCESSOR_PENTIUM4:
1487             case PROCESSOR_NOCONA:
1488             case PROCESSOR_CORE:
1489             case PROCESSOR_CORE2:
1490             case PROCESSOR_COREI7:
1491               if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1492                 patt = alt_patt;
1493               else
1494                 patt = f32_patt;
1495               break;
1496             case PROCESSOR_GENERIC64:
1497               patt = alt_patt;
1498               break;
1499             case PROCESSOR_NONE:
1500               abort ();
1501             }
1502         }
1503
1504       if (patt == f32_patt)
1505         {
1506           max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1507           /* Limit number of NOPs to 2 for older processors.  */
1508           max_number_of_nops = 2;
1509         }
1510       else
1511         {
1512           max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1513           /* Limit number of NOPs to 7 for newer processors.  */
1514           max_number_of_nops = 7;
1515         }
1516     }
1517
1518   if (limit == 0)
1519     limit = max_single_nop_size;
1520
1521   if (fragP->fr_type == rs_fill_nop)
1522     {
1523       /* Output NOPs for .nop directive.  */
1524       if (limit > max_single_nop_size)
1525         {
1526           as_bad_where (fragP->fr_file, fragP->fr_line,
1527                         _("invalid single nop size: %d "
1528                           "(expect within [0, %d])"),
1529                         limit, max_single_nop_size);
1530           return;
1531         }
1532     }
1533   else if (fragP->fr_type != rs_machine_dependent)
1534     fragP->fr_var = count;
1535
1536   if ((count / max_single_nop_size) > max_number_of_nops)
1537     {
1538       /* Generate jump over NOPs.  */
1539       offsetT disp = count - 2;
1540       if (fits_in_imm7 (disp))
1541         {
1542           /* Use "jmp disp8" if possible.  */
1543           count = disp;
1544           where[0] = jump_disp8[0];
1545           where[1] = count;
1546           where += 2;
1547         }
1548       else
1549         {
1550           unsigned int size_of_jump;
1551
1552           if (flag_code == CODE_16BIT)
1553             {
1554               where[0] = jump16_disp32[0];
1555               where[1] = jump16_disp32[1];
1556               size_of_jump = 2;
1557             }
1558           else
1559             {
1560               where[0] = jump32_disp32[0];
1561               size_of_jump = 1;
1562             }
1563
1564           count -= size_of_jump + 4;
1565           if (!fits_in_imm31 (count))
1566             {
1567               as_bad_where (fragP->fr_file, fragP->fr_line,
1568                             _("jump over nop padding out of range"));
1569               return;
1570             }
1571
1572           md_number_to_chars (where + size_of_jump, count, 4);
1573           where += size_of_jump + 4;
1574         }
1575     }
1576
1577   /* Generate multiple NOPs.  */
1578   i386_output_nops (where, patt, count, limit);
1579 }
1580
1581 static INLINE int
1582 operand_type_all_zero (const union i386_operand_type *x)
1583 {
1584   switch (ARRAY_SIZE(x->array))
1585     {
1586     case 3:
1587       if (x->array[2])
1588         return 0;
1589       /* Fall through.  */
1590     case 2:
1591       if (x->array[1])
1592         return 0;
1593       /* Fall through.  */
1594     case 1:
1595       return !x->array[0];
1596     default:
1597       abort ();
1598     }
1599 }
1600
1601 static INLINE void
1602 operand_type_set (union i386_operand_type *x, unsigned int v)
1603 {
1604   switch (ARRAY_SIZE(x->array))
1605     {
1606     case 3:
1607       x->array[2] = v;
1608       /* Fall through.  */
1609     case 2:
1610       x->array[1] = v;
1611       /* Fall through.  */
1612     case 1:
1613       x->array[0] = v;
1614       /* Fall through.  */
1615       break;
1616     default:
1617       abort ();
1618     }
1619
1620   x->bitfield.class = ClassNone;
1621   x->bitfield.instance = InstanceNone;
1622 }
1623
1624 static INLINE int
1625 operand_type_equal (const union i386_operand_type *x,
1626                     const union i386_operand_type *y)
1627 {
1628   switch (ARRAY_SIZE(x->array))
1629     {
1630     case 3:
1631       if (x->array[2] != y->array[2])
1632         return 0;
1633       /* Fall through.  */
1634     case 2:
1635       if (x->array[1] != y->array[1])
1636         return 0;
1637       /* Fall through.  */
1638     case 1:
1639       return x->array[0] == y->array[0];
1640       break;
1641     default:
1642       abort ();
1643     }
1644 }
1645
1646 static INLINE int
1647 cpu_flags_all_zero (const union i386_cpu_flags *x)
1648 {
1649   switch (ARRAY_SIZE(x->array))
1650     {
1651     case 5:
1652       if (x->array[4])
1653         return 0;
1654       /* Fall through.  */
1655     case 4:
1656       if (x->array[3])
1657         return 0;
1658       /* Fall through.  */
1659     case 3:
1660       if (x->array[2])
1661         return 0;
1662       /* Fall through.  */
1663     case 2:
1664       if (x->array[1])
1665         return 0;
1666       /* Fall through.  */
1667     case 1:
1668       return !x->array[0];
1669     default:
1670       abort ();
1671     }
1672 }
1673
1674 static INLINE int
1675 cpu_flags_equal (const union i386_cpu_flags *x,
1676                  const union i386_cpu_flags *y)
1677 {
1678   switch (ARRAY_SIZE(x->array))
1679     {
1680     case 5:
1681       if (x->array[4] != y->array[4])
1682         return 0;
1683       /* Fall through.  */
1684     case 4:
1685       if (x->array[3] != y->array[3])
1686         return 0;
1687       /* Fall through.  */
1688     case 3:
1689       if (x->array[2] != y->array[2])
1690         return 0;
1691       /* Fall through.  */
1692     case 2:
1693       if (x->array[1] != y->array[1])
1694         return 0;
1695       /* Fall through.  */
1696     case 1:
1697       return x->array[0] == y->array[0];
1698       break;
1699     default:
1700       abort ();
1701     }
1702 }
1703
1704 static INLINE int
1705 cpu_flags_check_cpu64 (i386_cpu_flags f)
1706 {
1707   return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1708            || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1709 }
1710
1711 static INLINE i386_cpu_flags
1712 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1713 {
1714   switch (ARRAY_SIZE (x.array))
1715     {
1716     case 5:
1717       x.array [4] &= y.array [4];
1718       /* Fall through.  */
1719     case 4:
1720       x.array [3] &= y.array [3];
1721       /* Fall through.  */
1722     case 3:
1723       x.array [2] &= y.array [2];
1724       /* Fall through.  */
1725     case 2:
1726       x.array [1] &= y.array [1];
1727       /* Fall through.  */
1728     case 1:
1729       x.array [0] &= y.array [0];
1730       break;
1731     default:
1732       abort ();
1733     }
1734   return x;
1735 }
1736
1737 static INLINE i386_cpu_flags
1738 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1739 {
1740   switch (ARRAY_SIZE (x.array))
1741     {
1742     case 5:
1743       x.array [4] |= y.array [4];
1744       /* Fall through.  */
1745     case 4:
1746       x.array [3] |= y.array [3];
1747       /* Fall through.  */
1748     case 3:
1749       x.array [2] |= y.array [2];
1750       /* Fall through.  */
1751     case 2:
1752       x.array [1] |= y.array [1];
1753       /* Fall through.  */
1754     case 1:
1755       x.array [0] |= y.array [0];
1756       break;
1757     default:
1758       abort ();
1759     }
1760   return x;
1761 }
1762
1763 static INLINE i386_cpu_flags
1764 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1765 {
1766   switch (ARRAY_SIZE (x.array))
1767     {
1768     case 5:
1769       x.array [4] &= ~y.array [4];
1770       /* Fall through.  */
1771     case 4:
1772       x.array [3] &= ~y.array [3];
1773       /* Fall through.  */
1774     case 3:
1775       x.array [2] &= ~y.array [2];
1776       /* Fall through.  */
1777     case 2:
1778       x.array [1] &= ~y.array [1];
1779       /* Fall through.  */
1780     case 1:
1781       x.array [0] &= ~y.array [0];
1782       break;
1783     default:
1784       abort ();
1785     }
1786   return x;
1787 }
1788
1789 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
     /* NOTE(review): avx512 is not referenced in this part of the file;
        presumably it is the set of AVX512F and everything depending on
        it - confirm against its users.  */
1790
     /* Bits of the value returned by cpu_flags_match ().  */
1791 #define CPU_FLAGS_ARCH_MATCH            0x1
1792 #define CPU_FLAGS_64BIT_MATCH           0x2
1793
     /* Both the architecture and the 64-bit mode requirement match.  */
1794 #define CPU_FLAGS_PERFECT_MATCH \
1795   (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1796
1797 /* Return CPU flags match bits. */
1798
1799 static int
1800 cpu_flags_match (const insn_template *t)
1801 {
1802   i386_cpu_flags x = t->cpu_flags;
1803   int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1804
1805   x.bitfield.cpu64 = 0;
1806   x.bitfield.cpuno64 = 0;
1807
1808   if (cpu_flags_all_zero (&x))
1809     {
1810       /* This instruction is available on all archs.  */
1811       match |= CPU_FLAGS_ARCH_MATCH;
1812     }
1813   else
1814     {
1815       /* This instruction is available only on some archs.  */
1816       i386_cpu_flags cpu = cpu_arch_flags;
1817
1818       /* AVX512VL is no standalone feature - match it and then strip it.  */
1819       if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1820         return match;
1821       x.bitfield.cpuavx512vl = 0;
1822
1823       /* AVX and AVX2 present at the same time express an operand size
1824          dependency - strip AVX2 for the purposes here.  The operand size
1825          dependent check occurs in check_vecOperands().  */
1826       if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1827         x.bitfield.cpuavx2 = 0;
1828
1829       cpu = cpu_flags_and (x, cpu);
1830       if (!cpu_flags_all_zero (&cpu))
1831         {
1832           if (x.bitfield.cpuavx)
1833             {
1834               /* We need to check a few extra flags with AVX.  */
1835               if (cpu.bitfield.cpuavx
1836                   && (!t->opcode_modifier.sse2avx
1837                       || (sse2avx && !i.prefix[DATA_PREFIX]))
1838                   && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1839                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1840                   && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1841                 match |= CPU_FLAGS_ARCH_MATCH;
1842             }
1843           else if (x.bitfield.cpuavx512f)
1844             {
1845               /* We need to check a few extra flags with AVX512F.  */
1846               if (cpu.bitfield.cpuavx512f
1847                   && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1848                   && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1849                   && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1850                 match |= CPU_FLAGS_ARCH_MATCH;
1851             }
1852           else
1853             match |= CPU_FLAGS_ARCH_MATCH;
1854         }
1855     }
1856   return match;
1857 }
1858
1859 static INLINE i386_operand_type
1860 operand_type_and (i386_operand_type x, i386_operand_type y)
1861 {
1862   if (x.bitfield.class != y.bitfield.class)
1863     x.bitfield.class = ClassNone;
1864   if (x.bitfield.instance != y.bitfield.instance)
1865     x.bitfield.instance = InstanceNone;
1866
1867   switch (ARRAY_SIZE (x.array))
1868     {
1869     case 3:
1870       x.array [2] &= y.array [2];
1871       /* Fall through.  */
1872     case 2:
1873       x.array [1] &= y.array [1];
1874       /* Fall through.  */
1875     case 1:
1876       x.array [0] &= y.array [0];
1877       break;
1878     default:
1879       abort ();
1880     }
1881   return x;
1882 }
1883
1884 static INLINE i386_operand_type
1885 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1886 {
1887   gas_assert (y.bitfield.class == ClassNone);
1888   gas_assert (y.bitfield.instance == InstanceNone);
1889
1890   switch (ARRAY_SIZE (x.array))
1891     {
1892     case 3:
1893       x.array [2] &= ~y.array [2];
1894       /* Fall through.  */
1895     case 2:
1896       x.array [1] &= ~y.array [1];
1897       /* Fall through.  */
1898     case 1:
1899       x.array [0] &= ~y.array [0];
1900       break;
1901     default:
1902       abort ();
1903     }
1904   return x;
1905 }
1906
1907 static INLINE i386_operand_type
1908 operand_type_or (i386_operand_type x, i386_operand_type y)
1909 {
1910   gas_assert (x.bitfield.class == ClassNone ||
1911               y.bitfield.class == ClassNone ||
1912               x.bitfield.class == y.bitfield.class);
1913   gas_assert (x.bitfield.instance == InstanceNone ||
1914               y.bitfield.instance == InstanceNone ||
1915               x.bitfield.instance == y.bitfield.instance);
1916
1917   switch (ARRAY_SIZE (x.array))
1918     {
1919     case 3:
1920       x.array [2] |= y.array [2];
1921       /* Fall through.  */
1922     case 2:
1923       x.array [1] |= y.array [1];
1924       /* Fall through.  */
1925     case 1:
1926       x.array [0] |= y.array [0];
1927       break;
1928     default:
1929       abort ();
1930     }
1931   return x;
1932 }
1933
1934 static INLINE i386_operand_type
1935 operand_type_xor (i386_operand_type x, i386_operand_type y)
1936 {
1937   gas_assert (y.bitfield.class == ClassNone);
1938   gas_assert (y.bitfield.instance == InstanceNone);
1939
1940   switch (ARRAY_SIZE (x.array))
1941     {
1942     case 3:
1943       x.array [2] ^= y.array [2];
1944       /* Fall through.  */
1945     case 2:
1946       x.array [1] ^= y.array [1];
1947       /* Fall through.  */
1948     case 1:
1949       x.array [0] ^= y.array [0];
1950       break;
1951     default:
1952       abort ();
1953     }
1954   return x;
1955 }
1956
/* Operand type with all four displacement size bits (disp8, disp16,
   disp32 and disp64) set and everything else left zero.  */
static const i386_operand_type anydisp = {
  .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
};
1960
/* Properties operand_type_check () can test an operand type for.  */
enum operand_type
{
  reg,          /* Operand class is Reg.  */
  imm,          /* Any of the immediate size bits is set.  */
  disp,         /* Any of the displacement size bits is set.  */
  anymem        /* Any displacement size bit or baseindex is set.  */
};
1968
1969 static INLINE int
1970 operand_type_check (i386_operand_type t, enum operand_type c)
1971 {
1972   switch (c)
1973     {
1974     case reg:
1975       return t.bitfield.class == Reg;
1976
1977     case imm:
1978       return (t.bitfield.imm8
1979               || t.bitfield.imm8s
1980               || t.bitfield.imm16
1981               || t.bitfield.imm32
1982               || t.bitfield.imm32s
1983               || t.bitfield.imm64);
1984
1985     case disp:
1986       return (t.bitfield.disp8
1987               || t.bitfield.disp16
1988               || t.bitfield.disp32
1989               || t.bitfield.disp64);
1990
1991     case anymem:
1992       return (t.bitfield.disp8
1993               || t.bitfield.disp16
1994               || t.bitfield.disp32
1995               || t.bitfield.disp64
1996               || t.bitfield.baseindex);
1997
1998     default:
1999       abort ();
2000     }
2001
2002   return 0;
2003 }
2004
2005 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2006    between operand GIVEN and opeand WANTED for instruction template T.  */
2007
2008 static INLINE int
2009 match_operand_size (const insn_template *t, unsigned int wanted,
2010                     unsigned int given)
2011 {
2012   return !((i.types[given].bitfield.byte
2013             && !t->operand_types[wanted].bitfield.byte)
2014            || (i.types[given].bitfield.word
2015                && !t->operand_types[wanted].bitfield.word)
2016            || (i.types[given].bitfield.dword
2017                && !t->operand_types[wanted].bitfield.dword)
2018            || (i.types[given].bitfield.qword
2019                && (!t->operand_types[wanted].bitfield.qword
2020                    /* Don't allow 64-bit (memory) operands outside of 64-bit
2021                       mode, when they're used where a 64-bit GPR could also
2022                       be used.  Checking is needed for Intel Syntax only.  */
2023                    || (intel_syntax
2024                        && flag_code != CODE_64BIT
2025                        && (t->operand_types[wanted].bitfield.class == Reg
2026                            || t->operand_types[wanted].bitfield.class == Accum
2027                            || t->opcode_modifier.isstring))))
2028            || (i.types[given].bitfield.tbyte
2029                && !t->operand_types[wanted].bitfield.tbyte));
2030 }
2031
2032 /* Return 1 if there is no conflict in SIMD register between operand
2033    GIVEN and opeand WANTED for instruction template T.  */
2034
2035 static INLINE int
2036 match_simd_size (const insn_template *t, unsigned int wanted,
2037                  unsigned int given)
2038 {
2039   return !((i.types[given].bitfield.xmmword
2040             && !t->operand_types[wanted].bitfield.xmmword)
2041            || (i.types[given].bitfield.ymmword
2042                && !t->operand_types[wanted].bitfield.ymmword)
2043            || (i.types[given].bitfield.zmmword
2044                && !t->operand_types[wanted].bitfield.zmmword)
2045            || (i.types[given].bitfield.tmmword
2046                && !t->operand_types[wanted].bitfield.tmmword));
2047 }
2048
2049 /* Return 1 if there is no conflict in any size between operand GIVEN
2050    and opeand WANTED for instruction template T.  */
2051
2052 static INLINE int
2053 match_mem_size (const insn_template *t, unsigned int wanted,
2054                 unsigned int given)
2055 {
2056   return (match_operand_size (t, wanted, given)
2057           && !((i.types[given].bitfield.unspecified
2058                 && !i.broadcast.type
2059                 && !i.broadcast.bytes
2060                 && !t->operand_types[wanted].bitfield.unspecified)
2061                || (i.types[given].bitfield.fword
2062                    && !t->operand_types[wanted].bitfield.fword)
2063                /* For scalar opcode templates to allow register and memory
2064                   operands at the same time, some special casing is needed
2065                   here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2066                   down-conversion vpmov*.  */
2067                || ((t->operand_types[wanted].bitfield.class == RegSIMD
2068                     && t->operand_types[wanted].bitfield.byte
2069                        + t->operand_types[wanted].bitfield.word
2070                        + t->operand_types[wanted].bitfield.dword
2071                        + t->operand_types[wanted].bitfield.qword
2072                        > !!t->opcode_modifier.broadcast)
2073                    ? (i.types[given].bitfield.xmmword
2074                       || i.types[given].bitfield.ymmword
2075                       || i.types[given].bitfield.zmmword)
2076                    : !match_simd_size(t, wanted, given))));
2077 }
2078
/* Return value has MATCH_STRAIGHT set if there is no size conflict on any
   operands for instruction template T, and it has MATCH_REVERSE set if there
   is no size conflict on any operands for the template with operands reversed
   (and the template allows for reversing in the first place).  This
   describes the result of operand_size_match (); the two values are
   distinct bits and may be OR-ed together.  */

#define MATCH_STRAIGHT 1
#define MATCH_REVERSE  2
2086
2087 static INLINE unsigned int
2088 operand_size_match (const insn_template *t)
2089 {
2090   unsigned int j, match = MATCH_STRAIGHT;
2091
2092   /* Don't check non-absolute jump instructions.  */
2093   if (t->opcode_modifier.jump
2094       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2095     return match;
2096
2097   /* Check memory and accumulator operand size.  */
2098   for (j = 0; j < i.operands; j++)
2099     {
2100       if (i.types[j].bitfield.class != Reg
2101           && i.types[j].bitfield.class != RegSIMD
2102           && t->opcode_modifier.operandconstraint == ANY_SIZE)
2103         continue;
2104
2105       if (t->operand_types[j].bitfield.class == Reg
2106           && !match_operand_size (t, j, j))
2107         {
2108           match = 0;
2109           break;
2110         }
2111
2112       if (t->operand_types[j].bitfield.class == RegSIMD
2113           && !match_simd_size (t, j, j))
2114         {
2115           match = 0;
2116           break;
2117         }
2118
2119       if (t->operand_types[j].bitfield.instance == Accum
2120           && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2121         {
2122           match = 0;
2123           break;
2124         }
2125
2126       if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2127         {
2128           match = 0;
2129           break;
2130         }
2131     }
2132
2133   if (!t->opcode_modifier.d)
2134     return match;
2135
2136   /* Check reverse.  */
2137   gas_assert ((i.operands >= 2 && i.operands <= 3)
2138               || t->opcode_modifier.vexsources);
2139
2140   for (j = 0; j < i.operands; j++)
2141     {
2142       unsigned int given = i.operands - j - 1;
2143
2144       /* For 4- and 5-operand insns VEX.W controls just the first two
2145          register operands.  */
2146       if (t->opcode_modifier.vexsources)
2147         given = j < 2 ? 1 - j : j;
2148
2149       if (t->operand_types[j].bitfield.class == Reg
2150           && !match_operand_size (t, j, given))
2151         return match;
2152
2153       if (t->operand_types[j].bitfield.class == RegSIMD
2154           && !match_simd_size (t, j, given))
2155         return match;
2156
2157       if (t->operand_types[j].bitfield.instance == Accum
2158           && (!match_operand_size (t, j, given)
2159               || !match_simd_size (t, j, given)))
2160         return match;
2161
2162       if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2163         return match;
2164     }
2165
2166   return match | MATCH_REVERSE;
2167 }
2168
2169 static INLINE int
2170 operand_type_match (i386_operand_type overlap,
2171                     i386_operand_type given)
2172 {
2173   i386_operand_type temp = overlap;
2174
2175   temp.bitfield.unspecified = 0;
2176   temp.bitfield.byte = 0;
2177   temp.bitfield.word = 0;
2178   temp.bitfield.dword = 0;
2179   temp.bitfield.fword = 0;
2180   temp.bitfield.qword = 0;
2181   temp.bitfield.tbyte = 0;
2182   temp.bitfield.xmmword = 0;
2183   temp.bitfield.ymmword = 0;
2184   temp.bitfield.zmmword = 0;
2185   temp.bitfield.tmmword = 0;
2186   if (operand_type_all_zero (&temp))
2187     goto mismatch;
2188
2189   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2190     return 1;
2191
2192  mismatch:
2193   i.error = operand_type_mismatch;
2194   return 0;
2195 }
2196
2197 /* If given types g0 and g1 are registers they must be of the same type
2198    unless the expected operand type register overlap is null.
2199    Intel syntax sized memory operands are also checked here.  */
2200
2201 static INLINE int
2202 operand_type_register_match (i386_operand_type g0,
2203                              i386_operand_type t0,
2204                              i386_operand_type g1,
2205                              i386_operand_type t1)
2206 {
2207   if (g0.bitfield.class != Reg
2208       && g0.bitfield.class != RegSIMD
2209       && (g0.bitfield.unspecified
2210           || !operand_type_check (g0, anymem)))
2211     return 1;
2212
2213   if (g1.bitfield.class != Reg
2214       && g1.bitfield.class != RegSIMD
2215       && (g1.bitfield.unspecified
2216           || !operand_type_check (g1, anymem)))
2217     return 1;
2218
2219   if (g0.bitfield.byte == g1.bitfield.byte
2220       && g0.bitfield.word == g1.bitfield.word
2221       && g0.bitfield.dword == g1.bitfield.dword
2222       && g0.bitfield.qword == g1.bitfield.qword
2223       && g0.bitfield.xmmword == g1.bitfield.xmmword
2224       && g0.bitfield.ymmword == g1.bitfield.ymmword
2225       && g0.bitfield.zmmword == g1.bitfield.zmmword)
2226     return 1;
2227
2228   /* If expectations overlap in no more than a single size, all is fine. */
2229   g0 = operand_type_and (t0, t1);
2230   if (g0.bitfield.byte
2231       + g0.bitfield.word
2232       + g0.bitfield.dword
2233       + g0.bitfield.qword
2234       + g0.bitfield.xmmword
2235       + g0.bitfield.ymmword
2236       + g0.bitfield.zmmword <= 1)
2237     return 1;
2238
2239   i.error = register_type_mismatch;
2240
2241   return 0;
2242 }
2243
2244 static INLINE unsigned int
2245 register_number (const reg_entry *r)
2246 {
2247   unsigned int nr = r->reg_num;
2248
2249   if (r->reg_flags & RegRex)
2250     nr += 8;
2251
2252   if (r->reg_flags & RegVRex)
2253     nr += 16;
2254
2255   return nr;
2256 }
2257
2258 static INLINE unsigned int
2259 mode_from_disp_size (i386_operand_type t)
2260 {
2261   if (t.bitfield.disp8)
2262     return 1;
2263   else if (t.bitfield.disp16
2264            || t.bitfield.disp32)
2265     return 2;
2266   else
2267     return 0;
2268 }
2269
2270 static INLINE int
2271 fits_in_signed_byte (addressT num)
2272 {
2273   return num + 0x80 <= 0xff;
2274 }
2275
2276 static INLINE int
2277 fits_in_unsigned_byte (addressT num)
2278 {
2279   return num <= 0xff;
2280 }
2281
2282 static INLINE int
2283 fits_in_unsigned_word (addressT num)
2284 {
2285   return num <= 0xffff;
2286 }
2287
2288 static INLINE int
2289 fits_in_signed_word (addressT num)
2290 {
2291   return num + 0x8000 <= 0xffff;
2292 }
2293
2294 static INLINE int
2295 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2296 {
2297 #ifndef BFD64
2298   return 1;
2299 #else
2300   return num + 0x80000000 <= 0xffffffff;
2301 #endif
2302 }                               /* fits_in_signed_long() */
2303
2304 static INLINE int
2305 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2306 {
2307 #ifndef BFD64
2308   return 1;
2309 #else
2310   return num <= 0xffffffff;
2311 #endif
2312 }                               /* fits_in_unsigned_long() */
2313
2314 static INLINE valueT extend_to_32bit_address (addressT num)
2315 {
2316 #ifdef BFD64
2317   if (fits_in_unsigned_long(num))
2318     return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2319
2320   if (!fits_in_signed_long (num))
2321     return num & 0xffffffff;
2322 #endif
2323
2324   return num;
2325 }
2326
2327 static INLINE int
2328 fits_in_disp8 (offsetT num)
2329 {
2330   int shift = i.memshift;
2331   unsigned int mask;
2332
2333   if (shift == -1)
2334     abort ();
2335
2336   mask = (1 << shift) - 1;
2337
2338   /* Return 0 if NUM isn't properly aligned.  */
2339   if ((num & mask))
2340     return 0;
2341
2342   /* Check if NUM will fit in 8bit after shift.  */
2343   return fits_in_signed_byte (num >> shift);
2344 }
2345
2346 static INLINE int
2347 fits_in_imm4 (offsetT num)
2348 {
2349   return (num & 0xf) == num;
2350 }
2351
2352 static i386_operand_type
2353 smallest_imm_type (offsetT num)
2354 {
2355   i386_operand_type t;
2356
2357   operand_type_set (&t, 0);
2358   t.bitfield.imm64 = 1;
2359
2360   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2361     {
2362       /* This code is disabled on the 486 because all the Imm1 forms
2363          in the opcode table are slower on the i486.  They're the
2364          versions with the implicitly specified single-position
2365          displacement, which has another syntax if you really want to
2366          use that form.  */
2367       t.bitfield.imm1 = 1;
2368       t.bitfield.imm8 = 1;
2369       t.bitfield.imm8s = 1;
2370       t.bitfield.imm16 = 1;
2371       t.bitfield.imm32 = 1;
2372       t.bitfield.imm32s = 1;
2373     }
2374   else if (fits_in_signed_byte (num))
2375     {
2376       t.bitfield.imm8 = 1;
2377       t.bitfield.imm8s = 1;
2378       t.bitfield.imm16 = 1;
2379       t.bitfield.imm32 = 1;
2380       t.bitfield.imm32s = 1;
2381     }
2382   else if (fits_in_unsigned_byte (num))
2383     {
2384       t.bitfield.imm8 = 1;
2385       t.bitfield.imm16 = 1;
2386       t.bitfield.imm32 = 1;
2387       t.bitfield.imm32s = 1;
2388     }
2389   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2390     {
2391       t.bitfield.imm16 = 1;
2392       t.bitfield.imm32 = 1;
2393       t.bitfield.imm32s = 1;
2394     }
2395   else if (fits_in_signed_long (num))
2396     {
2397       t.bitfield.imm32 = 1;
2398       t.bitfield.imm32s = 1;
2399     }
2400   else if (fits_in_unsigned_long (num))
2401     t.bitfield.imm32 = 1;
2402
2403   return t;
2404 }
2405
2406 static offsetT
2407 offset_in_range (offsetT val, int size)
2408 {
2409   addressT mask;
2410
2411   switch (size)
2412     {
2413     case 1: mask = ((addressT) 1 <<  8) - 1; break;
2414     case 2: mask = ((addressT) 1 << 16) - 1; break;
2415 #ifdef BFD64
2416     case 4: mask = ((addressT) 1 << 32) - 1; break;
2417 #endif
2418     case sizeof (val): return val;
2419     default: abort ();
2420     }
2421
2422   if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2423     as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2424              (uint64_t) val, (uint64_t) (val & mask));
2425
2426   return val & mask;
2427 }
2428
/* Return the name recorded in instruction template T.  */
static INLINE const char *insn_name (const insn_template *t)
{
  return t->name;
}
2433
/* Result values of add_prefix ().  PREFIX_EXIST has to remain zero:
   add_prefix () tests its prospective result for being non-zero in
   order to tell success from failure.  */
enum PREFIX_GROUP
{
  PREFIX_EXIST = 0,     /* Conflict with an already present prefix.  */
  PREFIX_LOCK,          /* A lock prefix was added.  */
  PREFIX_REP,           /* A rep/repne prefix was added.  */
  PREFIX_DS,            /* A ds segment prefix was added.  */
  PREFIX_OTHER          /* Some other prefix was added.  */
};
2442
2443 /* Returns
2444    a. PREFIX_EXIST if attempting to add a prefix where one from the
2445    same class already exists.
2446    b. PREFIX_LOCK if lock prefix is added.
2447    c. PREFIX_REP if rep/repne prefix is added.
2448    d. PREFIX_DS if ds prefix is added.
2449    e. PREFIX_OTHER if other prefix is added.
2450  */
2451
2452 static enum PREFIX_GROUP
2453 add_prefix (unsigned int prefix)
2454 {
2455   enum PREFIX_GROUP ret = PREFIX_OTHER;
2456   unsigned int q;
2457
2458   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2459       && flag_code == CODE_64BIT)
2460     {
2461       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2462           || (i.prefix[REX_PREFIX] & prefix & REX_R)
2463           || (i.prefix[REX_PREFIX] & prefix & REX_X)
2464           || (i.prefix[REX_PREFIX] & prefix & REX_B))
2465         ret = PREFIX_EXIST;
2466       q = REX_PREFIX;
2467     }
2468   else
2469     {
2470       switch (prefix)
2471         {
2472         default:
2473           abort ();
2474
2475         case DS_PREFIX_OPCODE:
2476           ret = PREFIX_DS;
2477           /* Fall through.  */
2478         case CS_PREFIX_OPCODE:
2479         case ES_PREFIX_OPCODE:
2480         case FS_PREFIX_OPCODE:
2481         case GS_PREFIX_OPCODE:
2482         case SS_PREFIX_OPCODE:
2483           q = SEG_PREFIX;
2484           break;
2485
2486         case REPNE_PREFIX_OPCODE:
2487         case REPE_PREFIX_OPCODE:
2488           q = REP_PREFIX;
2489           ret = PREFIX_REP;
2490           break;
2491
2492         case LOCK_PREFIX_OPCODE:
2493           q = LOCK_PREFIX;
2494           ret = PREFIX_LOCK;
2495           break;
2496
2497         case FWAIT_OPCODE:
2498           q = WAIT_PREFIX;
2499           break;
2500
2501         case ADDR_PREFIX_OPCODE:
2502           q = ADDR_PREFIX;
2503           break;
2504
2505         case DATA_PREFIX_OPCODE:
2506           q = DATA_PREFIX;
2507           break;
2508         }
2509       if (i.prefix[q] != 0)
2510         ret = PREFIX_EXIST;
2511     }
2512
2513   if (ret)
2514     {
2515       if (!i.prefix[q])
2516         ++i.prefixes;
2517       i.prefix[q] |= prefix;
2518     }
2519   else
2520     as_bad (_("same type of prefix used twice"));
2521
2522   return ret;
2523 }
2524
2525 static void
2526 update_code_flag (int value, int check)
2527 {
2528   PRINTF_LIKE ((*as_error));
2529
2530   flag_code = (enum flag_code) value;
2531   if (flag_code == CODE_64BIT)
2532     {
2533       cpu_arch_flags.bitfield.cpu64 = 1;
2534       cpu_arch_flags.bitfield.cpuno64 = 0;
2535     }
2536   else
2537     {
2538       cpu_arch_flags.bitfield.cpu64 = 0;
2539       cpu_arch_flags.bitfield.cpuno64 = 1;
2540     }
2541   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2542     {
2543       if (check)
2544         as_error = as_fatal;
2545       else
2546         as_error = as_bad;
2547       (*as_error) (_("64bit mode not supported on `%s'."),
2548                    cpu_arch_name ? cpu_arch_name : default_arch);
2549     }
2550   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2551     {
2552       if (check)
2553         as_error = as_fatal;
2554       else
2555         as_error = as_bad;
2556       (*as_error) (_("32bit mode not supported on `%s'."),
2557                    cpu_arch_name ? cpu_arch_name : default_arch);
2558     }
2559   stackop_size = '\0';
2560 }
2561
/* Switch to code size VALUE by way of update_code_flag ().  CHECK is
   passed as zero, so an unsupported mode gets reported via as_bad ()
   rather than as_fatal ().  */
static void
set_code_flag (int value)
{
  update_code_flag (value, 0);
}
2567
2568 static void
2569 set_16bit_gcc_code_flag (int new_code_flag)
2570 {
2571   flag_code = (enum flag_code) new_code_flag;
2572   if (flag_code != CODE_16BIT)
2573     abort ();
2574   cpu_arch_flags.bitfield.cpu64 = 0;
2575   cpu_arch_flags.bitfield.cpuno64 = 1;
2576   stackop_size = LONG_MNEM_SUFFIX;
2577 }
2578
2579 static void
2580 set_intel_syntax (int syntax_flag)
2581 {
2582   /* Find out if register prefixing is specified.  */
2583   int ask_naked_reg = 0;
2584
2585   SKIP_WHITESPACE ();
2586   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2587     {
2588       char *string;
2589       int e = get_symbol_name (&string);
2590
2591       if (strcmp (string, "prefix") == 0)
2592         ask_naked_reg = 1;
2593       else if (strcmp (string, "noprefix") == 0)
2594         ask_naked_reg = -1;
2595       else
2596         as_bad (_("bad argument to syntax directive."));
2597       (void) restore_line_pointer (e);
2598     }
2599   demand_empty_rest_of_line ();
2600
2601   intel_syntax = syntax_flag;
2602
2603   if (ask_naked_reg == 0)
2604     allow_naked_reg = (intel_syntax
2605                        && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2606   else
2607     allow_naked_reg = (ask_naked_reg < 0);
2608
2609   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2610
2611   identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2612   identifier_chars['$'] = intel_syntax ? '$' : 0;
2613   register_prefix = allow_naked_reg ? "" : "%";
2614 }
2615
/* Set intel_mnemonic to MNEMONIC_FLAG.  */
static void
set_intel_mnemonic (int mnemonic_flag)
{
  intel_mnemonic = mnemonic_flag;
}
2621
/* Set allow_index_reg to FLAG.  */
static void
set_allow_index_reg (int flag)
{
  allow_index_reg = flag;
}
2627
2628 static void
2629 set_check (int what)
2630 {
2631   enum check_kind *kind;
2632   const char *str;
2633
2634   if (what)
2635     {
2636       kind = &operand_check;
2637       str = "operand";
2638     }
2639   else
2640     {
2641       kind = &sse_check;
2642       str = "sse";
2643     }
2644
2645   SKIP_WHITESPACE ();
2646
2647   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2648     {
2649       char *string;
2650       int e = get_symbol_name (&string);
2651
2652       if (strcmp (string, "none") == 0)
2653         *kind = check_none;
2654       else if (strcmp (string, "warning") == 0)
2655         *kind = check_warning;
2656       else if (strcmp (string, "error") == 0)
2657         *kind = check_error;
2658       else
2659         as_bad (_("bad argument to %s_check directive."), str);
2660       (void) restore_line_pointer (e);
2661     }
2662   else
2663     as_bad (_("missing argument for %s_check directive"), str);
2664
2665   demand_empty_rest_of_line ();
2666 }
2667
2668 static void
2669 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2670                            i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2671 {
2672 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2673   static const char *arch;
2674
2675   /* Intel MCU is only supported on ELF.  */
2676   if (!IS_ELF)
2677     return;
2678
2679   if (!arch)
2680     {
2681       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
2682          use default_arch.  */
2683       arch = cpu_arch_name;
2684       if (!arch)
2685         arch = default_arch;
2686     }
2687
2688   /* If we are targeting Intel MCU, we must enable it.  */
2689   if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2690       == new_flag.bitfield.cpuiamcu)
2691     return;
2692
2693   as_bad (_("`%s' is not supported on `%s'"), name, arch);
2694 #endif
2695 }
2696
2697 static void
2698 extend_cpu_sub_arch_name (const char *name)
2699 {
2700   if (cpu_sub_arch_name)
2701     cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2702                                   ".", name, (const char *) NULL);
2703   else
2704     cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2705 }
2706
2707 static void
2708 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2709 {
2710   typedef struct arch_stack_entry
2711   {
2712     const struct arch_stack_entry *prev;
2713     const char *name;
2714     char *sub_name;
2715     i386_cpu_flags flags;
2716     i386_cpu_flags isa_flags;
2717     enum processor_type isa;
2718     enum flag_code flag_code;
2719     char stackop_size;
2720     bool no_cond_jump_promotion;
2721   } arch_stack_entry;
2722   static const arch_stack_entry *arch_stack_top;
2723
2724   SKIP_WHITESPACE ();
2725
2726   if (!is_end_of_line[(unsigned char) *input_line_pointer])
2727     {
2728       char *s;
2729       int e = get_symbol_name (&s);
2730       const char *string = s;
2731       unsigned int j = 0;
2732       i386_cpu_flags flags;
2733
2734       if (strcmp (string, "default") == 0)
2735         {
2736           if (strcmp (default_arch, "iamcu") == 0)
2737             string = default_arch;
2738           else
2739             {
2740               static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2741
2742               cpu_arch_name = NULL;
2743               free (cpu_sub_arch_name);
2744               cpu_sub_arch_name = NULL;
2745               cpu_arch_flags = cpu_unknown_flags;
2746               if (flag_code == CODE_64BIT)
2747                 {
2748                   cpu_arch_flags.bitfield.cpu64 = 1;
2749                   cpu_arch_flags.bitfield.cpuno64 = 0;
2750                 }
2751               else
2752                 {
2753                   cpu_arch_flags.bitfield.cpu64 = 0;
2754                   cpu_arch_flags.bitfield.cpuno64 = 1;
2755                 }
2756               cpu_arch_isa = PROCESSOR_UNKNOWN;
2757               cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2758               if (!cpu_arch_tune_set)
2759                 {
2760                   cpu_arch_tune = cpu_arch_isa;
2761                   cpu_arch_tune_flags = cpu_arch_isa_flags;
2762                 }
2763
2764               j = ARRAY_SIZE (cpu_arch) + 1;
2765             }
2766         }
2767       else if (strcmp (string, "push") == 0)
2768         {
2769           arch_stack_entry *top = XNEW (arch_stack_entry);
2770
2771           top->name = cpu_arch_name;
2772           if (cpu_sub_arch_name)
2773             top->sub_name = xstrdup (cpu_sub_arch_name);
2774           else
2775             top->sub_name = NULL;
2776           top->flags = cpu_arch_flags;
2777           top->isa = cpu_arch_isa;
2778           top->isa_flags = cpu_arch_isa_flags;
2779           top->flag_code = flag_code;
2780           top->stackop_size = stackop_size;
2781           top->no_cond_jump_promotion = no_cond_jump_promotion;
2782
2783           top->prev = arch_stack_top;
2784           arch_stack_top = top;
2785
2786           (void) restore_line_pointer (e);
2787           demand_empty_rest_of_line ();
2788           return;
2789         }
2790       else if (strcmp (string, "pop") == 0)
2791         {
2792           const arch_stack_entry *top = arch_stack_top;
2793
2794           if (!top)
2795             as_bad (_(".arch stack is empty"));
2796           else if (top->flag_code != flag_code
2797                    || top->stackop_size != stackop_size)
2798             {
2799               static const unsigned int bits[] = {
2800                 [CODE_16BIT] = 16,
2801                 [CODE_32BIT] = 32,
2802                 [CODE_64BIT] = 64,
2803               };
2804
2805               as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2806                       bits[top->flag_code],
2807                       top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2808             }
2809           else
2810             {
2811               arch_stack_top = top->prev;
2812
2813               cpu_arch_name = top->name;
2814               free (cpu_sub_arch_name);
2815               cpu_sub_arch_name = top->sub_name;
2816               cpu_arch_flags = top->flags;
2817               cpu_arch_isa = top->isa;
2818               cpu_arch_isa_flags = top->isa_flags;
2819               no_cond_jump_promotion = top->no_cond_jump_promotion;
2820
2821               XDELETE (top);
2822             }
2823
2824           (void) restore_line_pointer (e);
2825           demand_empty_rest_of_line ();
2826           return;
2827         }
2828
2829       for (; j < ARRAY_SIZE (cpu_arch); j++)
2830         {
2831           if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2832              && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2833             {
2834               if (*string != '.')
2835                 {
2836                   check_cpu_arch_compatible (string, cpu_arch[j].enable);
2837
2838                   cpu_arch_name = cpu_arch[j].name;
2839                   free (cpu_sub_arch_name);
2840                   cpu_sub_arch_name = NULL;
2841                   cpu_arch_flags = cpu_arch[j].enable;
2842                   if (flag_code == CODE_64BIT)
2843                     {
2844                       cpu_arch_flags.bitfield.cpu64 = 1;
2845                       cpu_arch_flags.bitfield.cpuno64 = 0;
2846                     }
2847                   else
2848                     {
2849                       cpu_arch_flags.bitfield.cpu64 = 0;
2850                       cpu_arch_flags.bitfield.cpuno64 = 1;
2851                     }
2852                   cpu_arch_isa = cpu_arch[j].type;
2853                   cpu_arch_isa_flags = cpu_arch[j].enable;
2854                   if (!cpu_arch_tune_set)
2855                     {
2856                       cpu_arch_tune = cpu_arch_isa;
2857                       cpu_arch_tune_flags = cpu_arch_isa_flags;
2858                     }
2859                   pre_386_16bit_warned = false;
2860                   break;
2861                 }
2862
2863               if (cpu_flags_all_zero (&cpu_arch[j].enable))
2864                 continue;
2865
2866               flags = cpu_flags_or (cpu_arch_flags,
2867                                     cpu_arch[j].enable);
2868
2869               if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2870                 {
2871                   extend_cpu_sub_arch_name (string + 1);
2872                   cpu_arch_flags = flags;
2873                   cpu_arch_isa_flags = flags;
2874                 }
2875               else
2876                 cpu_arch_isa_flags
2877                   = cpu_flags_or (cpu_arch_isa_flags,
2878                                   cpu_arch[j].enable);
2879               (void) restore_line_pointer (e);
2880               demand_empty_rest_of_line ();
2881               return;
2882             }
2883         }
2884
2885       if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
2886         {
2887           /* Disable an ISA extension.  */
2888           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2889             if (cpu_arch[j].type == PROCESSOR_NONE
2890                 && strcmp (string + 3, cpu_arch[j].name) == 0)
2891               {
2892                 flags = cpu_flags_and_not (cpu_arch_flags,
2893                                            cpu_arch[j].disable);
2894                 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2895                   {
2896                     extend_cpu_sub_arch_name (string + 1);
2897                     cpu_arch_flags = flags;
2898                     cpu_arch_isa_flags = flags;
2899                   }
2900                 (void) restore_line_pointer (e);
2901                 demand_empty_rest_of_line ();
2902                 return;
2903               }
2904         }
2905
2906       if (j == ARRAY_SIZE (cpu_arch))
2907         as_bad (_("no such architecture: `%s'"), string);
2908
2909       *input_line_pointer = e;
2910     }
2911   else
2912     as_bad (_("missing cpu architecture"));
2913
2914   no_cond_jump_promotion = 0;
2915   if (*input_line_pointer == ','
2916       && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2917     {
2918       char *string;
2919       char e;
2920
2921       ++input_line_pointer;
2922       e = get_symbol_name (&string);
2923
2924       if (strcmp (string, "nojumps") == 0)
2925         no_cond_jump_promotion = 1;
2926       else if (strcmp (string, "jumps") == 0)
2927         ;
2928       else
2929         as_bad (_("no such architecture modifier: `%s'"), string);
2930
2931       (void) restore_line_pointer (e);
2932     }
2933
2934   demand_empty_rest_of_line ();
2935 }
2936
2937 enum bfd_architecture
2938 i386_arch (void)
2939 {
2940   if (cpu_arch_isa == PROCESSOR_IAMCU)
2941     {
2942       if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2943           || flag_code == CODE_64BIT)
2944         as_fatal (_("Intel MCU is 32bit ELF only"));
2945       return bfd_arch_iamcu;
2946     }
2947   else
2948     return bfd_arch_i386;
2949 }
2950
2951 unsigned long
2952 i386_mach (void)
2953 {
2954   if (startswith (default_arch, "x86_64"))
2955     {
2956       if (default_arch[6] == '\0')
2957         return bfd_mach_x86_64;
2958       else
2959         return bfd_mach_x64_32;
2960     }
2961   else if (!strcmp (default_arch, "i386")
2962            || !strcmp (default_arch, "iamcu"))
2963     {
2964       if (cpu_arch_isa == PROCESSOR_IAMCU)
2965         {
2966           if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2967             as_fatal (_("Intel MCU is 32bit ELF only"));
2968           return bfd_mach_i386_iamcu;
2969         }
2970       else
2971         return bfd_mach_i386_i386;
2972     }
2973   else
2974     as_fatal (_("unknown architecture"));
2975 }
2976 \f
2977 #include "opcodes/i386-tbl.h"
2978
/* Set up the tables the rest of this file relies on:
   - the op_hash and reg_hash hash tables (a duplicate name in either
     one is reported through as_fatal),
   - the character tables mnemonic_chars, register_chars, operand_chars
     and identifier_chars,
   - x86_dwarf2_return_column and x86_cie_data_alignment (plus the
     SFrame CFA register numbers for ELF), chosen by flag_code.
   Aborts if align_branch_prefix_size exceeds
   MAX_FUSED_JCC_PADDING_SIZE.  */

2979 void
2980 md_begin (void)
2981 {
2982   /* Support pseudo prefixes like {disp32}.  */
2983   lex_type ['{'] = LEX_BEGIN_NAME;
2984
2985   /* Initialize op_hash hash table.  */
2986   op_hash = str_htab_create ();
2987
2988   {
2989     const insn_template *const *sets = i386_op_sets;
2990     const insn_template *const *end = sets + ARRAY_SIZE (i386_op_sets) - 1;
2991
2992     /* Type checks to compensate for the conversion through void * which
2993        occurs during hash table insertion / lookup.  */
2994     (void) sizeof (sets == &current_templates->start);
2995     (void) sizeof (end == &current_templates->end);
         /* Enter every element of i386_op_sets except the last one, keyed
            by the name of the template it points at.  The value stored is
            the address of the array element itself.  NOTE(review): the
            final element is presumably an end marker - confirm against
            the generated table.  */
2996     for (; sets < end; ++sets)
2997       if (str_hash_insert (op_hash, insn_name (*sets), sets, 0))
2998         as_fatal (_("duplicate %s"), insn_name (*sets));
2999   }
3000
3001   /* Initialize reg_hash hash table.  */
3002   reg_hash = str_htab_create ();
3003   {
3004     const reg_entry *regtab;
3005     unsigned int regtab_size = i386_regtab_size;
3006
3007     for (regtab = i386_regtab; regtab_size--; regtab++)
3008       {
             /* Besides being entered into reg_hash, a few table entries are
                also remembered in pointers declared outside this function
                (reg_eax, reg_st0, reg_es, reg_ss, reg_ds, reg_k0).  */
3009         switch (regtab->reg_type.bitfield.class)
3010           {
3011           case Reg:
3012             if (regtab->reg_type.bitfield.dword)
3013               {
3014                 if (regtab->reg_type.bitfield.instance == Accum)
3015                   reg_eax = regtab;
3016               }
3017             else if (regtab->reg_type.bitfield.tbyte)
3018               {
3019                 /* There's no point inserting st(<N>) in the hash table, as
3020                    parentheses aren't included in register_chars[] anyway.  */
3021                 if (regtab->reg_type.bitfield.instance != Accum)
3022                   continue;
3023                 reg_st0 = regtab;
3024               }
3025             break;
3026
3027           case SReg:
3028             switch (regtab->reg_num)
3029               {
3030               case 0: reg_es = regtab; break;
3031               case 2: reg_ss = regtab; break;
3032               case 3: reg_ds = regtab; break;
3033               }
3034             break;
3035
3036           case RegMask:
3037             if (!regtab->reg_num)
3038               reg_k0 = regtab;
3039             break;
3040           }
3041
3042         if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3043           as_fatal (_("duplicate %s"), regtab->reg_name);
3044       }
3045   }
3046
3047   /* Fill in lexical tables:  mnemonic_chars, operand_chars.  */
3048   {
3049     int c;
3050     char *p;
3051
3052     for (c = 0; c < 256; c++)
3053       {
             /* Digits and lower case letters map to themselves in all three
                tables.  Upper case letters are folded to lower case for
                mnemonics and registers but keep their case as operand
                characters.  */
3054         if (ISDIGIT (c) || ISLOWER (c))
3055           {
3056             mnemonic_chars[c] = c;
3057             register_chars[c] = c;
3058             operand_chars[c] = c;
3059           }
3060         else if (ISUPPER (c))
3061           {
3062             mnemonic_chars[c] = TOLOWER (c);
3063             register_chars[c] = mnemonic_chars[c];
3064             operand_chars[c] = c;
3065           }
3066         else if (c == '{' || c == '}')
3067           {
3068             mnemonic_chars[c] = c;
3069             operand_chars[c] = c;
3070           }
3071 #ifdef SVR4_COMMENT_CHARS
3072         else if (c == '\\' && strchr (i386_comment_chars, '/'))
3073           operand_chars[c] = c;
3074 #endif
3075
             /* Letters, digits and every character code from 128 up are
                identifier characters; the latter are operand characters
                too.  */
3076         if (ISALPHA (c) || ISDIGIT (c))
3077           identifier_chars[c] = c;
3078         else if (c >= 128)
3079           {
3080             identifier_chars[c] = c;
3081             operand_chars[c] = c;
3082           }
3083       }
3084
3085 #ifdef LEX_AT
3086     identifier_chars['@'] = '@';
3087 #endif
3088 #ifdef LEX_QM
3089     identifier_chars['?'] = '?';
3090     operand_chars['?'] = '?';
3091 #endif
3092     mnemonic_chars['_'] = '_';
3093     mnemonic_chars['-'] = '-';
3094     mnemonic_chars['.'] = '.';
3095     identifier_chars['_'] = '_';
3096     identifier_chars['.'] = '.';
3097
3098     for (p = operand_special_chars; *p != '\0'; p++)
3099       operand_chars[(unsigned char) *p] = *p;
3100   }
3101
       /* Set x86_dwarf2_return_column and x86_cie_data_alignment according
          to whether 64-bit code is being assembled.  */
3102   if (flag_code == CODE_64BIT)
3103     {
3104 #if defined (OBJ_COFF) && defined (TE_PE)
3105       x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3106                                   ? 32 : 16);
3107 #else
3108       x86_dwarf2_return_column = 16;
3109 #endif
3110       x86_cie_data_alignment = -8;
3111 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3112       x86_sframe_cfa_sp_reg = 7;
3113       x86_sframe_cfa_fp_reg = 6;
3114 #endif
3115     }
3116   else
3117     {
3118       x86_dwarf2_return_column = 8;
3119       x86_cie_data_alignment = -4;
3120     }
3121
3122   /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3123      can be turned into BRANCH_PREFIX frag.  */
3124   if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3125     abort ();
3126 }
3127
/* Pass the opcode and register hash tables to htab_print_statistics,
   with FILE as the destination and "i386 opcode" / "i386 register" as
   their respective labels.  */

3128 void
3129 i386_print_statistics (FILE *file)
3130 {
3131   htab_print_statistics (file, "i386 opcode", op_hash);
3132   htab_print_statistics (file, "i386 register", reg_hash);
3133 }
3134
/* Delete the two hash tables created by md_begin.  Neither pointer is
   reset, so op_hash and reg_hash must not be used afterwards.
   NOTE(review): presumably called once assembly has finished - confirm
   against the caller.  */

3135 void
3136 i386_md_end (void)
3137 {
3138   htab_delete (op_hash);
3139   htab_delete (reg_hash);
3140 }
3141 \f
3142 #ifdef DEBUG386
3143
3144 /* Debugging routines for md_assemble.  */
3145 static void pte (insn_template *);
3146 static void pt (i386_operand_type);
3147 static void pe (expressionS *);
3148 static void ps (symbolS *);
3149
3150 static void
3151 pi (const char *line, i386_insn *x)
3152 {
3153   unsigned int j;
3154
3155   fprintf (stdout, "%s: template ", line);
3156   pte (&x->tm);
3157   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
3158            x->base_reg ? x->base_reg->reg_name : "none",
3159            x->index_reg ? x->index_reg->reg_name : "none",
3160            x->log2_scale_factor);
3161   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
3162            x->rm.mode, x->rm.reg, x->rm.regmem);
3163   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
3164            x->sib.base, x->sib.index, x->sib.scale);
3165   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
3166            (x->rex & REX_W) != 0,
3167            (x->rex & REX_R) != 0,
3168            (x->rex & REX_X) != 0,
3169            (x->rex & REX_B) != 0);
3170   for (j = 0; j < x->operands; j++)
3171     {
3172       fprintf (stdout, "    #%d:  ", j + 1);
3173       pt (x->types[j]);
3174       fprintf (stdout, "\n");
3175       if (x->types[j].bitfield.class == Reg
3176           || x->types[j].bitfield.class == RegMMX
3177           || x->types[j].bitfield.class == RegSIMD
3178           || x->types[j].bitfield.class == RegMask
3179           || x->types[j].bitfield.class == SReg
3180           || x->types[j].bitfield.class == RegCR
3181           || x->types[j].bitfield.class == RegDR
3182           || x->types[j].bitfield.class == RegTR
3183           || x->types[j].bitfield.class == RegBND)
3184         fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3185       if (operand_type_check (x->types[j], imm))
3186         pe (x->op[j].imms);
3187       if (operand_type_check (x->types[j], disp))
3188         pe (x->op[j].disps);
3189     }
3190 }
3191
3192 static void
3193 pte (insn_template *t)
3194 {
3195   static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3196   static const char *const opc_spc[] = {
3197     NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3198     "XOP08", "XOP09", "XOP0A",
3199   };
3200   unsigned int j;
3201
3202   fprintf (stdout, " %d operands ", t->operands);
3203   if (opc_pfx[t->opcode_modifier.opcodeprefix])
3204     fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3205   if (opc_spc[t->opcode_modifier.opcodespace])
3206     fprintf (stdout, "space %s ", opc_spc[t->opcode_modifier.opcodespace]);
3207   fprintf (stdout, "opcode %x ", t->base_opcode);
3208   if (t->extension_opcode != None)
3209     fprintf (stdout, "ext %x ", t->extension_opcode);
3210   if (t->opcode_modifier.d)
3211     fprintf (stdout, "D");
3212   if (t->opcode_modifier.w)
3213     fprintf (stdout, "W");
3214   fprintf (stdout, "\n");
3215   for (j = 0; j < t->operands; j++)
3216     {
3217       fprintf (stdout, "    #%d type ", j + 1);
3218       pt (t->operand_types[j]);
3219       fprintf (stdout, "\n");
3220     }
3221 }
3222
3223 static void
3224 pe (expressionS *e)
3225 {
3226   fprintf (stdout, "    operation     %d\n", e->X_op);
3227   fprintf (stdout, "    add_number    %" PRId64 " (%" PRIx64 ")\n",
3228            (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3229   if (e->X_add_symbol)
3230     {
3231       fprintf (stdout, "    add_symbol    ");
3232       ps (e->X_add_symbol);
3233       fprintf (stdout, "\n");
3234     }
3235   if (e->X_op_symbol)
3236     {
3237       fprintf (stdout, "    op_symbol    ");
3238       ps (e->X_op_symbol);
3239       fprintf (stdout, "\n");
3240     }
3241 }
3242
3243 static void
3244 ps (symbolS *s)
3245 {
3246   fprintf (stdout, "%s type %s%s",
3247            S_GET_NAME (s),
3248            S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3249            segment_name (S_GET_SEGMENT (s)));
3250 }
3251
3252 static struct type_name
3253   {
3254     i386_operand_type mask;
3255     const char *name;
3256   }
3257 const type_names[] =
3258 {
3259   { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3260   { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3261   { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3262   { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3263   { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3264   { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3265   { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3266   { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3267   { { .bitfield = { .imm8 = 1 } }, "i8" },
3268   { { .bitfield = { .imm8s = 1 } }, "i8s" },
3269   { { .bitfield = { .imm16 = 1 } }, "i16" },
3270   { { .bitfield = { .imm32 = 1 } }, "i32" },
3271   { { .bitfield = { .imm32s = 1 } }, "i32s" },
3272   { { .bitfield = { .imm64 = 1 } }, "i64" },
3273   { { .bitfield = { .imm1 = 1 } }, "i1" },
3274   { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3275   { { .bitfield = { .disp8 = 1 } }, "d8" },
3276   { { .bitfield = { .disp16 = 1 } }, "d16" },
3277   { { .bitfield = { .disp32 = 1 } }, "d32" },
3278   { { .bitfield = { .disp64 = 1 } }, "d64" },
3279   { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3280   { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3281   { { .bitfield = { .class = RegCR } }, "control reg" },
3282   { { .bitfield = { .class = RegTR } }, "test reg" },
3283   { { .bitfield = { .class = RegDR } }, "debug reg" },
3284   { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3285   { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3286   { { .bitfield = { .class = SReg } }, "SReg" },
3287   { { .bitfield = { .class = RegMMX } }, "rMMX" },
3288   { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3289   { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3290   { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3291   { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3292   { { .bitfield = { .class = RegMask } }, "Mask reg" },
3293 };
3294
3295 static void
3296 pt (i386_operand_type t)
3297 {
3298   unsigned int j;
3299   i386_operand_type a;
3300
3301   for (j = 0; j < ARRAY_SIZE (type_names); j++)
3302     {
3303       a = operand_type_and (t, type_names[j].mask);
3304       if (operand_type_equal (&a, &type_names[j].mask))
3305         fprintf (stdout, "%s, ",  type_names[j].name);
3306     }
3307   fflush (stdout);
3308 }
3309
3310 #endif /* DEBUG386 */
3311 \f
3312 static bfd_reloc_code_real_type
3313 reloc (unsigned int size,
3314        int pcrel,
3315        int sign,
3316        bfd_reloc_code_real_type other)
3317 {
3318   if (other != NO_RELOC)
3319     {
3320       reloc_howto_type *rel;
3321
3322       if (size == 8)
3323         switch (other)
3324           {
3325           case BFD_RELOC_X86_64_GOT32:
3326             return BFD_RELOC_X86_64_GOT64;
3327             break;
3328           case BFD_RELOC_X86_64_GOTPLT64:
3329             return BFD_RELOC_X86_64_GOTPLT64;
3330             break;
3331           case BFD_RELOC_X86_64_PLTOFF64:
3332             return BFD_RELOC_X86_64_PLTOFF64;
3333             break;
3334           case BFD_RELOC_X86_64_GOTPC32:
3335             other = BFD_RELOC_X86_64_GOTPC64;
3336             break;
3337           case BFD_RELOC_X86_64_GOTPCREL:
3338             other = BFD_RELOC_X86_64_GOTPCREL64;
3339             break;
3340           case BFD_RELOC_X86_64_TPOFF32:
3341             other = BFD_RELOC_X86_64_TPOFF64;
3342             break;
3343           case BFD_RELOC_X86_64_DTPOFF32:
3344             other = BFD_RELOC_X86_64_DTPOFF64;
3345             break;
3346           default:
3347             break;
3348           }
3349
3350 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3351       if (other == BFD_RELOC_SIZE32)
3352         {
3353           if (size == 8)
3354             other = BFD_RELOC_SIZE64;
3355           if (pcrel)
3356             {
3357               as_bad (_("there are no pc-relative size relocations"));
3358               return NO_RELOC;
3359             }
3360         }
3361 #endif
3362
3363       /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
3364       if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3365         sign = -1;
3366
3367       rel = bfd_reloc_type_lookup (stdoutput, other);
3368       if (!rel)
3369         as_bad (_("unknown relocation (%u)"), other);
3370       else if (size != bfd_get_reloc_size (rel))
3371         as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3372                 bfd_get_reloc_size (rel),
3373                 size);
3374       else if (pcrel && !rel->pc_relative)
3375         as_bad (_("non-pc-relative relocation for pc-relative field"));
3376       else if ((rel->complain_on_overflow == complain_overflow_signed
3377                 && !sign)
3378                || (rel->complain_on_overflow == complain_overflow_unsigned
3379                    && sign > 0))
3380         as_bad (_("relocated field and relocation type differ in signedness"));
3381       else
3382         return other;
3383       return NO_RELOC;
3384     }
3385
3386   if (pcrel)
3387     {
3388       if (!sign)
3389         as_bad (_("there are no unsigned pc-relative relocations"));
3390       switch (size)
3391         {
3392         case 1: return BFD_RELOC_8_PCREL;
3393         case 2: return BFD_RELOC_16_PCREL;
3394         case 4: return BFD_RELOC_32_PCREL;
3395         case 8: return BFD_RELOC_64_PCREL;
3396         }
3397       as_bad (_("cannot do %u byte pc-relative relocation"), size);
3398     }
3399   else
3400     {
3401       if (sign > 0)
3402         switch (size)
3403           {
3404           case 4: return BFD_RELOC_X86_64_32S;
3405           }
3406       else
3407         switch (size)
3408           {
3409           case 1: return BFD_RELOC_8;
3410           case 2: return BFD_RELOC_16;
3411           case 4: return BFD_RELOC_32;
3412           case 8: return BFD_RELOC_64;
3413           }
3414       as_bad (_("cannot do %s %u byte relocation"),
3415               sign > 0 ? "signed" : "unsigned", size);
3416     }
3417
3418   return NO_RELOC;
3419 }
3420
3421 /* Here we decide which fixups can be adjusted to make them relative to
3422    the beginning of the section instead of the symbol.  Basically we need
3423    to make sure that the dynamic relocations are done correctly, so in
3424    some cases we force the original symbol to be used.  */
3425
3426 int
3427 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3428 {
3429 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3430   if (!IS_ELF)
3431     return 1;
3432
3433   /* Don't adjust pc-relative references to merge sections in 64-bit
3434      mode.  */
3435   if (use_rela_relocations
3436       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3437       && fixP->fx_pcrel)
3438     return 0;
3439
3440   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3441      and changed later by validate_fix.  */
3442   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3443       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3444     return 0;
3445
3446   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
3447      for size relocations.  */
3448   if (fixP->fx_r_type == BFD_RELOC_SIZE32
3449       || fixP->fx_r_type == BFD_RELOC_SIZE64
3450       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3451       || fixP->fx_r_type == BFD_RELOC_386_GOT32
3452       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3453       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3454       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3455       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3456       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3457       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3458       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3459       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3460       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3461       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3462       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3463       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3464       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3465       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3466       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3467       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3468       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3469       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3470       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3471       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3472       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3473       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3474       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3475       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3476       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3477       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3478       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3479     return 0;
3480 #endif
3481   return 1;
3482 }
3483
3484 static INLINE bool
3485 want_disp32 (const insn_template *t)
3486 {
3487   return flag_code != CODE_64BIT
3488          || i.prefix[ADDR_PREFIX]
3489          || (t->base_opcode == 0x8d
3490              && t->opcode_modifier.opcodespace == SPACE_BASE
3491              && (!i.types[1].bitfield.qword
3492                 || t->opcode_modifier.size == SIZE32));
3493 }
3494
/* Classify MNEMONIC by its leading characters.  Returns
     0 - not a math operation (no leading 'f', or fxr... / fxs...),
     2 - integer operation (fi...),
     3 - control operation (fldcw/fldenv, fn... other than fno...,
         frs..., fsa..., fstc/fstd/fste/fsts...),
     1 - any other mnemonic starting with 'f'.

   The result is meaningful only for opcodes with (memory) operands, so
   mnemonics of operand-less opcodes may be classified improperly; no
   call path gets here with them.  */

static int
intel_float_operand (const char *mnemonic)
{
  char second, third;

  if (mnemonic[0] != 'f')
    return 0; /* non-math */

  second = mnemonic[1];
  if (second == 'i')
    return 2; /* integer op */

  /* Only these second letters need a look at further characters.  This
     also keeps us from reading past the terminator of a one-letter
     mnemonic.  */
  if (second != 'l' && second != 'n' && second != 'r'
      && second != 's' && second != 'x')
    return 1;

  third = mnemonic[2];
  if (second == 'l')
    {
      if (third == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
        return 3; /* fldcw/fldenv */
    }
  else if (second == 'n')
    {
      if (third != 'o' /* fnop */)
        return 3; /* non-waiting control op */
    }
  else if (second == 'r')
    {
      if (third == 's')
        return 3; /* frstor/frstpm */
    }
  else if (second == 'x')
    {
      if (third == 'r' || third == 's')
        return 0; /* fxsave/fxrstor are not really math ops */
    }
  else if (third == 'a')
    return 3; /* fsave */
  else if (third == 't')
    {
      const char fourth = mnemonic[3];

      /* fstcw, fstdw, fstenv, fsts[gw] */
      if (fourth == 'c' || fourth == 'd' || fourth == 'e' || fourth == 's')
        return 3;
    }

  return 1;
}
3547
3548 static INLINE void
3549 install_template (const insn_template *t)
3550 {
3551   unsigned int l;
3552
3553   i.tm = *t;
3554
3555   /* Note that for pseudo prefixes this produces a length of 1. But for them
3556      the length isn't interesting at all.  */
3557   for (l = 1; l < 4; ++l)
3558     if (!(t->base_opcode >> (8 * l)))
3559       break;
3560
3561   i.opcode_length = l;
3562 }
3563
3564 /* Build the VEX prefix for the instruction described by the global `i',
        leaving the prefix bytes in i.vex.bytes and their count (2 or 3) in
        i.vex.length.

        T is the template the prefix is built for.  Its operand types help
        determine the vector length, and T[1] is installed as the current
        template when source and destination get swapped for an insn
        without the D opcode modifier.

        Before encoding, operands may be swapped where the conditions
        checked below hold, so that the 2-byte form becomes usable.  Such
        a swap updates i.types, i.op, i.rm and i.rex, and possibly i.tm
        and i.vex.register_specifier.  */
3565
3566 static void
3567 build_vex_prefix (const insn_template *t)
3568 {
3569   unsigned int register_specifier;
3570   unsigned int vector_length;
3571   unsigned int w;
3572
3573   /* Check register specifier.  */
       /* The prefix field holds the inverted register number (low 4 bits),
          or 0xf when there is no such register.  */
3574   if (i.vex.register_specifier)
3575     {
3576       register_specifier =
3577         ~register_number (i.vex.register_specifier) & 0xf;
3578       gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3579     }
3580   else
3581     register_specifier = 0xf;
3582
3583   /* Use 2-byte VEX prefix by swapping destination and source operand
3584      if there are more than 1 register operand.  */
3585   if (i.reg_operands > 1
3586       && i.vec_encoding != vex_encoding_vex3
3587       && i.dir_encoding == dir_encoding_default
3588       && i.operands == i.reg_operands
3589       && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3590       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3591       && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3592       && i.rex == REX_B)
3593     {
3594       unsigned int xchg = i.operands - 1;
3595       union i386_op temp_op;
3596       i386_operand_type temp_type;
3597
3598       temp_type = i.types[xchg];
3599       i.types[xchg] = i.types[0];
3600       i.types[0] = temp_type;
3601       temp_op = i.op[xchg];
3602       i.op[xchg] = i.op[0];
3603       i.op[0] = temp_op;
3604
3605       gas_assert (i.rm.mode == 3);
3606
           /* Only REX_B was set (see the condition above).  With the reg and
              r/m fields exchanged, that extension bit becomes REX_R, which
              the 2-byte prefix is able to express.  */
3607       i.rex = REX_R;
3608       xchg = i.rm.regmem;
3609       i.rm.regmem = i.rm.reg;
3610       i.rm.reg = xchg;
3611
3612       if (i.tm.opcode_modifier.d)
3613         i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3614                             ? Opcode_ExtD : Opcode_SIMD_IntD;
3615       else /* Use the next insn.  */
3616         install_template (&t[1]);
3617     }
3618
3619   /* Use 2-byte VEX prefix by swapping commutative source operands if there
3620      are no memory operands and at least 3 register ones.  */
3621   if (i.reg_operands >= 3
3622       && i.vec_encoding != vex_encoding_vex3
3623       && i.reg_operands == i.operands - i.imm_operands
3624       && i.tm.opcode_modifier.vex
3625       && i.tm.opcode_modifier.commutative
3626       && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3627       && i.rex == REX_B
3628       && i.vex.register_specifier
3629       && !(i.vex.register_specifier->reg_flags & RegRex))
3630     {
3631       unsigned int xchg = i.operands - i.reg_operands;
3632       union i386_op temp_op;
3633       i386_operand_type temp_type;
3634
3635       gas_assert (i.tm.opcode_modifier.opcodespace == SPACE_0F);
3636       gas_assert (!i.tm.opcode_modifier.sae);
3637       gas_assert (operand_type_equal (&i.types[i.operands - 2],
3638                                       &i.types[i.operands - 3]));
3639       gas_assert (i.rm.mode == 3);
3640
3641       temp_type = i.types[xchg];
3642       i.types[xchg] = i.types[xchg + 1];
3643       i.types[xchg + 1] = temp_type;
3644       temp_op = i.op[xchg];
3645       i.op[xchg] = i.op[xchg + 1];
3646       i.op[xchg + 1] = temp_op;
3647
           /* XCHG becomes the old r/m register number with bit 3 set (only
              REX_B was set, see above).  The r/m field takes over the number
              register_specifier encoded so far, which is asserted to be
              below 8.  The register entry pointer is then moved by the
              difference and register_specifier re-derived from XCHG.
              NOTE(review): the pointer adjustment assumes register table
              entries of one kind are laid out in register number order -
              confirm against the register table.  */
3648       i.rex = 0;
3649       xchg = i.rm.regmem | 8;
3650       i.rm.regmem = ~register_specifier & 0xf;
3651       gas_assert (!(i.rm.regmem & 8));
3652       i.vex.register_specifier += xchg - i.rm.regmem;
3653       register_specifier = ~xchg & 0xf;
3654     }
3655
       /* VECTOR_LENGTH ends up in bit 2 of the last prefix byte.  */
3656   if (i.tm.opcode_modifier.vex == VEXScalar)
3657     vector_length = avxscalar;
3658   else if (i.tm.opcode_modifier.vex == VEX256)
3659     vector_length = 1;
3660   else
3661     {
3662       unsigned int op;
3663
3664       /* Determine vector length from the last multi-length vector
3665          operand.  */
3666       vector_length = 0;
3667       for (op = t->operands; op--;)
3668         if (t->operand_types[op].bitfield.xmmword
3669             && t->operand_types[op].bitfield.ymmword
3670             && i.types[op].bitfield.ymmword)
3671           {
3672             vector_length = 1;
3673             break;
3674           }
3675     }
3676
3677   /* Check the REX.W bit and VEXW.  */
3678   if (i.tm.opcode_modifier.vexw == VEXWIG)
3679     w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3680   else if (i.tm.opcode_modifier.vexw)
3681     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3682   else
3683     w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3684
3685   /* Use 2-byte VEX prefix if possible.  */
3686   if (w == 0
3687       && i.vec_encoding != vex_encoding_vex3
3688       && i.tm.opcode_modifier.opcodespace == SPACE_0F
3689       && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3690     {
3691       /* 2-byte VEX prefix.  */
3692       unsigned int r;
3693
3694       i.vex.length = 2;
3695       i.vex.bytes[0] = 0xc5;
3696
3697       /* Check the REX.R bit.  */
3698       r = (i.rex & REX_R) ? 0 : 1;
3699       i.vex.bytes[1] = (r << 7
3700                         | register_specifier << 3
3701                         | vector_length << 2
3702                         | i.tm.opcode_modifier.opcodeprefix);
3703     }
3704   else
3705     {
3706       /* 3-byte VEX prefix.  */
3707       i.vex.length = 3;
3708
           /* The first byte is 0xc4 for the 0F / 0F38 / 0F3A opcode spaces
              and 0x8f for the XOP ones.  Any other space aborts.  */
3709       switch (i.tm.opcode_modifier.opcodespace)
3710         {
3711         case SPACE_0F:
3712         case SPACE_0F38:
3713         case SPACE_0F3A:
3714           i.vex.bytes[0] = 0xc4;
3715           break;
3716         case SPACE_XOP08:
3717         case SPACE_XOP09:
3718         case SPACE_XOP0A:
3719           i.vex.bytes[0] = 0x8f;
3720           break;
3721         default:
3722           abort ();
3723         }
3724
3725       /* The high 3 bits of the second VEX byte are 1's complement
3726          of RXB bits from REX.  */
3727       i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3728
3729       i.vex.bytes[2] = (w << 7
3730                         | register_specifier << 3
3731                         | vector_length << 2
3732                         | i.tm.opcode_modifier.opcodeprefix);
3733     }
3734 }
3735
3736 static INLINE bool
3737 is_evex_encoding (const insn_template *t)
3738 {
3739   return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3740          || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3741          || t->opcode_modifier.sae;
3742 }
3743
3744 static INLINE bool
3745 is_any_vex_encoding (const insn_template *t)
3746 {
3747   return t->opcode_modifier.vex || is_evex_encoding (t);
3748 }
3749
/* Return the byte count to associate with the broadcast of the current
   instruction when matched against template T.

   When i.broadcast.type is set, the result is
   (1 << (T's broadcast modifier - 1)) * i.broadcast.type, and it is
   also stored in i.broadcast.bytes.  Otherwise (asserted to happen in
   Intel syntax only) the result is derived from operand sizes as
   described below, and i.broadcast.bytes is left untouched.

   DIAG requests a warning when the size had to be guessed.  */

3750 static unsigned int
3751 get_broadcast_bytes (const insn_template *t, bool diag)
3752 {
3753   unsigned int op, bytes;
3754   const i386_operand_type *types;
3755
3756   if (i.broadcast.type)
3757     return i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
3758                                 * i.broadcast.type);
3759
3760   gas_assert (intel_syntax);
3761
       /* Locate the first template operand allowing base/index addressing.
          The assertion below requires one to exist.  */
3762   for (op = 0; op < t->operands; ++op)
3763     if (t->operand_types[op].bitfield.baseindex)
3764       break;
3765
3766   gas_assert (op < t->operands);
3767
       /* With a fixed (non-dynamic) EVEX length, return the smallest size
          permitted by that operand's template type which is larger than
          i.broadcast.bytes.  The cases deliberately fall through towards
          larger sizes, and abort when nothing fits.  */
3768   if (t->opcode_modifier.evex
3769       && t->opcode_modifier.evex != EVEXDYN)
3770     switch (i.broadcast.bytes)
3771       {
3772       case 1:
3773         if (t->operand_types[op].bitfield.word)
3774           return 2;
3775       /* Fall through.  */
3776       case 2:
3777         if (t->operand_types[op].bitfield.dword)
3778           return 4;
3779       /* Fall through.  */
3780       case 4:
3781         if (t->operand_types[op].bitfield.qword)
3782           return 8;
3783       /* Fall through.  */
3784       case 8:
3785         if (t->operand_types[op].bitfield.xmmword)
3786           return 16;
3787         if (t->operand_types[op].bitfield.ymmword)
3788           return 32;
3789         if (t->operand_types[op].bitfield.zmmword)
3790           return 64;
3791       /* Fall through.  */
3792       default:
3793         abort ();
3794       }
3795
3796   gas_assert (op + 1 < t->operands);
3797
       /* If the template lets the following operand be of more than one
          vector size, the actual type of that operand decides and no
          warning is wanted.  */
3798   if (t->operand_types[op + 1].bitfield.xmmword
3799       + t->operand_types[op + 1].bitfield.ymmword
3800       + t->operand_types[op + 1].bitfield.zmmword > 1)
3801     {
3802       types = &i.types[op + 1];
3803       diag = false;
3804     }
3805   else /* Ambiguous - guess with a preference to non-AVX512VL forms.  */
3806     types = &t->operand_types[op];
3807
       /* The widest size permitted by TYPES wins; 16 is the fallback.  */
3808   if (types->bitfield.zmmword)
3809     bytes = 64;
3810   else if (types->bitfield.ymmword)
3811     bytes = 32;
3812   else
3813     bytes = 16;
3814
3815   if (diag)
3816     as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3817              insn_name (t), bytes * 8);
3818
3819   return bytes;
3820 }
3821
3822 /* Build the EVEX prefix.

        Fills in the 4-byte prefix in i.vex.bytes[] for the current
        instruction (global `i') and sets i.vex.length to 4:
          bytes[0]  0x62.
          bytes[1]  inverted low three bits of i.rex in bits 7:5, bit 4 set
                    unless REX_R is set in i.vrex, opcode space in the low
                    bits; bit 6 is additionally cleared for all-register
                    forms with REX_B set in i.vrex.
          bytes[2]  W in bit 7, inverted register specifier in bits 6:3,
                    the always-set U bit (4), opcode prefix in the low bits.
          bytes[3]  zeroing (0x80), vector length or rounding control
                    (shifted to bit 5), broadcast/RC (0x10), 0x8 unless the
                    upper-16 register/index bit is in use, mask register
                    number in the low bits.
        Every bit of i.vrex is expected to be consumed here (asserted).  When
        the template's EVEX length is unset or EVEXDYN, the length derived
        from the operands is stored back into i.tm.opcode_modifier.evex.
        NOTE(review): bytes[3] is only conditionally assigned before being
        OR-ed into, so it is presumably zeroed before this is called -
        confirm against the caller.  */
3823
3824 static void
3825 build_evex_prefix (void)
3826 {
3827   unsigned int register_specifier, w;
       /* Collects the i.vrex bits that got encoded into the prefix.  */
3828   rex_byte vrex_used = 0;
3829
3830   /* Check register specifier.  */
3831   if (i.vex.register_specifier)
3832     {
3833       gas_assert ((i.vrex & REX_X) == 0);
3834
3835       register_specifier = i.vex.register_specifier->reg_num;
3836       if ((i.vex.register_specifier->reg_flags & RegRex))
3837         register_specifier += 8;
3838       /* The upper 16 registers are encoded in the fourth byte of the
3839          EVEX prefix.  */
3840       if (!(i.vex.register_specifier->reg_flags & RegVRex))
3841         i.vex.bytes[3] = 0x8;
           /* The 4-bit specifier is stored inverted.  */
3842       register_specifier = ~register_specifier & 0xf;
3843     }
3844   else
3845     {
           /* No register specifier: all four (inverted) bits set.  */
3846       register_specifier = 0xf;
3847
3848       /* Encode upper 16 vector index register in the fourth byte of
3849          the EVEX prefix.  */
3850       if (!(i.vrex & REX_X))
3851         i.vex.bytes[3] = 0x8;
3852       else
3853         vrex_used |= REX_X;
3854     }
3855
3856   /* 4 byte EVEX prefix.  */
3857   i.vex.length = 4;
3858   i.vex.bytes[0] = 0x62;
3859
3860   /* The high 3 bits of the second EVEX byte are 1's complement of RXB
3861      bits from REX.  */
3862   gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
3863   gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_EVEXMAP6);
3864   i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3865
3866   /* The fifth bit of the second EVEX byte is 1's complement of the
3867      REX_R bit in VREX.  */
3868   if (!(i.vrex & REX_R))
3869     i.vex.bytes[1] |= 0x10;
3870   else
3871     vrex_used |= REX_R;
3872
3873   if ((i.reg_operands + i.imm_operands) == i.operands)
3874     {
3875       /* When all operands are registers, the REX_X bit in REX is not
3876          used.  We reuse it to encode the upper 16 registers, which is
3877          indicated by the REX_B bit in VREX.  The REX_X bit is encoded
3878          as 1's complement.  */
3879       if ((i.vrex & REX_B))
3880         {
3881           vrex_used |= REX_B;
3882           i.vex.bytes[1] &= ~0x40;
3883         }
3884     }
3885
3886   /* EVEX instructions shouldn't need the REX prefix.  */
       /* Any VREX bit not consumed above trips the assertion.  */
3887   i.vrex &= ~vrex_used;
3888   gas_assert (i.vrex == 0);
3889
3890   /* Check the REX.W bit and VEXW.  */
3891   if (i.tm.opcode_modifier.vexw == VEXWIG)
3892     w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3893   else if (i.tm.opcode_modifier.vexw)
3894     w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3895   else
3896     w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3897
3898   /* The third byte of the EVEX prefix.  */
3899   i.vex.bytes[2] = ((w << 7)
3900                     | (register_specifier << 3)
3901                     | 4 /* Encode the U bit.  */
3902                     | i.tm.opcode_modifier.opcodeprefix);
3903
3904   /* The fourth byte of the EVEX prefix.  */
3905   /* The zeroing-masking bit.  */
3906   if (i.mask.reg && i.mask.zeroing)
3907     i.vex.bytes[3] |= 0x80;
3908
3909   /* Don't always set the broadcast bit if there is no RC.  */
3910   if (i.rounding.type == rc_none)
3911     {
3912       /* Encode the vector length.  */
3913       unsigned int vec_length;
3914
           /* The template does not fix the length: derive it from the
              operands and record it in the template copy.  */
3915       if (!i.tm.opcode_modifier.evex
3916           || i.tm.opcode_modifier.evex == EVEXDYN)
3917         {
3918           unsigned int op;
3919
3920           /* Determine vector length from the last multi-length vector
3921              operand.  */
3922           for (op = i.operands; op--;)
3923             if (i.tm.operand_types[op].bitfield.xmmword
3924                 + i.tm.operand_types[op].bitfield.ymmword
3925                 + i.tm.operand_types[op].bitfield.zmmword > 1)
3926               {
3927                 if (i.types[op].bitfield.zmmword)
3928                   {
3929                     i.tm.opcode_modifier.evex = EVEX512;
3930                     break;
3931                   }
3932                 else if (i.types[op].bitfield.ymmword)
3933                   {
3934                     i.tm.opcode_modifier.evex = EVEX256;
3935                     break;
3936                   }
3937                 else if (i.types[op].bitfield.xmmword)
3938                   {
3939                     i.tm.opcode_modifier.evex = EVEX128;
3940                     break;
3941                   }
3942                 else if (i.broadcast.bytes && op == i.broadcast.operand)
3943                   {
                         /* The broadcast operand itself carries no vector
                            size; ask get_broadcast_bytes (), allowing it
                            to diagnose an ambiguous form.  */
3944                     switch (get_broadcast_bytes (&i.tm, true))
3945                       {
3946                         case 64:
3947                           i.tm.opcode_modifier.evex = EVEX512;
3948                           break;
3949                         case 32:
3950                           i.tm.opcode_modifier.evex = EVEX256;
3951                           break;
3952                         case 16:
3953                           i.tm.opcode_modifier.evex = EVEX128;
3954                           break;
3955                         default:
3956                           abort ();
3957                       }
3958                     break;
3959                   }
3960               }
3961
               /* OP is unsigned: if the loop ran out without a break, the
                  final post-decrement wrapped it around, so this catches
                  the case where no operand determined the length.  */
3962           if (op >= MAX_OPERANDS)
3963             abort ();
3964         }
3965
3966       switch (i.tm.opcode_modifier.evex)
3967         {
3968         case EVEXLIG: /* LL' is ignored */
3969           vec_length = evexlig << 5;
3970           break;
3971         case EVEX128:
3972           vec_length = 0 << 5;
3973           break;
3974         case EVEX256:
3975           vec_length = 1 << 5;
3976           break;
3977         case EVEX512:
3978           vec_length = 2 << 5;
3979           break;
3980         default:
3981           abort ();
3982           break;
3983         }
3984       i.vex.bytes[3] |= vec_length;
3985       /* Encode the broadcast bit.  */
3986       if (i.broadcast.bytes)
3987         i.vex.bytes[3] |= 0x10;
3988     }
       /* With rounding control or SAE the 0x10 bit is always set, and the
          bits otherwise holding the vector length carry the rounding type
          (or evexrcig for SAE-only).  */
3989   else if (i.rounding.type != saeonly)
3990     i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3991   else
3992     i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3993
       /* The mask register number goes into the low bits.  */
3994   if (i.mask.reg)
3995     i.vex.bytes[3] |= i.mask.reg->reg_num;
3996 }
3997
3998 static void
3999 process_immext (void)
4000 {
4001   expressionS *exp;
4002
4003   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4004      which is coded in the same place as an 8-bit immediate field
4005      would be.  Here we fake an 8-bit immediate operand from the
4006      opcode suffix stored in tm.extension_opcode.
4007
4008      AVX instructions also use this encoding, for some of
4009      3 argument instructions.  */
4010
4011   gas_assert (i.imm_operands <= 1
4012               && (i.operands <= 2
4013                   || (is_any_vex_encoding (&i.tm)
4014                       && i.operands <= 4)));
4015
4016   exp = &im_expressions[i.imm_operands++];
4017   i.op[i.operands].imms = exp;
4018   i.types[i.operands].bitfield.imm8 = 1;
4019   i.operands++;
4020   exp->X_op = O_constant;
4021   exp->X_add_number = i.tm.extension_opcode;
4022   i.tm.extension_opcode = None;
4023 }
4024
4025
4026 static int
4027 check_hle (void)
4028 {
4029   switch (i.tm.opcode_modifier.prefixok)
4030     {
4031     default:
4032       abort ();
4033     case PrefixLock:
4034     case PrefixNone:
4035     case PrefixNoTrack:
4036     case PrefixRep:
4037       as_bad (_("invalid instruction `%s' after `%s'"),
4038               insn_name (&i.tm), i.hle_prefix);
4039       return 0;
4040     case PrefixHLELock:
4041       if (i.prefix[LOCK_PREFIX])
4042         return 1;
4043       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4044       return 0;
4045     case PrefixHLEAny:
4046       return 1;
4047     case PrefixHLERelease:
4048       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4049         {
4050           as_bad (_("instruction `%s' after `xacquire' not allowed"),
4051                   insn_name (&i.tm));
4052           return 0;
4053         }
4054       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4055         {
4056           as_bad (_("memory destination needed for instruction `%s'"
4057                     " after `xrelease'"), insn_name (&i.tm));
4058           return 0;
4059         }
4060       return 1;
4061     }
4062 }
4063
4064 /* Encode aligned vector move as unaligned vector move.  */
4065
4066 static void
4067 encode_with_unaligned_vector_move (void)
4068 {
4069   switch (i.tm.base_opcode)
4070     {
4071     case 0x28:  /* Load instructions.  */
4072     case 0x29:  /* Store instructions.  */
4073       /* movaps/movapd/vmovaps/vmovapd.  */
4074       if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4075           && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4076         i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4077       break;
4078     case 0x6f:  /* Load instructions.  */
4079     case 0x7f:  /* Store instructions.  */
4080       /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4081       if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4082           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4083         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4084       break;
4085     default:
4086       break;
4087     }
4088 }
4089
4090 /* Try the shortest encoding by shortening operand size.

        Operates in place on the current instruction (global `i').  First,
        an LEA (base opcode 0x8d in SPACE_BASE) whose address is a plain
        symbol/constant or a single unscaled register is rewritten as a
        MOV or MOVZX.  Then at most one of the following rewrites, tried in
        this order, is applied:
          - with optimize_for_space: TEST $imm7 on a wider register -> byte
            register;
          - in 64-bit mode: selected 64-bit operations -> 32-bit ones;
          - with optimize > 1 and not optimizing for space: AND/OR of a
            register with itself -> TEST;
          - 3-register vector/mask operations with identical sources ->
            128-bit VEX / EVEX (or word-sized mask) forms;
          - EVEX vector moves and logic operations -> VEX forms.
        Several paths return early; changes made before such a return are
        left in place.  */
4091
4092 static void
4093 optimize_encoding (void)
4094 {
4095   unsigned int j;
4096
4097   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4098       && i.tm.base_opcode == 0x8d)
4099     {
4100       /* Optimize: -O:
4101            lea symbol, %rN    -> mov $symbol, %rN
4102            lea (%rM), %rN     -> mov %rM, %rN
4103            lea (,%rM,1), %rN  -> mov %rM, %rN
4104
4105            and in 32-bit mode for 16-bit addressing
4106
4107            lea (%rM), %rN     -> movzx %rM, %rN
4108
4109            and in 64-bit mode zap 32-bit addressing in favor of using a
4110            32-bit (or less) destination.
4111        */
4112       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4113         {
4114           if (!i.op[1].regs->reg_type.bitfield.word)
4115             i.tm.opcode_modifier.size = SIZE32;
4116           i.prefix[ADDR_PREFIX] = 0;
4117         }
4118
4119       if (!i.index_reg && !i.base_reg)
4120         {
4121           /* Handle:
4122                lea symbol, %rN    -> mov $symbol, %rN
4123            */
4124           if (flag_code == CODE_64BIT)
4125             {
4126               /* Don't transform a relocation to a 16-bit one.  */
4127               if (i.op[0].disps
4128                   && i.op[0].disps->X_op != O_constant
4129                   && i.op[1].regs->reg_type.bitfield.word)
4130                 return;
4131
                   /* A destination of 32 bits or less (including one
                      narrowed to SIZE32 above) takes the opcode 0xb8 form
                      without ModRM; a true 64-bit destination takes the
                      0xc7 form with a sign-extended 32-bit immediate.  */
4132               if (!i.op[1].regs->reg_type.bitfield.qword
4133                   || i.tm.opcode_modifier.size == SIZE32)
4134                 {
4135                   i.tm.base_opcode = 0xb8;
4136                   i.tm.opcode_modifier.modrm = 0;
4137                   if (!i.op[1].regs->reg_type.bitfield.word)
4138                     i.types[0].bitfield.imm32 = 1;
4139                   else
4140                     {
4141                       i.tm.opcode_modifier.size = SIZE16;
4142                       i.types[0].bitfield.imm16 = 1;
4143                     }
4144                 }
4145               else
4146                 {
4147                   /* Subject to further optimization below.  */
4148                   i.tm.base_opcode = 0xc7;
4149                   i.tm.extension_opcode = 0;
4150                   i.types[0].bitfield.imm32s = 1;
4151                   i.types[0].bitfield.baseindex = 0;
4152                 }
4153             }
4154           /* Outside of 64-bit mode address and operand sizes have to match if
4155              a relocation is involved, as otherwise we wouldn't (currently) or
4156              even couldn't express the relocation correctly.  */
4157           else if (i.op[0].disps
4158                    && i.op[0].disps->X_op != O_constant
4159                    && ((!i.prefix[ADDR_PREFIX])
4160                        != (flag_code == CODE_32BIT
4161                            ? i.op[1].regs->reg_type.bitfield.dword
4162                            : i.op[1].regs->reg_type.bitfield.word)))
4163             return;
4164           /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4165              destination is going to grow encoding size.  */
4166           else if (flag_code == CODE_16BIT
4167                    && (optimize <= 1 || optimize_for_space)
4168                    && !i.prefix[ADDR_PREFIX]
4169                    && i.op[1].regs->reg_type.bitfield.dword)
4170             return;
4171           else
4172             {
4173               i.tm.base_opcode = 0xb8;
4174               i.tm.opcode_modifier.modrm = 0;
4175               if (i.op[1].regs->reg_type.bitfield.dword)
4176                 i.types[0].bitfield.imm32 = 1;
4177               else
4178                 i.types[0].bitfield.imm16 = 1;
4179
                   /* A constant computed with 16-bit addressing but stored
                      to a 32-bit destination is truncated to 16 bits.  */
4180               if (i.op[0].disps
4181                   && i.op[0].disps->X_op == O_constant
4182                   && i.op[1].regs->reg_type.bitfield.dword
4183                   /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4184                      GCC 5. */
4185                   && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4186                 i.op[0].disps->X_add_number &= 0xffff;
4187             }
4188
               /* Operand 0 is an immediate from here on.  If there is no
                  expression attached to it yet, attach an absent one.  */
4189           i.tm.operand_types[0] = i.types[0];
4190           i.imm_operands = 1;
4191           if (!i.op[0].imms)
4192             {
4193               i.op[0].imms = &im_expressions[0];
4194               i.op[0].imms->X_op = O_absent;
4195             }
4196         }
           /* With a base and/or index register, only a zero constant
              displacement (or none) can be handled.  */
4197       else if (i.op[0].disps
4198                   && (i.op[0].disps->X_op != O_constant
4199                       || i.op[0].disps->X_add_number))
4200         return;
4201       else
4202         {
4203           /* Handle:
4204                lea (%rM), %rN     -> mov %rM, %rN
4205                lea (,%rM,1), %rN  -> mov %rM, %rN
4206                lea (%rM), %rN     -> movzx %rM, %rN
4207            */
4208           const reg_entry *addr_reg;
4209
               /* Exactly one address register is required: either a base
                  other than RegIP, or an unscaled index other than RegIZ.  */
4210           if (!i.index_reg && i.base_reg->reg_num != RegIP)
4211             addr_reg = i.base_reg;
4212           else if (!i.base_reg
4213                    && i.index_reg->reg_num != RegIZ
4214                    && !i.log2_scale_factor)
4215             addr_reg = i.index_reg;
4216           else
4217             return;
4218
4219           if (addr_reg->reg_type.bitfield.word
4220               && i.op[1].regs->reg_type.bitfield.dword)
4221             {
4222               if (flag_code != CODE_32BIT)
4223                 return;
4224               i.tm.opcode_modifier.opcodespace = SPACE_0F;
4225               i.tm.base_opcode = 0xb7;
4226             }
4227           else
4228             i.tm.base_opcode = 0x8b;
4229
4230           if (addr_reg->reg_type.bitfield.dword
4231               && i.op[1].regs->reg_type.bitfield.qword)
4232             i.tm.opcode_modifier.size = SIZE32;
4233
4234           i.op[0].regs = addr_reg;
4235           i.reg_operands = 2;
4236         }
4237
           /* No memory operand remains after the conversion, so drop all
              addressing related state.  */
4238       i.mem_operands = 0;
4239       i.disp_operands = 0;
4240       i.prefix[ADDR_PREFIX] = 0;
4241       i.prefix[SEG_PREFIX] = 0;
4242       i.seg[0] = NULL;
4243     }
4244
4245   if (optimize_for_space
4246       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4247       && i.reg_operands == 1
4248       && i.imm_operands == 1
4249       && !i.types[1].bitfield.byte
4250       && i.op[0].imms->X_op == O_constant
4251       && fits_in_imm7 (i.op[0].imms->X_add_number)
4252       && (i.tm.base_opcode == 0xa8
4253           || (i.tm.base_opcode == 0xf6
4254               && i.tm.extension_opcode == 0x0)))
4255     {
4256       /* Optimize: -Os:
4257            test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
4258        */
4259       unsigned int base_regnum = i.op[1].regs->reg_num;
4260       if (flag_code == CODE_64BIT || base_regnum < 4)
4261         {
4262           i.types[1].bitfield.byte = 1;
4263           /* Ignore the suffix.  */
4264           i.suffix = 0;
4265           /* Convert to byte registers.  */
               /* NOTE(review): J is a distance, in entries, back from the
                  current register to the corresponding byte register.  This
                  relies on the layout of the register table, which is not
                  visible here - confirm against the table definition.  */
4266           if (i.types[1].bitfield.word)
4267             j = 16;
4268           else if (i.types[1].bitfield.dword)
4269             j = 32;
4270           else
4271             j = 48;
4272           if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4273             j += 8;
4274           i.op[1].regs -= j;
4275         }
4276     }
4277   else if (flag_code == CODE_64BIT
4278            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4279            && ((i.types[1].bitfield.qword
4280                 && i.reg_operands == 1
4281                 && i.imm_operands == 1
4282                 && i.op[0].imms->X_op == O_constant
4283                 && ((i.tm.base_opcode == 0xb8
4284                      && i.tm.extension_opcode == None
4285                      && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4286                     || (fits_in_imm31 (i.op[0].imms->X_add_number)
4287                         && ((i.tm.base_opcode == 0x24
4288                              || i.tm.base_opcode == 0xa8)
4289                             || (i.tm.base_opcode == 0x80
4290                                 && i.tm.extension_opcode == 0x4)
4291                             || ((i.tm.base_opcode == 0xf6
4292                                  || (i.tm.base_opcode | 1) == 0xc7)
4293                                 && i.tm.extension_opcode == 0x0)))
4294                     || (fits_in_imm7 (i.op[0].imms->X_add_number)
4295                         && i.tm.base_opcode == 0x83
4296                         && i.tm.extension_opcode == 0x4)))
4297                || (i.types[0].bitfield.qword
4298                    && ((i.reg_operands == 2
4299                         && i.op[0].regs == i.op[1].regs
4300                         && (i.tm.base_opcode == 0x30
4301                             || i.tm.base_opcode == 0x28))
4302                        || (i.reg_operands == 1
4303                            && i.operands == 1
4304                            && i.tm.base_opcode == 0x30)))))
4305     {
4306       /* Optimize: -O:
4307            andq $imm31, %r64   -> andl $imm31, %r32
4308            andq $imm7, %r64    -> andl $imm7, %r32
4309            testq $imm31, %r64  -> testl $imm31, %r32
4310            xorq %r64, %r64     -> xorl %r32, %r32
4311            subq %r64, %r64     -> subl %r32, %r32
4312            movq $imm31, %r64   -> movl $imm31, %r32
4313            movq $imm32, %r64   -> movl $imm32, %r32
4314         */
4315       i.tm.opcode_modifier.size = SIZE32;
           /* Operand 0 is either the immediate or the first register.  */
4316       if (i.imm_operands)
4317         {
4318           i.types[0].bitfield.imm32 = 1;
4319           i.types[0].bitfield.imm32s = 0;
4320           i.types[0].bitfield.imm64 = 0;
4321         }
4322       else
4323         {
4324           i.types[0].bitfield.dword = 1;
4325           i.types[0].bitfield.qword = 0;
4326         }
4327       i.types[1].bitfield.dword = 1;
4328       i.types[1].bitfield.qword = 0;
4329       if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4330         {
4331           /* Handle
4332                movq $imm31, %r64   -> movl $imm31, %r32
4333                movq $imm32, %r64   -> movl $imm32, %r32
4334            */
4335           i.tm.operand_types[0].bitfield.imm32 = 1;
4336           i.tm.operand_types[0].bitfield.imm32s = 0;
4337           i.tm.operand_types[0].bitfield.imm64 = 0;
4338           if ((i.tm.base_opcode | 1) == 0xc7)
4339             {
4340               /* Handle
4341                    movq $imm31, %r64   -> movl $imm31, %r32
4342                */
4343               i.tm.base_opcode = 0xb8;
4344               i.tm.extension_opcode = None;
4345               i.tm.opcode_modifier.w = 0;
4346               i.tm.opcode_modifier.modrm = 0;
4347             }
4348         }
4349     }
4350   else if (optimize > 1
4351            && !optimize_for_space
4352            && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4353            && i.reg_operands == 2
4354            && i.op[0].regs == i.op[1].regs
4355            && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4356                || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4357            && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4358     {
4359       /* Optimize: -O2:
4360            andb %rN, %rN  -> testb %rN, %rN
4361            andw %rN, %rN  -> testw %rN, %rN
4362            andq %rN, %rN  -> testq %rN, %rN
4363            orb %rN, %rN   -> testb %rN, %rN
4364            orw %rN, %rN   -> testw %rN, %rN
4365            orq %rN, %rN   -> testq %rN, %rN
4366
4367            and outside of 64-bit mode
4368
4369            andl %rN, %rN  -> testl %rN, %rN
4370            orl %rN, %rN   -> testl %rN, %rN
4371        */
           /* Only the low opcode bit of the original is carried over.  */
4372       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4373     }
4374   else if (i.reg_operands == 3
4375            && i.op[0].regs == i.op[1].regs
4376            && !i.types[2].bitfield.xmmword
4377            && (i.tm.opcode_modifier.vex
4378                || ((!i.mask.reg || i.mask.zeroing)
4379                    && is_evex_encoding (&i.tm)
4380                    && (i.vec_encoding != vex_encoding_evex
4381                        || cpu_arch_isa_flags.bitfield.cpuavx512vl
4382                        || i.tm.cpu_flags.bitfield.cpuavx512vl
4383                        || (i.tm.operand_types[2].bitfield.zmmword
4384                            && i.types[2].bitfield.ymmword))))
4385            && i.tm.opcode_modifier.opcodespace == SPACE_0F
4386            && ((i.tm.base_opcode | 2) == 0x57
4387                || i.tm.base_opcode == 0xdf
4388                || i.tm.base_opcode == 0xef
4389                || (i.tm.base_opcode | 3) == 0xfb
4390                || i.tm.base_opcode == 0x42
4391                || i.tm.base_opcode == 0x47))
4392     {
4393       /* Optimize: -O1:
4394            VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4395            vpsubq and vpsubw:
4396              EVEX VOP %zmmM, %zmmM, %zmmN
4397                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4398                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4399              EVEX VOP %ymmM, %ymmM, %ymmN
4400                -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4401                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4402              VEX VOP %ymmM, %ymmM, %ymmN
4403                -> VEX VOP %xmmM, %xmmM, %xmmN
4404            VOP, one of vpandn and vpxor:
4405              VEX VOP %ymmM, %ymmM, %ymmN
4406                -> VEX VOP %xmmM, %xmmM, %xmmN
4407            VOP, one of vpandnd and vpandnq:
4408              EVEX VOP %zmmM, %zmmM, %zmmN
4409                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4410                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4411              EVEX VOP %ymmM, %ymmM, %ymmN
4412                -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4413                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4414            VOP, one of vpxord and vpxorq:
4415              EVEX VOP %zmmM, %zmmM, %zmmN
4416                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4417                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4418              EVEX VOP %ymmM, %ymmM, %ymmN
4419                -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4420                -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4421            VOP, one of kxord and kxorq:
4422              VEX VOP %kM, %kM, %kN
4423                -> VEX kxorw %kM, %kM, %kN
4424            VOP, one of kandnd and kandnq:
4425              VEX VOP %kM, %kM, %kN
4426                -> VEX kandnw %kM, %kM, %kN
4427        */
4428       if (is_evex_encoding (&i.tm))
4429         {
4430           if (i.vec_encoding != vex_encoding_evex)
4431             {
4432               i.tm.opcode_modifier.vex = VEX128;
4433               i.tm.opcode_modifier.vexw = VEXW0;
4434               i.tm.opcode_modifier.evex = 0;
4435             }
4436           else if (optimize > 1)
4437             i.tm.opcode_modifier.evex = EVEX128;
4438           else
4439             return;
4440         }
4441       else if (i.tm.operand_types[0].bitfield.class == RegMask)
4442         {
4443           i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4444           i.tm.opcode_modifier.vexw = VEXW0;
4445         }
4446       else
4447         i.tm.opcode_modifier.vex = VEX128;
4448
           /* For a VEX encoding, make the recorded operand types match
              the 128-bit form chosen above.  */
4449       if (i.tm.opcode_modifier.vex)
4450         for (j = 0; j < 3; j++)
4451           {
4452             i.types[j].bitfield.xmmword = 1;
4453             i.types[j].bitfield.ymmword = 0;
4454           }
4455     }
4456   else if (i.vec_encoding != vex_encoding_evex
4457            && !i.types[0].bitfield.zmmword
4458            && !i.types[1].bitfield.zmmword
4459            && !i.mask.reg
4460            && !i.broadcast.bytes
4461            && is_evex_encoding (&i.tm)
4462            && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4463                || (i.tm.base_opcode & ~4) == 0xdb
4464                || (i.tm.base_opcode & ~4) == 0xeb)
4465            && i.tm.extension_opcode == None)
4466     {
4467       /* Optimize: -O1:
4468            VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4469            vmovdqu32 and vmovdqu64:
4470              EVEX VOP %xmmM, %xmmN
4471                -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4472              EVEX VOP %ymmM, %ymmN
4473                -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4474              EVEX VOP %xmmM, mem
4475                -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4476              EVEX VOP %ymmM, mem
4477                -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4478              EVEX VOP mem, %xmmN
4479                -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4480              EVEX VOP mem, %ymmN
4481                -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4482            VOP, one of vpand, vpandn, vpor, vpxor:
4483              EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4484                -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4485              EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4486                -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4487              EVEX VOP{d,q} mem, %xmmM, %xmmN
4488                -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4489              EVEX VOP{d,q} mem, %ymmM, %ymmN
4490                -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4491        */
4492       for (j = 0; j < i.operands; j++)
4493         if (operand_type_check (i.types[j], disp)
4494             && i.op[j].disps->X_op == O_constant)
4495           {
4496             /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4497                has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4498                bytes, we choose EVEX Disp8 over VEX Disp32.  */
4499             int evex_disp8, vex_disp8;
4500             unsigned int memshift = i.memshift;
4501             offsetT n = i.op[j].disps->X_add_number;
4502
                 /* The same displacement is checked twice, with the
                    current i.memshift and with it cleared; presumably
                    fits_in_disp8 () consults i.memshift - confirm.  If the
                    answers differ, i.memshift is restored and the
                    instruction is left as is.  */
4503             evex_disp8 = fits_in_disp8 (n);
4504             i.memshift = 0;
4505             vex_disp8 = fits_in_disp8 (n);
4506             if (evex_disp8 != vex_disp8)
4507               {
4508                 i.memshift = memshift;
4509                 return;
4510               }
4511
4512             i.types[j].bitfield.disp8 = vex_disp8;
4513             break;
4514           }
4515       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4516           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4517         i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4518       i.tm.opcode_modifier.vex
4519         = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4520       i.tm.opcode_modifier.vexw = VEXW0;
4521       /* VPAND, VPOR, and VPXOR are commutative.  */
4522       if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4523         i.tm.opcode_modifier.commutative = 1;
           /* Drop every EVEX-only attribute from the template copy.  */
4524       i.tm.opcode_modifier.evex = 0;
4525       i.tm.opcode_modifier.masking = 0;
4526       i.tm.opcode_modifier.broadcast = 0;
4527       i.tm.opcode_modifier.disp8memshift = 0;
4528       i.memshift = 0;
           /* J is below i.operands only if the loop above broke out at an
              operand with a constant displacement; re-evaluate its disp8
              property now that i.memshift is zero.  */
4529       if (j < i.operands)
4530         i.types[j].bitfield.disp8
4531           = fits_in_disp8 (i.op[j].disps->X_add_number);
4532     }
4533 }
4534
4535 /* Return non-zero for load instruction.  */
4536
4537 static int
4538 load_insn_p (void)
4539 {
4540   unsigned int dest;
4541   int any_vex_p = is_any_vex_encoding (&i.tm);
4542   unsigned int base_opcode = i.tm.base_opcode | 1;
4543
4544   if (!any_vex_p)
4545     {
4546       /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4547          bndcn, bndstx, bndldx, clflushopt, clwb, cldemote.  */
4548       if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4549         return 0;
4550
4551       /* pop.   */
4552       if (strcmp (insn_name (&i.tm), "pop") == 0)
4553         return 1;
4554     }
4555
4556   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4557     {
4558       /* popf, popa.   */
4559       if (i.tm.base_opcode == 0x9d
4560           || i.tm.base_opcode == 0x61)
4561         return 1;
4562
4563       /* movs, cmps, lods, scas.  */
4564       if ((i.tm.base_opcode | 0xb) == 0xaf)
4565         return 1;
4566
4567       /* outs, xlatb.  */
4568       if (base_opcode == 0x6f
4569           || i.tm.base_opcode == 0xd7)
4570         return 1;
4571       /* NB: For AMD-specific insns with implicit memory operands,
4572          they're intentionally not covered.  */
4573     }
4574
4575   /* No memory operand.  */
4576   if (!i.mem_operands)
4577     return 0;
4578
4579   if (any_vex_p)
4580     {
4581       /* vldmxcsr.  */
4582       if (i.tm.base_opcode == 0xae
4583           && i.tm.opcode_modifier.vex
4584           && i.tm.opcode_modifier.opcodespace == SPACE_0F
4585           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4586           && i.tm.extension_opcode == 2)
4587         return 1;
4588     }
4589   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4590     {
4591       /* test, not, neg, mul, imul, div, idiv.  */
4592       if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4593           && i.tm.extension_opcode != 1)
4594         return 1;
4595
4596       /* inc, dec.  */
4597       if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4598         return 1;
4599
4600       /* add, or, adc, sbb, and, sub, xor, cmp.  */
4601       if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4602         return 1;
4603
4604       /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4605       if ((base_opcode == 0xc1
4606            || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4607           && i.tm.extension_opcode != 6)
4608         return 1;
4609
4610       /* Check for x87 instructions.  */
4611       if (base_opcode >= 0xd8 && base_opcode <= 0xdf)
4612         {
4613           /* Skip fst, fstp, fstenv, fstcw.  */
4614           if (i.tm.base_opcode == 0xd9
4615               && (i.tm.extension_opcode == 2
4616                   || i.tm.extension_opcode == 3
4617                   || i.tm.extension_opcode == 6
4618                   || i.tm.extension_opcode == 7))
4619             return 0;
4620
4621           /* Skip fisttp, fist, fistp, fstp.  */
4622           if (i.tm.base_opcode == 0xdb
4623               && (i.tm.extension_opcode == 1
4624                   || i.tm.extension_opcode == 2
4625                   || i.tm.extension_opcode == 3
4626                   || i.tm.extension_opcode == 7))
4627             return 0;
4628
4629           /* Skip fisttp, fst, fstp, fsave, fstsw.  */
4630           if (i.tm.base_opcode == 0xdd
4631               && (i.tm.extension_opcode == 1
4632                   || i.tm.extension_opcode == 2
4633                   || i.tm.extension_opcode == 3
4634                   || i.tm.extension_opcode == 6
4635                   || i.tm.extension_opcode == 7))
4636             return 0;
4637
4638           /* Skip fisttp, fist, fistp, fbstp, fistp.  */
4639           if (i.tm.base_opcode == 0xdf
4640               && (i.tm.extension_opcode == 1
4641                   || i.tm.extension_opcode == 2
4642                   || i.tm.extension_opcode == 3
4643                   || i.tm.extension_opcode == 6
4644                   || i.tm.extension_opcode == 7))
4645             return 0;
4646
4647           return 1;
4648         }
4649     }
4650   else if (i.tm.opcode_modifier.opcodespace == SPACE_0F)
4651     {
4652       /* bt, bts, btr, btc.  */
4653       if (i.tm.base_opcode == 0xba
4654           && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4655         return 1;
4656
4657       /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld.  */
4658       if (i.tm.base_opcode == 0xc7
4659           && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4660           && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4661               || i.tm.extension_opcode == 6))
4662         return 1;
4663
4664       /* fxrstor, ldmxcsr, xrstor.  */
4665       if (i.tm.base_opcode == 0xae
4666           && (i.tm.extension_opcode == 1
4667               || i.tm.extension_opcode == 2
4668               || i.tm.extension_opcode == 5))
4669         return 1;
4670
4671       /* lgdt, lidt, lmsw.  */
4672       if (i.tm.base_opcode == 0x01
4673           && (i.tm.extension_opcode == 2
4674               || i.tm.extension_opcode == 3
4675               || i.tm.extension_opcode == 6))
4676         return 1;
4677     }
4678
4679   dest = i.operands - 1;
4680
4681   /* Check fake imm8 operand and 3 source operands.  */
4682   if ((i.tm.opcode_modifier.immext
4683        || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4684       && i.types[dest].bitfield.imm8)
4685     dest--;
4686
4687   /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg.  */
4688   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4689       && (base_opcode == 0x1
4690           || base_opcode == 0x9
4691           || base_opcode == 0x11
4692           || base_opcode == 0x19
4693           || base_opcode == 0x21
4694           || base_opcode == 0x29
4695           || base_opcode == 0x31
4696           || base_opcode == 0x39
4697           || (base_opcode | 2) == 0x87))
4698     return 1;
4699
4700   /* xadd.  */
4701   if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4702       && base_opcode == 0xc1)
4703     return 1;
4704
4705   /* Check for load instruction.  */
4706   return (i.types[dest].bitfield.class != ClassNone
4707           || i.types[dest].bitfield.instance == Accum);
4708 }
4709
4710 /* Output lfence, 0xfaee8, after instruction.  */
4711
4712 static void
4713 insert_lfence_after (void)
4714 {
4715   if (lfence_after_load && load_insn_p ())
4716     {
4717       /* There are also two REP string instructions that require
4718          special treatment. Specifically, the compare string (CMPS)
4719          and scan string (SCAS) instructions set EFLAGS in a manner
4720          that depends on the data being compared/scanned. When used
4721          with a REP prefix, the number of iterations may therefore
4722          vary depending on this data. If the data is a program secret
4723          chosen by the adversary using an LVI method,
4724          then this data-dependent behavior may leak some aspect
4725          of the secret.  */
4726       if (((i.tm.base_opcode | 0x1) == 0xa7
4727            || (i.tm.base_opcode | 0x1) == 0xaf)
4728           && i.prefix[REP_PREFIX])
4729         {
4730             as_warn (_("`%s` changes flags which would affect control flow behavior"),
4731                      insn_name (&i.tm));
4732         }
4733       char *p = frag_more (3);
4734       *p++ = 0xf;
4735       *p++ = 0xae;
4736       *p = 0xe8;
4737     }
4738 }
4739
4740 /* Output lfence, 0xfaee8, before instruction.  */
4741
4742 static void
4743 insert_lfence_before (void)
4744 {
4745   char *p;
4746
4747   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
4748     return;
4749
4750   if (i.tm.base_opcode == 0xff
4751       && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4752     {
4753       /* Insert lfence before indirect branch if needed.  */
4754
4755       if (lfence_before_indirect_branch == lfence_branch_none)
4756         return;
4757
4758       if (i.operands != 1)
4759         abort ();
4760
4761       if (i.reg_operands == 1)
4762         {
4763           /* Indirect branch via register.  Don't insert lfence with
4764              -mlfence-after-load=yes.  */
4765           if (lfence_after_load
4766               || lfence_before_indirect_branch == lfence_branch_memory)
4767             return;
4768         }
4769       else if (i.mem_operands == 1
4770                && lfence_before_indirect_branch != lfence_branch_register)
4771         {
4772           as_warn (_("indirect `%s` with memory operand should be avoided"),
4773                    insn_name (&i.tm));
4774           return;
4775         }
4776       else
4777         return;
4778
4779       if (last_insn.kind != last_insn_other
4780           && last_insn.seg == now_seg)
4781         {
4782           as_warn_where (last_insn.file, last_insn.line,
4783                          _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4784                          last_insn.name, insn_name (&i.tm));
4785           return;
4786         }
4787
4788       p = frag_more (3);
4789       *p++ = 0xf;
4790       *p++ = 0xae;
4791       *p = 0xe8;
4792       return;
4793     }
4794
4795   /* Output or/not/shl and lfence before near ret.  */
4796   if (lfence_before_ret != lfence_before_ret_none
4797       && (i.tm.base_opcode == 0xc2
4798           || i.tm.base_opcode == 0xc3))
4799     {
4800       if (last_insn.kind != last_insn_other
4801           && last_insn.seg == now_seg)
4802         {
4803           as_warn_where (last_insn.file, last_insn.line,
4804                          _("`%s` skips -mlfence-before-ret on `%s`"),
4805                          last_insn.name, insn_name (&i.tm));
4806           return;
4807         }
4808
4809       /* Near ret ingore operand size override under CPU64.  */
4810       char prefix = flag_code == CODE_64BIT
4811                     ? 0x48
4812                     : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4813
4814       if (lfence_before_ret == lfence_before_ret_not)
4815         {
4816           /* not: 0xf71424, may add prefix
4817              for operand size override or 64-bit code.  */
4818           p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4819           if (prefix)
4820             *p++ = prefix;
4821           *p++ = 0xf7;
4822           *p++ = 0x14;
4823           *p++ = 0x24;
4824           if (prefix)
4825             *p++ = prefix;
4826           *p++ = 0xf7;
4827           *p++ = 0x14;
4828           *p++ = 0x24;
4829         }
4830       else
4831         {
4832           p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4833           if (prefix)
4834             *p++ = prefix;
4835           if (lfence_before_ret == lfence_before_ret_or)
4836             {
4837               /* or: 0x830c2400, may add prefix
4838                  for operand size override or 64-bit code.  */
4839               *p++ = 0x83;
4840               *p++ = 0x0c;
4841             }
4842           else
4843             {
4844               /* shl: 0xc1242400, may add prefix
4845                  for operand size override or 64-bit code.  */
4846               *p++ = 0xc1;
4847               *p++ = 0x24;
4848             }
4849
4850           *p++ = 0x24;
4851           *p++ = 0x0;
4852         }
4853
4854       *p++ = 0xf;
4855       *p++ = 0xae;
4856       *p = 0xe8;
4857     }
4858 }
4859
4860 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
4861    parsing pass. Instead of introducing a rarely use new insn attribute this
4862    utilizes a common pattern between affected templates. It is deemed
4863    acceptable that this will lead to unnecessary pass 2 preparations in a
4864    limited set of cases.  */
4865 static INLINE bool may_need_pass2 (const insn_template *t)
4866 {
4867   return t->opcode_modifier.sse2avx
4868          /* Note that all SSE2AVX templates have at least one operand.  */
4869          ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
4870          : (t->opcode_modifier.opcodespace == SPACE_0F
4871             && (t->base_opcode | 1) == 0xbf)
4872            || (t->opcode_modifier.opcodespace == SPACE_BASE
4873                && t->base_opcode == 0x63);
4874 }
4875
4876 /* This is the guts of the machine-dependent assembler.  LINE points to a
4877    machine dependent instruction.  This function is supposed to emit
4878    the frags/bytes it assembles to.  */
4879
4880 void
4881 md_assemble (char *line)
4882 {
4883   unsigned int j;
4884   char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
4885   const char *end, *pass1_mnem = NULL;
4886   enum i386_error pass1_err = 0;
4887   const insn_template *t;
4888
4889   /* Initialize globals.  */
4890   current_templates = NULL;
4891  retry:
4892   memset (&i, '\0', sizeof (i));
4893   i.rounding.type = rc_none;
4894   for (j = 0; j < MAX_OPERANDS; j++)
4895     i.reloc[j] = NO_RELOC;
4896   memset (disp_expressions, '\0', sizeof (disp_expressions));
4897   memset (im_expressions, '\0', sizeof (im_expressions));
4898   save_stack_p = save_stack;
4899
4900   /* First parse an instruction mnemonic & call i386_operand for the operands.
4901      We assume that the scrubber has arranged it so that line[0] is the valid
4902      start of a (possibly prefixed) mnemonic.  */
4903
4904   end = parse_insn (line, mnemonic);
4905   if (end == NULL)
4906     {
4907       if (pass1_mnem != NULL)
4908         goto match_error;
4909       if (i.error != no_error)
4910         {
4911           gas_assert (current_templates != NULL);
4912           if (may_need_pass2 (current_templates->start) && !i.suffix)
4913             goto no_match;
4914           /* No point in trying a 2nd pass - it'll only find the same suffix
4915              again.  */
4916           mnem_suffix = i.suffix;
4917           goto match_error;
4918         }
4919       return;
4920     }
4921   if (may_need_pass2 (current_templates->start))
4922     {
4923       /* Make a copy of the full line in case we need to retry.  */
4924       copy = xstrdup (line);
4925     }
4926   line += end - line;
4927   mnem_suffix = i.suffix;
4928
4929   line = parse_operands (line, mnemonic);
4930   this_operand = -1;
4931   if (line == NULL)
4932     {
4933       free (copy);
4934       return;
4935     }
4936
4937   /* Now we've parsed the mnemonic into a set of templates, and have the
4938      operands at hand.  */
4939
4940   /* All Intel opcodes have reversed operands except for "bound", "enter",
4941      "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4942      "rmpadjust", "rmpupdate", and "rmpquery".  We also don't reverse
4943      intersegment "jmp" and "call" instructions with 2 immediate operands so
4944      that the immediate segment precedes the offset consistently in Intel and
4945      AT&T modes.  */
4946   if (intel_syntax
4947       && i.operands > 1
4948       && (strcmp (mnemonic, "bound") != 0)
4949       && (strncmp (mnemonic, "invlpg", 6) != 0)
4950       && !startswith (mnemonic, "monitor")
4951       && !startswith (mnemonic, "mwait")
4952       && (strcmp (mnemonic, "pvalidate") != 0)
4953       && !startswith (mnemonic, "rmp")
4954       && (strcmp (mnemonic, "tpause") != 0)
4955       && (strcmp (mnemonic, "umwait") != 0)
4956       && !(i.operands == 2
4957            && operand_type_check (i.types[0], imm)
4958            && operand_type_check (i.types[1], imm)))
4959     swap_operands ();
4960
4961   /* The order of the immediates should be reversed
4962      for 2 immediates extrq and insertq instructions */
4963   if (i.imm_operands == 2
4964       && (strcmp (mnemonic, "extrq") == 0
4965           || strcmp (mnemonic, "insertq") == 0))
4966       swap_2_operands (0, 1);
4967
4968   if (i.imm_operands)
4969     optimize_imm ();
4970
4971   if (i.disp_operands && !want_disp32 (current_templates->start)
4972       && (!current_templates->start->opcode_modifier.jump
4973           || i.jumpabsolute || i.types[0].bitfield.baseindex))
4974     {
4975       for (j = 0; j < i.operands; ++j)
4976         {
4977           const expressionS *exp = i.op[j].disps;
4978
4979           if (!operand_type_check (i.types[j], disp))
4980             continue;
4981
4982           if (exp->X_op != O_constant)
4983             continue;
4984
4985           /* Since displacement is signed extended to 64bit, don't allow
4986              disp32 if it is out of range.  */
4987           if (fits_in_signed_long (exp->X_add_number))
4988             continue;
4989
4990           i.types[j].bitfield.disp32 = 0;
4991           if (i.types[j].bitfield.baseindex)
4992             {
4993               as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
4994                       (uint64_t) exp->X_add_number);
4995               return;
4996             }
4997         }
4998     }
4999
5000   /* Don't optimize displacement for movabs since it only takes 64bit
5001      displacement.  */
5002   if (i.disp_operands
5003       && i.disp_encoding <= disp_encoding_8bit
5004       && (flag_code != CODE_64BIT
5005           || strcmp (mnemonic, "movabs") != 0))
5006     optimize_disp ();
5007
5008   /* Next, we find a template that matches the given insn,
5009      making sure the overlap of the given operands types is consistent
5010      with the template operand types.  */
5011
5012   if (!(t = match_template (mnem_suffix)))
5013     {
5014       const char *err_msg;
5015
5016       if (copy && !mnem_suffix)
5017         {
5018           line = copy;
5019           copy = NULL;
5020   no_match:
5021           pass1_err = i.error;
5022           pass1_mnem = insn_name (current_templates->start);
5023           goto retry;
5024         }
5025
5026       /* If a non-/only-64bit template (group) was found in pass 1, and if
5027          _some_ template (group) was found in pass 2, squash pass 1's
5028          error.  */
5029       if (pass1_err == unsupported_64bit)
5030         pass1_mnem = NULL;
5031
5032   match_error:
5033       free (copy);
5034
5035       switch (pass1_mnem ? pass1_err : i.error)
5036         {
5037         default:
5038           abort ();
5039         case operand_size_mismatch:
5040           err_msg = _("operand size mismatch");
5041           break;
5042         case operand_type_mismatch:
5043           err_msg = _("operand type mismatch");
5044           break;
5045         case register_type_mismatch:
5046           err_msg = _("register type mismatch");
5047           break;
5048         case number_of_operands_mismatch:
5049           err_msg = _("number of operands mismatch");
5050           break;
5051         case invalid_instruction_suffix:
5052           err_msg = _("invalid instruction suffix");
5053           break;
5054         case bad_imm4:
5055           err_msg = _("constant doesn't fit in 4 bits");
5056           break;
5057         case unsupported_with_intel_mnemonic:
5058           err_msg = _("unsupported with Intel mnemonic");
5059           break;
5060         case unsupported_syntax:
5061           err_msg = _("unsupported syntax");
5062           break;
5063         case unsupported:
5064           as_bad (_("unsupported instruction `%s'"),
5065                   pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5066           return;
5067         case unsupported_on_arch:
5068           as_bad (_("`%s' is not supported on `%s%s'"),
5069                   pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5070                   cpu_arch_name ? cpu_arch_name : default_arch,
5071                   cpu_sub_arch_name ? cpu_sub_arch_name : "");
5072           return;
5073         case unsupported_64bit:
5074           if (ISLOWER (mnem_suffix))
5075             {
5076               if (flag_code == CODE_64BIT)
5077                 as_bad (_("`%s%c' is not supported in 64-bit mode"),
5078                         pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5079                         mnem_suffix);
5080               else
5081                 as_bad (_("`%s%c' is only supported in 64-bit mode"),
5082                         pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5083                         mnem_suffix);
5084             }
5085           else
5086             {
5087               if (flag_code == CODE_64BIT)
5088                 as_bad (_("`%s' is not supported in 64-bit mode"),
5089                         pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5090               else
5091                 as_bad (_("`%s' is only supported in 64-bit mode"),
5092                         pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5093             }
5094           return;
5095         case invalid_sib_address:
5096           err_msg = _("invalid SIB address");
5097           break;
5098         case invalid_vsib_address:
5099           err_msg = _("invalid VSIB address");
5100           break;
5101         case invalid_vector_register_set:
5102           err_msg = _("mask, index, and destination registers must be distinct");
5103           break;
5104         case invalid_tmm_register_set:
5105           err_msg = _("all tmm registers must be distinct");
5106           break;
5107         case invalid_dest_and_src_register_set:
5108           err_msg = _("destination and source registers must be distinct");
5109           break;
5110         case unsupported_vector_index_register:
5111           err_msg = _("unsupported vector index register");
5112           break;
5113         case unsupported_broadcast:
5114           err_msg = _("unsupported broadcast");
5115           break;
5116         case broadcast_needed:
5117           err_msg = _("broadcast is needed for operand of such type");
5118           break;
5119         case unsupported_masking:
5120           err_msg = _("unsupported masking");
5121           break;
5122         case mask_not_on_destination:
5123           err_msg = _("mask not on destination operand");
5124           break;
5125         case no_default_mask:
5126           err_msg = _("default mask isn't allowed");
5127           break;
5128         case unsupported_rc_sae:
5129           err_msg = _("unsupported static rounding/sae");
5130           break;
5131         case invalid_register_operand:
5132           err_msg = _("invalid register operand");
5133           break;
5134         }
5135       as_bad (_("%s for `%s'"), err_msg,
5136               pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5137       return;
5138     }
5139
5140   free (copy);
5141
5142   if (sse_check != check_none
5143       /* The opcode space check isn't strictly needed; it's there only to
5144          bypass the logic below when easily possible.  */
5145       && t->opcode_modifier.opcodespace >= SPACE_0F
5146       && t->opcode_modifier.opcodespace <= SPACE_0F3A
5147       && !i.tm.cpu_flags.bitfield.cpusse4a
5148       && !is_any_vex_encoding (t))
5149     {
5150       bool simd = false;
5151
5152       for (j = 0; j < t->operands; ++j)
5153         {
5154           if (t->operand_types[j].bitfield.class == RegMMX)
5155             break;
5156           if (t->operand_types[j].bitfield.class == RegSIMD)
5157             simd = true;
5158         }
5159
5160       if (j >= t->operands && simd)
5161         (sse_check == check_warning
5162          ? as_warn
5163          : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
5164     }
5165
5166   if (i.tm.opcode_modifier.fwait)
5167     if (!add_prefix (FWAIT_OPCODE))
5168       return;
5169
5170   /* Check if REP prefix is OK.  */
5171   if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5172     {
5173       as_bad (_("invalid instruction `%s' after `%s'"),
5174                 insn_name (&i.tm), i.rep_prefix);
5175       return;
5176     }
5177
5178   /* Check for lock without a lockable instruction.  Destination operand
5179      must be memory unless it is xchg (0x86).  */
5180   if (i.prefix[LOCK_PREFIX]
5181       && (i.tm.opcode_modifier.prefixok < PrefixLock
5182           || i.mem_operands == 0
5183           || (i.tm.base_opcode != 0x86
5184               && !(i.flags[i.operands - 1] & Operand_Mem))))
5185     {
5186       as_bad (_("expecting lockable instruction after `lock'"));
5187       return;
5188     }
5189
5190   if (is_any_vex_encoding (&i.tm)
5191       || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5192       || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
5193     {
5194       /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
5195       if (i.prefix[DATA_PREFIX])
5196         {
5197           as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
5198           return;
5199         }
5200
5201       /* Don't allow e.g. KMOV in TLS code sequences.  */
5202       for (j = i.imm_operands; j < i.operands; ++j)
5203         switch (i.reloc[j])
5204           {
5205           case BFD_RELOC_386_TLS_GOTIE:
5206           case BFD_RELOC_386_TLS_LE_32:
5207           case BFD_RELOC_X86_64_GOTTPOFF:
5208           case BFD_RELOC_X86_64_TLSLD:
5209             as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
5210             return;
5211           default:
5212             break;
5213           }
5214     }
5215
5216   /* Check if HLE prefix is OK.  */
5217   if (i.hle_prefix && !check_hle ())
5218     return;
5219
5220   /* Check BND prefix.  */
5221   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5222     as_bad (_("expecting valid branch instruction after `bnd'"));
5223
5224   /* Check NOTRACK prefix.  */
5225   if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5226     as_bad (_("expecting indirect branch instruction after `notrack'"));
5227
5228   if (i.tm.cpu_flags.bitfield.cpumpx)
5229     {
5230       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5231         as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5232       else if (flag_code != CODE_16BIT
5233                ? i.prefix[ADDR_PREFIX]
5234                : i.mem_operands && !i.prefix[ADDR_PREFIX])
5235         as_bad (_("16-bit address isn't allowed in MPX instructions"));
5236     }
5237
5238   /* Insert BND prefix.  */
5239   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5240     {
5241       if (!i.prefix[BND_PREFIX])
5242         add_prefix (BND_PREFIX_OPCODE);
5243       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5244         {
5245           as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5246           i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5247         }
5248     }
5249
5250   /* Check string instruction segment overrides.  */
5251   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5252     {
5253       gas_assert (i.mem_operands);
5254       if (!check_string ())
5255         return;
5256       i.disp_operands = 0;
5257     }
5258
5259   /* The memory operand of (%dx) should be only used with input/output
5260      instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee).  */
5261   if (i.input_output_operand
5262       && ((i.tm.base_opcode | 0x82) != 0xee
5263           || i.tm.opcode_modifier.opcodespace != SPACE_BASE))
5264     {
5265       as_bad (_("input/output port address isn't allowed with `%s'"),
5266               insn_name (&i.tm));
5267       return;
5268     }
5269
5270   if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5271     optimize_encoding ();
5272
5273   if (use_unaligned_vector_move)
5274     encode_with_unaligned_vector_move ();
5275
5276   if (!process_suffix ())
5277     return;
5278
5279   /* Check if IP-relative addressing requirements can be satisfied.  */
5280   if (i.tm.cpu_flags.bitfield.cpuprefetchi
5281       && !(i.base_reg && i.base_reg->reg_num == RegIP))
5282     as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
5283
5284   /* Update operand types and check extended states.  */
5285   for (j = 0; j < i.operands; j++)
5286     {
5287       i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5288       switch (i.tm.operand_types[j].bitfield.class)
5289         {
5290         default:
5291           break;
5292         case RegMMX:
5293           i.xstate |= xstate_mmx;
5294           break;
5295         case RegMask:
5296           i.xstate |= xstate_mask;
5297           break;
5298         case RegSIMD:
5299           if (i.tm.operand_types[j].bitfield.tmmword)
5300             i.xstate |= xstate_tmm;
5301           else if (i.tm.operand_types[j].bitfield.zmmword)
5302             i.xstate |= xstate_zmm;
5303           else if (i.tm.operand_types[j].bitfield.ymmword)
5304             i.xstate |= xstate_ymm;
5305           else if (i.tm.operand_types[j].bitfield.xmmword)
5306             i.xstate |= xstate_xmm;
5307           break;
5308         }
5309     }
5310
5311   /* Make still unresolved immediate matches conform to size of immediate
5312      given in i.suffix.  */
5313   if (!finalize_imm ())
5314     return;
5315
5316   if (i.types[0].bitfield.imm1)
5317     i.imm_operands = 0; /* kludge for shift insns.  */
5318
5319   /* We only need to check those implicit registers for instructions
5320      with 3 operands or less.  */
5321   if (i.operands <= 3)
5322     for (j = 0; j < i.operands; j++)
5323       if (i.types[j].bitfield.instance != InstanceNone
5324           && !i.types[j].bitfield.xmmword)
5325         i.reg_operands--;
5326
5327   /* For insns with operands there are more diddles to do to the opcode.  */
5328   if (i.operands)
5329     {
5330       if (!process_operands ())
5331         return;
5332     }
5333   else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5334     {
5335       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
5336       as_warn (_("translating to `%sp'"), insn_name (&i.tm));
5337     }
5338
5339   if (is_any_vex_encoding (&i.tm))
5340     {
5341       if (!cpu_arch_flags.bitfield.cpui286)
5342         {
5343           as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5344                   insn_name (&i.tm));
5345           return;
5346         }
5347
5348       /* Check for explicit REX prefix.  */
5349       if (i.prefix[REX_PREFIX] || i.rex_encoding)
5350         {
5351           as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
5352           return;
5353         }
5354
5355       if (i.tm.opcode_modifier.vex)
5356         build_vex_prefix (t);
5357       else
5358         build_evex_prefix ();
5359
5360       /* The individual REX.RXBW bits got consumed.  */
5361       i.rex &= REX_OPCODE;
5362     }
5363
5364   /* Handle conversion of 'int $3' --> special int3 insn.  XOP or FMA4
5365      instructions may define INT_OPCODE as well, so avoid this corner
5366      case for those instructions that use MODRM.  */
5367   if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
5368       && i.tm.base_opcode == INT_OPCODE
5369       && !i.tm.opcode_modifier.modrm
5370       && i.op[0].imms->X_add_number == 3)
5371     {
5372       i.tm.base_opcode = INT3_OPCODE;
5373       i.imm_operands = 0;
5374     }
5375
5376   if ((i.tm.opcode_modifier.jump == JUMP
5377        || i.tm.opcode_modifier.jump == JUMP_BYTE
5378        || i.tm.opcode_modifier.jump == JUMP_DWORD)
5379       && i.op[0].disps->X_op == O_constant)
5380     {
5381       /* Convert "jmp constant" (and "call constant") to a jump (call) to
5382          the absolute address given by the constant.  Since ix86 jumps and
5383          calls are pc relative, we need to generate a reloc.  */
5384       i.op[0].disps->X_add_symbol = &abs_symbol;
5385       i.op[0].disps->X_op = O_symbol;
5386     }
5387
5388   /* For 8 bit registers we need an empty rex prefix.  Also if the
5389      instruction already has a prefix, we need to convert old
5390      registers to new ones.  */
5391
5392   if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5393        && (i.op[0].regs->reg_flags & RegRex64) != 0)
5394       || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5395           && (i.op[1].regs->reg_flags & RegRex64) != 0)
5396       || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5397            || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5398           && i.rex != 0))
5399     {
5400       int x;
5401
5402       i.rex |= REX_OPCODE;
5403       for (x = 0; x < 2; x++)
5404         {
5405           /* Look for 8 bit operand that uses old registers.  */
5406           if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5407               && (i.op[x].regs->reg_flags & RegRex64) == 0)
5408             {
5409               gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5410               /* In case it is "hi" register, give up.  */
5411               if (i.op[x].regs->reg_num > 3)
5412                 as_bad (_("can't encode register '%s%s' in an "
5413                           "instruction requiring REX prefix."),
5414                         register_prefix, i.op[x].regs->reg_name);
5415
5416               /* Otherwise it is equivalent to the extended register.
5417                  Since the encoding doesn't change this is merely
5418                  cosmetic cleanup for debug output.  */
5419
5420               i.op[x].regs = i.op[x].regs + 8;
5421             }
5422         }
5423     }
5424
5425   if (i.rex == 0 && i.rex_encoding)
5426     {
5427       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
5428          that uses legacy register.  If it is "hi" register, don't add
5429          the REX_OPCODE byte.  */
5430       int x;
5431       for (x = 0; x < 2; x++)
5432         if (i.types[x].bitfield.class == Reg
5433             && i.types[x].bitfield.byte
5434             && (i.op[x].regs->reg_flags & RegRex64) == 0
5435             && i.op[x].regs->reg_num > 3)
5436           {
5437             gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5438             i.rex_encoding = false;
5439             break;
5440           }
5441
5442       if (i.rex_encoding)
5443         i.rex = REX_OPCODE;
5444     }
5445
5446   if (i.rex != 0)
5447     add_prefix (REX_OPCODE | i.rex);
5448
5449   insert_lfence_before ();
5450
5451   /* We are ready to output the insn.  */
5452   output_insn ();
5453
5454   insert_lfence_after ();
5455
5456   last_insn.seg = now_seg;
5457
5458   if (i.tm.opcode_modifier.isprefix)
5459     {
5460       last_insn.kind = last_insn_prefix;
5461       last_insn.name = insn_name (&i.tm);
5462       last_insn.file = as_where (&last_insn.line);
5463     }
5464   else
5465     last_insn.kind = last_insn_other;
5466 }
5467
5468 /* The Q suffix is generally valid only in 64-bit mode, with very few
5469    exceptions: fild, fistp, fisttp, and cmpxchg8b.  Note that for fild
5470    and fisttp only one of their two templates is matched below: That's
5471    sufficient since other relevant attributes are the same between both
5472    respective templates.  */
5473 static INLINE bool q_suffix_allowed(const insn_template *t)
5474 {
5475   return flag_code == CODE_64BIT
5476          || (t->opcode_modifier.opcodespace == SPACE_BASE
5477              && t->base_opcode == 0xdf
5478              && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
5479          || (t->opcode_modifier.opcodespace == SPACE_0F
5480              && t->base_opcode == 0xc7
5481              && t->opcode_modifier.opcodeprefix == PREFIX_NONE
5482              && t->extension_opcode == 1) /* cmpxchg8b */;
5483 }
5484
5485 static const char *
5486 parse_insn (const char *line, char *mnemonic)
5487 {
5488   const char *l = line, *token_start = l;
5489   char *mnem_p;
5490   bool pass1 = !current_templates;
5491   int supported;
5492   const insn_template *t;
5493   char *dot_p = NULL;
5494
5495   while (1)
5496     {
5497       mnem_p = mnemonic;
5498       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5499         {
5500           if (*mnem_p == '.')
5501             dot_p = mnem_p;
5502           mnem_p++;
5503           if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5504             {
5505               as_bad (_("no such instruction: `%s'"), token_start);
5506               return NULL;
5507             }
5508           l++;
5509         }
5510       if (!is_space_char (*l)
5511           && *l != END_OF_INSN
5512           && (intel_syntax
5513               || (*l != PREFIX_SEPARATOR
5514                   && *l != ',')))
5515         {
5516           as_bad (_("invalid character %s in mnemonic"),
5517                   output_invalid (*l));
5518           return NULL;
5519         }
5520       if (token_start == l)
5521         {
5522           if (!intel_syntax && *l == PREFIX_SEPARATOR)
5523             as_bad (_("expecting prefix; got nothing"));
5524           else
5525             as_bad (_("expecting mnemonic; got nothing"));
5526           return NULL;
5527         }
5528
5529       /* Look up instruction (or prefix) via hash table.  */
5530       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5531
5532       if (*l != END_OF_INSN
5533           && (!is_space_char (*l) || l[1] != END_OF_INSN)
5534           && current_templates
5535           && current_templates->start->opcode_modifier.isprefix)
5536         {
5537           if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5538             {
5539               as_bad ((flag_code != CODE_64BIT
5540                        ? _("`%s' is only supported in 64-bit mode")
5541                        : _("`%s' is not supported in 64-bit mode")),
5542                       insn_name (current_templates->start));
5543               return NULL;
5544             }
5545           /* If we are in 16-bit mode, do not allow addr16 or data16.
5546              Similarly, in 32-bit mode, do not allow addr32 or data32.  */
5547           if ((current_templates->start->opcode_modifier.size == SIZE16
5548                || current_templates->start->opcode_modifier.size == SIZE32)
5549               && flag_code != CODE_64BIT
5550               && ((current_templates->start->opcode_modifier.size == SIZE32)
5551                   ^ (flag_code == CODE_16BIT)))
5552             {
5553               as_bad (_("redundant %s prefix"),
5554                       insn_name (current_templates->start));
5555               return NULL;
5556             }
5557
5558           if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5559             {
5560               /* Handle pseudo prefixes.  */
5561               switch (current_templates->start->extension_opcode)
5562                 {
5563                 case Prefix_Disp8:
5564                   /* {disp8} */
5565                   i.disp_encoding = disp_encoding_8bit;
5566                   break;
5567                 case Prefix_Disp16:
5568                   /* {disp16} */
5569                   i.disp_encoding = disp_encoding_16bit;
5570                   break;
5571                 case Prefix_Disp32:
5572                   /* {disp32} */
5573                   i.disp_encoding = disp_encoding_32bit;
5574                   break;
5575                 case Prefix_Load:
5576                   /* {load} */
5577                   i.dir_encoding = dir_encoding_load;
5578                   break;
5579                 case Prefix_Store:
5580                   /* {store} */
5581                   i.dir_encoding = dir_encoding_store;
5582                   break;
5583                 case Prefix_VEX:
5584                   /* {vex} */
5585                   i.vec_encoding = vex_encoding_vex;
5586                   break;
5587                 case Prefix_VEX3:
5588                   /* {vex3} */
5589                   i.vec_encoding = vex_encoding_vex3;
5590                   break;
5591                 case Prefix_EVEX:
5592                   /* {evex} */
5593                   i.vec_encoding = vex_encoding_evex;
5594                   break;
5595                 case Prefix_REX:
5596                   /* {rex} */
5597                   i.rex_encoding = true;
5598                   break;
5599                 case Prefix_NoOptimize:
5600                   /* {nooptimize} */
5601                   i.no_optimize = true;
5602                   break;
5603                 default:
5604                   abort ();
5605                 }
5606             }
5607           else
5608             {
5609               /* Add prefix, checking for repeated prefixes.  */
5610               switch (add_prefix (current_templates->start->base_opcode))
5611                 {
5612                 case PREFIX_EXIST:
5613                   return NULL;
5614                 case PREFIX_DS:
5615                   if (current_templates->start->cpu_flags.bitfield.cpuibt)
5616                     i.notrack_prefix = insn_name (current_templates->start);
5617                   break;
5618                 case PREFIX_REP:
5619                   if (current_templates->start->cpu_flags.bitfield.cpuhle)
5620                     i.hle_prefix = insn_name (current_templates->start);
5621                   else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5622                     i.bnd_prefix = insn_name (current_templates->start);
5623                   else
5624                     i.rep_prefix = insn_name (current_templates->start);
5625                   break;
5626                 default:
5627                   break;
5628                 }
5629             }
5630           /* Skip past PREFIX_SEPARATOR and reset token_start.  */
5631           token_start = ++l;
5632         }
5633       else
5634         break;
5635     }
5636
5637   if (!current_templates)
5638     {
5639       /* Deprecated functionality (new code should use pseudo-prefixes instead):
5640          Check if we should swap operand or force 32bit displacement in
5641          encoding.  */
5642       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5643         i.dir_encoding = dir_encoding_swap;
5644       else if (mnem_p - 3 == dot_p
5645                && dot_p[1] == 'd'
5646                && dot_p[2] == '8')
5647         i.disp_encoding = disp_encoding_8bit;
5648       else if (mnem_p - 4 == dot_p
5649                && dot_p[1] == 'd'
5650                && dot_p[2] == '3'
5651                && dot_p[3] == '2')
5652         i.disp_encoding = disp_encoding_32bit;
5653       else
5654         goto check_suffix;
5655       mnem_p = dot_p;
5656       *dot_p = '\0';
5657       current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5658     }
5659
5660   if (!current_templates || !pass1)
5661     {
5662       current_templates = NULL;
5663
5664     check_suffix:
5665       if (mnem_p > mnemonic)
5666         {
5667           /* See if we can get a match by trimming off a suffix.  */
5668           switch (mnem_p[-1])
5669             {
5670             case WORD_MNEM_SUFFIX:
5671               if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5672                 i.suffix = SHORT_MNEM_SUFFIX;
5673               else
5674                 /* Fall through.  */
5675               case BYTE_MNEM_SUFFIX:
5676               case QWORD_MNEM_SUFFIX:
5677                 i.suffix = mnem_p[-1];
5678               mnem_p[-1] = '\0';
5679               current_templates
5680                 = (const templates *) str_hash_find (op_hash, mnemonic);
5681               break;
5682             case SHORT_MNEM_SUFFIX:
5683             case LONG_MNEM_SUFFIX:
5684               if (!intel_syntax)
5685                 {
5686                   i.suffix = mnem_p[-1];
5687                   mnem_p[-1] = '\0';
5688                   current_templates
5689                     = (const templates *) str_hash_find (op_hash, mnemonic);
5690                 }
5691               break;
5692
5693               /* Intel Syntax.  */
5694             case 'd':
5695               if (intel_syntax)
5696                 {
5697                   if (intel_float_operand (mnemonic) == 1)
5698                     i.suffix = SHORT_MNEM_SUFFIX;
5699                   else
5700                     i.suffix = LONG_MNEM_SUFFIX;
5701                   mnem_p[-1] = '\0';
5702                   current_templates
5703                     = (const templates *) str_hash_find (op_hash, mnemonic);
5704                 }
5705               /* For compatibility reasons accept MOVSD and CMPSD without
5706                  operands even in AT&T mode.  */
5707               else if (*l == END_OF_INSN
5708                        || (is_space_char (*l) && l[1] == END_OF_INSN))
5709                 {
5710                   mnem_p[-1] = '\0';
5711                   current_templates
5712                     = (const templates *) str_hash_find (op_hash, mnemonic);
5713                   if (current_templates != NULL
5714                       /* MOVS or CMPS */
5715                       && (current_templates->start->base_opcode | 2) == 0xa6
5716                       && current_templates->start->opcode_modifier.opcodespace
5717                          == SPACE_BASE
5718                       && mnem_p[-2] == 's')
5719                     {
5720                       as_warn (_("found `%sd'; assuming `%sl' was meant"),
5721                                mnemonic, mnemonic);
5722                       i.suffix = LONG_MNEM_SUFFIX;
5723                     }
5724                   else
5725                     {
5726                       current_templates = NULL;
5727                       mnem_p[-1] = 'd';
5728                     }
5729                 }
5730               break;
5731             }
5732         }
5733
5734       if (!current_templates)
5735         {
5736           if (pass1)
5737             as_bad (_("no such instruction: `%s'"), token_start);
5738           return NULL;
5739         }
5740     }
5741
5742   if (current_templates->start->opcode_modifier.jump == JUMP
5743       || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5744     {
5745       /* Check for a branch hint.  We allow ",pt" and ",pn" for
5746          predict taken and predict not taken respectively.
5747          I'm not sure that branch hints actually do anything on loop
5748          and jcxz insns (JumpByte) for current Pentium4 chips.  They
5749          may work in the future and it doesn't hurt to accept them
5750          now.  */
5751       if (l[0] == ',' && l[1] == 'p')
5752         {
5753           if (l[2] == 't')
5754             {
5755               if (!add_prefix (DS_PREFIX_OPCODE))
5756                 return NULL;
5757               l += 3;
5758             }
5759           else if (l[2] == 'n')
5760             {
5761               if (!add_prefix (CS_PREFIX_OPCODE))
5762                 return NULL;
5763               l += 3;
5764             }
5765         }
5766     }
5767   /* Any other comma loses.  */
5768   if (*l == ',')
5769     {
5770       as_bad (_("invalid character %s in mnemonic"),
5771               output_invalid (*l));
5772       return NULL;
5773     }
5774
5775   /* Check if instruction is supported on specified architecture.  */
5776   supported = 0;
5777   for (t = current_templates->start; t < current_templates->end; ++t)
5778     {
5779       supported |= cpu_flags_match (t);
5780
5781       if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
5782         supported &= ~CPU_FLAGS_64BIT_MATCH;
5783
5784       if (supported == CPU_FLAGS_PERFECT_MATCH)
5785         return l;
5786     }
5787
5788   if (pass1)
5789     {
5790       if (supported & CPU_FLAGS_64BIT_MATCH)
5791         i.error = unsupported_on_arch;
5792       else
5793         i.error = unsupported_64bit;
5794     }
5795
5796   return NULL;
5797 }
5798
5799 static char *
5800 parse_operands (char *l, const char *mnemonic)
5801 {
5802   char *token_start;
5803
5804   /* 1 if operand is pending after ','.  */
5805   unsigned int expecting_operand = 0;
5806
5807   while (*l != END_OF_INSN)
5808     {
5809       /* Non-zero if operand parens not balanced.  */
5810       unsigned int paren_not_balanced = 0;
5811       /* True if inside double quotes.  */
5812       bool in_quotes = false;
5813
5814       /* Skip optional white space before operand.  */
5815       if (is_space_char (*l))
5816         ++l;
5817       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5818         {
5819           as_bad (_("invalid character %s before operand %d"),
5820                   output_invalid (*l),
5821                   i.operands + 1);
5822           return NULL;
5823         }
5824       token_start = l;  /* After white space.  */
5825       while (in_quotes || paren_not_balanced || *l != ',')
5826         {
5827           if (*l == END_OF_INSN)
5828             {
5829               if (in_quotes)
5830                 {
5831                   as_bad (_("unbalanced double quotes in operand %d."),
5832                           i.operands + 1);
5833                   return NULL;
5834                 }
5835               if (paren_not_balanced)
5836                 {
5837                   know (!intel_syntax);
5838                   as_bad (_("unbalanced parenthesis in operand %d."),
5839                           i.operands + 1);
5840                   return NULL;
5841                 }
5842               else
5843                 break;  /* we are done */
5844             }
5845           else if (*l == '\\' && l[1] == '"')
5846             ++l;
5847           else if (*l == '"')
5848             in_quotes = !in_quotes;
5849           else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5850             {
5851               as_bad (_("invalid character %s in operand %d"),
5852                       output_invalid (*l),
5853                       i.operands + 1);
5854               return NULL;
5855             }
5856           if (!intel_syntax && !in_quotes)
5857             {
5858               if (*l == '(')
5859                 ++paren_not_balanced;
5860               if (*l == ')')
5861                 --paren_not_balanced;
5862             }
5863           l++;
5864         }
5865       if (l != token_start)
5866         {                       /* Yes, we've read in another operand.  */
5867           unsigned int operand_ok;
5868           this_operand = i.operands++;
5869           if (i.operands > MAX_OPERANDS)
5870             {
5871               as_bad (_("spurious operands; (%d operands/instruction max)"),
5872                       MAX_OPERANDS);
5873               return NULL;
5874             }
5875           i.types[this_operand].bitfield.unspecified = 1;
5876           /* Now parse operand adding info to 'i' as we go along.  */
5877           END_STRING_AND_SAVE (l);
5878
5879           if (i.mem_operands > 1)
5880             {
5881               as_bad (_("too many memory references for `%s'"),
5882                       mnemonic);
5883               return 0;
5884             }
5885
5886           if (intel_syntax)
5887             operand_ok =
5888               i386_intel_operand (token_start,
5889                                   intel_float_operand (mnemonic));
5890           else
5891             operand_ok = i386_att_operand (token_start);
5892
5893           RESTORE_END_STRING (l);
5894           if (!operand_ok)
5895             return NULL;
5896         }
5897       else
5898         {
5899           if (expecting_operand)
5900             {
5901             expecting_operand_after_comma:
5902               as_bad (_("expecting operand after ','; got nothing"));
5903               return NULL;
5904             }
5905           if (*l == ',')
5906             {
5907               as_bad (_("expecting operand before ','; got nothing"));
5908               return NULL;
5909             }
5910         }
5911
5912       /* Now *l must be either ',' or END_OF_INSN.  */
5913       if (*l == ',')
5914         {
5915           if (*++l == END_OF_INSN)
5916             {
5917               /* Just skip it, if it's \n complain.  */
5918               goto expecting_operand_after_comma;
5919             }
5920           expecting_operand = 1;
5921         }
5922     }
5923   return l;
5924 }
5925
5926 static void
5927 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5928 {
5929   union i386_op temp_op;
5930   i386_operand_type temp_type;
5931   unsigned int temp_flags;
5932   enum bfd_reloc_code_real temp_reloc;
5933
5934   temp_type = i.types[xchg2];
5935   i.types[xchg2] = i.types[xchg1];
5936   i.types[xchg1] = temp_type;
5937
5938   temp_flags = i.flags[xchg2];
5939   i.flags[xchg2] = i.flags[xchg1];
5940   i.flags[xchg1] = temp_flags;
5941
5942   temp_op = i.op[xchg2];
5943   i.op[xchg2] = i.op[xchg1];
5944   i.op[xchg1] = temp_op;
5945
5946   temp_reloc = i.reloc[xchg2];
5947   i.reloc[xchg2] = i.reloc[xchg1];
5948   i.reloc[xchg1] = temp_reloc;
5949
5950   if (i.mask.reg)
5951     {
5952       if (i.mask.operand == xchg1)
5953         i.mask.operand = xchg2;
5954       else if (i.mask.operand == xchg2)
5955         i.mask.operand = xchg1;
5956     }
5957   if (i.broadcast.type || i.broadcast.bytes)
5958     {
5959       if (i.broadcast.operand == xchg1)
5960         i.broadcast.operand = xchg2;
5961       else if (i.broadcast.operand == xchg2)
5962         i.broadcast.operand = xchg1;
5963     }
5964 }
5965
5966 static void
5967 swap_operands (void)
5968 {
5969   switch (i.operands)
5970     {
5971     case 5:
5972     case 4:
5973       swap_2_operands (1, i.operands - 2);
5974       /* Fall through.  */
5975     case 3:
5976     case 2:
5977       swap_2_operands (0, i.operands - 1);
5978       break;
5979     default:
5980       abort ();
5981     }
5982
5983   if (i.mem_operands == 2)
5984     {
5985       const reg_entry *temp_seg;
5986       temp_seg = i.seg[0];
5987       i.seg[0] = i.seg[1];
5988       i.seg[1] = temp_seg;
5989     }
5990 }
5991
5992 /* Try to ensure constant immediates are represented in the smallest
5993    opcode possible.  */
5994 static void
5995 optimize_imm (void)
5996 {
5997   char guess_suffix = 0;
5998   int op;
5999
6000   if (i.suffix)
6001     guess_suffix = i.suffix;
6002   else if (i.reg_operands)
6003     {
6004       /* Figure out a suffix from the last register operand specified.
6005          We can't do this properly yet, i.e. excluding special register
6006          instances, but the following works for instructions with
6007          immediates.  In any case, we can't set i.suffix yet.  */
6008       for (op = i.operands; --op >= 0;)
6009         if (i.types[op].bitfield.class != Reg)
6010           continue;
6011         else if (i.types[op].bitfield.byte)
6012           {
6013             guess_suffix = BYTE_MNEM_SUFFIX;
6014             break;
6015           }
6016         else if (i.types[op].bitfield.word)
6017           {
6018             guess_suffix = WORD_MNEM_SUFFIX;
6019             break;
6020           }
6021         else if (i.types[op].bitfield.dword)
6022           {
6023             guess_suffix = LONG_MNEM_SUFFIX;
6024             break;
6025           }
6026         else if (i.types[op].bitfield.qword)
6027           {
6028             guess_suffix = QWORD_MNEM_SUFFIX;
6029             break;
6030           }
6031     }
6032   else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6033     guess_suffix = WORD_MNEM_SUFFIX;
6034
6035   for (op = i.operands; --op >= 0;)
6036     if (operand_type_check (i.types[op], imm))
6037       {
6038         switch (i.op[op].imms->X_op)
6039           {
6040           case O_constant:
6041             /* If a suffix is given, this operand may be shortened.  */
6042             switch (guess_suffix)
6043               {
6044               case LONG_MNEM_SUFFIX:
6045                 i.types[op].bitfield.imm32 = 1;
6046                 i.types[op].bitfield.imm64 = 1;
6047                 break;
6048               case WORD_MNEM_SUFFIX:
6049                 i.types[op].bitfield.imm16 = 1;
6050                 i.types[op].bitfield.imm32 = 1;
6051                 i.types[op].bitfield.imm32s = 1;
6052                 i.types[op].bitfield.imm64 = 1;
6053                 break;
6054               case BYTE_MNEM_SUFFIX:
6055                 i.types[op].bitfield.imm8 = 1;
6056                 i.types[op].bitfield.imm8s = 1;
6057                 i.types[op].bitfield.imm16 = 1;
6058                 i.types[op].bitfield.imm32 = 1;
6059                 i.types[op].bitfield.imm32s = 1;
6060                 i.types[op].bitfield.imm64 = 1;
6061                 break;
6062               }
6063
6064             /* If this operand is at most 16 bits, convert it
6065                to a signed 16 bit number before trying to see
6066                whether it will fit in an even smaller size.
6067                This allows a 16-bit operand such as $0xffe0 to
6068                be recognised as within Imm8S range.  */
6069             if ((i.types[op].bitfield.imm16)
6070                 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
6071               {
6072                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6073                                                 ^ 0x8000) - 0x8000);
6074               }
6075 #ifdef BFD64
6076             /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
6077             if ((i.types[op].bitfield.imm32)
6078                 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
6079               {
6080                 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6081                                                 ^ ((offsetT) 1 << 31))
6082                                                - ((offsetT) 1 << 31));
6083               }
6084 #endif
6085             i.types[op]
6086               = operand_type_or (i.types[op],
6087                                  smallest_imm_type (i.op[op].imms->X_add_number));
6088
6089             /* We must avoid matching of Imm32 templates when 64bit
6090                only immediate is available.  */
6091             if (guess_suffix == QWORD_MNEM_SUFFIX)
6092               i.types[op].bitfield.imm32 = 0;
6093             break;
6094
6095           case O_absent:
6096           case O_register:
6097             abort ();
6098
6099             /* Symbols and expressions.  */
6100           default:
6101             /* Convert symbolic operand to proper sizes for matching, but don't
6102                prevent matching a set of insns that only supports sizes other
6103                than those matching the insn suffix.  */
6104             {
6105               i386_operand_type mask, allowed;
6106               const insn_template *t = current_templates->start;
6107
6108               operand_type_set (&mask, 0);
6109               switch (guess_suffix)
6110                 {
6111                 case QWORD_MNEM_SUFFIX:
6112                   mask.bitfield.imm64 = 1;
6113                   mask.bitfield.imm32s = 1;
6114                   break;
6115                 case LONG_MNEM_SUFFIX:
6116                   mask.bitfield.imm32 = 1;
6117                   break;
6118                 case WORD_MNEM_SUFFIX:
6119                   mask.bitfield.imm16 = 1;
6120                   break;
6121                 case BYTE_MNEM_SUFFIX:
6122                   mask.bitfield.imm8 = 1;
6123                   break;
6124                 default:
6125                   break;
6126                 }
6127
6128               allowed = operand_type_and (t->operand_types[op], mask);
6129               while (++t < current_templates->end)
6130                 {
6131                   allowed = operand_type_or (allowed, t->operand_types[op]);
6132                   allowed = operand_type_and (allowed, mask);
6133                 }
6134
6135               if (!operand_type_all_zero (&allowed))
6136                 i.types[op] = operand_type_and (i.types[op], mask);
6137             }
6138             break;
6139           }
6140       }
6141 }
6142
6143 /* Try to use the smallest displacement type too.  */
6144 static void
6145 optimize_disp (void)
6146 {
6147   int op;
6148
6149   for (op = i.operands; --op >= 0;)
6150     if (operand_type_check (i.types[op], disp))
6151       {
6152         if (i.op[op].disps->X_op == O_constant)
6153           {
6154             offsetT op_disp = i.op[op].disps->X_add_number;
6155
6156             if (!op_disp && i.types[op].bitfield.baseindex)
6157               {
6158                 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6159                 i.op[op].disps = NULL;
6160                 i.disp_operands--;
6161                 continue;
6162               }
6163
6164             if (i.types[op].bitfield.disp16
6165                 && fits_in_unsigned_word (op_disp))
6166               {
6167                 /* If this operand is at most 16 bits, convert
6168                    to a signed 16 bit number and don't use 64bit
6169                    displacement.  */
6170                 op_disp = ((op_disp ^ 0x8000) - 0x8000);
6171                 i.types[op].bitfield.disp64 = 0;
6172               }
6173
6174 #ifdef BFD64
6175             /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
6176             if ((flag_code != CODE_64BIT
6177                  ? i.types[op].bitfield.disp32
6178                  : want_disp32 (current_templates->start)
6179                    && (!current_templates->start->opcode_modifier.jump
6180                        || i.jumpabsolute || i.types[op].bitfield.baseindex))
6181                 && fits_in_unsigned_long (op_disp))
6182               {
6183                 /* If this operand is at most 32 bits, convert
6184                    to a signed 32 bit number and don't use 64bit
6185                    displacement.  */
6186                 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
6187                 i.types[op].bitfield.disp64 = 0;
6188                 i.types[op].bitfield.disp32 = 1;
6189               }
6190
6191             if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
6192               {
6193                 i.types[op].bitfield.disp64 = 0;
6194                 i.types[op].bitfield.disp32 = 1;
6195               }
6196 #endif
6197             if ((i.types[op].bitfield.disp32
6198                  || i.types[op].bitfield.disp16)
6199                 && fits_in_disp8 (op_disp))
6200               i.types[op].bitfield.disp8 = 1;
6201
6202             i.op[op].disps->X_add_number = op_disp;
6203           }
6204         else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
6205                  || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
6206           {
6207             fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
6208                          i.op[op].disps, 0, i.reloc[op]);
6209             i.types[op] = operand_type_and_not (i.types[op], anydisp);
6210           }
6211         else
6212           /* We only support 64bit displacement on constants.  */
6213           i.types[op].bitfield.disp64 = 0;
6214       }
6215 }
6216
6217 /* Return 1 if there is a match in broadcast bytes between operand
6218    GIVEN and instruction template T.   */
6219
6220 static INLINE int
6221 match_broadcast_size (const insn_template *t, unsigned int given)
6222 {
6223   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
6224            && i.types[given].bitfield.byte)
6225           || (t->opcode_modifier.broadcast == WORD_BROADCAST
6226               && i.types[given].bitfield.word)
6227           || (t->opcode_modifier.broadcast == DWORD_BROADCAST
6228               && i.types[given].bitfield.dword)
6229           || (t->opcode_modifier.broadcast == QWORD_BROADCAST
6230               && i.types[given].bitfield.qword));
6231 }
6232
6233 /* Check if operands are valid for the instruction.

        Validates the vector related properties of the instruction being
        assembled (the global state "i") against template T: implied
        AVX512VL / AVX2 requirements, VSIB index registers, the default
        mask, AMX and distinct-destination register constraints, broadcast,
        masking, RC/SAE, the XOP Imm4 operand, and vector Disp8 scaling.

        Returns 0 if T is acceptable.  Otherwise i.error is set to the
        reason and 1 is returned.

        Side effects visible in the body: i.broadcast.bytes may be set,
        i.types[0] is cleared for 5-operand XOP templates, i.memshift and
        the disp8 bit of operand types are updated, and warnings may be
        issued for overlapping VSIB registers.  */
6234
6235 static int
6236 check_VecOperands (const insn_template *t)
6237 {
6238   unsigned int op;
6239   i386_cpu_flags cpu;
6240
6241   /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6242      any one operand are implicitly requiring AVX512VL support if the actual
6243      operand size is YMMword or XMMword.  Since this function runs after
6244      template matching, there's no need to check for YMMword/XMMword in
6245      the template.  */
6246   cpu = cpu_flags_and (t->cpu_flags, avx512);
6247   if (!cpu_flags_all_zero (&cpu)
6248       && !t->cpu_flags.bitfield.cpuavx512vl
6249       && !cpu_arch_flags.bitfield.cpuavx512vl)
6250     {
6251       for (op = 0; op < t->operands; ++op)
6252         {
6253           if (t->operand_types[op].bitfield.zmmword
6254               && (i.types[op].bitfield.ymmword
6255                   || i.types[op].bitfield.xmmword))
6256             {
6257               i.error = unsupported;
6258               return 1;
6259             }
6260         }
6261     }
6262
6263   /* Somewhat similarly, templates specifying both AVX and AVX2 are
6264      requiring AVX2 support if the actual operand size is YMMword.  */
6265   if (t->cpu_flags.bitfield.cpuavx
6266       && t->cpu_flags.bitfield.cpuavx2
6267       && !cpu_arch_flags.bitfield.cpuavx2)
6268     {
6269       for (op = 0; op < t->operands; ++op)
6270         {
6271           if (t->operand_types[op].bitfield.xmmword
6272               && i.types[op].bitfield.ymmword)
6273             {
6274               i.error = unsupported;
6275               return 1;
6276             }
6277         }
6278     }
6279
6280   /* Without VSIB byte, we can't have a vector register for index.  */
6281   if (!t->opcode_modifier.sib
6282       && i.index_reg
6283       && (i.index_reg->reg_type.bitfield.xmmword
6284           || i.index_reg->reg_type.bitfield.ymmword
6285           || i.index_reg->reg_type.bitfield.zmmword))
6286     {
6287       i.error = unsupported_vector_index_register;
6288       return 1;
6289     }
6290
6291   /* Check if default mask is allowed.  A missing mask register and mask
          register number 0 are both treated as "default mask" here.  */
6292   if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6293       && (!i.mask.reg || i.mask.reg->reg_num == 0))
6294     {
6295       i.error = no_default_mask;
6296       return 1;
6297     }
6298
6299   /* For VSIB byte, we need a vector register for index, and all vector
6300      registers must be distinct.  */
6301   if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6302     {
            /* The index register has to be present and of exactly the width
               the template's VSIB kind asks for.  */
6303       if (!i.index_reg
6304           || !((t->opcode_modifier.sib == VECSIB128
6305                 && i.index_reg->reg_type.bitfield.xmmword)
6306                || (t->opcode_modifier.sib == VECSIB256
6307                    && i.index_reg->reg_type.bitfield.ymmword)
6308                || (t->opcode_modifier.sib == VECSIB512
6309                    && i.index_reg->reg_type.bitfield.zmmword)))
6310       {
6311         i.error = invalid_vsib_address;
6312         return 1;
6313       }
6314
            /* Only two shapes are expected from here on: two register
               operands, or a mask register being in use.  */
6315       gas_assert (i.reg_operands == 2 || i.mask.reg);
6316       if (i.reg_operands == 2 && !i.mask.reg)
6317         {
                /* Operands 0 and 2 are asserted to be XMM/YMM registers;
                   judging by the warning text below these are presumably the
                   mask and the destination -- TODO confirm.
                   NOTE: both "return 0" statements in this branch leave the
                   function right away, i.e. none of the checks further down
                   are carried out for this form.  */
6318           gas_assert (i.types[0].bitfield.class == RegSIMD);
6319           gas_assert (i.types[0].bitfield.xmmword
6320                       || i.types[0].bitfield.ymmword);
6321           gas_assert (i.types[2].bitfield.class == RegSIMD);
6322           gas_assert (i.types[2].bitfield.xmmword
6323                       || i.types[2].bitfield.ymmword);
                /* With operand_check set to check_none no overlap diagnostic
                   is produced at all.  */
6324           if (operand_check == check_none)
6325             return 0;
                /* All three register numbers differ: nothing to diagnose.  */
6326           if (register_number (i.op[0].regs)
6327               != register_number (i.index_reg)
6328               && register_number (i.op[2].regs)
6329                  != register_number (i.index_reg)
6330               && register_number (i.op[0].regs)
6331                  != register_number (i.op[2].regs))
6332             return 0;
6333           if (operand_check == check_error)
6334             {
6335               i.error = invalid_vector_register_set;
6336               return 1;
6337             }
                /* Otherwise merely warn; execution then continues with the
                   remaining checks.  */
6338           as_warn (_("mask, index, and destination registers should be distinct"));
6339         }
6340       else if (i.reg_operands == 1 && i.mask.reg)
6341         {
                /* Diagnose operand 1 being a vector register with the same
                   register number as the index register.  */
6342           if (i.types[1].bitfield.class == RegSIMD
6343               && (i.types[1].bitfield.xmmword
6344                   || i.types[1].bitfield.ymmword
6345                   || i.types[1].bitfield.zmmword)
6346               && (register_number (i.op[1].regs)
6347                   == register_number (i.index_reg)))
6348             {
6349               if (operand_check == check_error)
6350                 {
6351                   i.error = invalid_vector_register_set;
6352                   return 1;
6353                 }
6354               if (operand_check != check_none)
6355                 as_warn (_("index and destination registers should be distinct"));
6356             }
6357         }
6358     }
6359
6360   /* For AMX instructions with 3 TMM register operands, all operands
6361       must be distinct.  */
6362   if (i.reg_operands == 3
6363       && t->operand_types[0].bitfield.tmmword
6364       && (i.op[0].regs == i.op[1].regs
6365           || i.op[0].regs == i.op[2].regs
6366           || i.op[1].regs == i.op[2].regs))
6367     {
6368       i.error = invalid_tmm_register_set;
6369       return 1;
6370     }
6371
6372   /* Some special instructions require that the destination be distinct
6373      from the source registers.  */
6374   if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6375     {
            /* The destination is the last operand.  */
6376       unsigned int dest_reg = i.operands - 1;
6377
6378       know (i.operands >= 3);
6379
6380       /* #UD if dest_reg == src1_reg or dest_reg == src2_reg.  */
6381       if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6382           || (i.reg_operands > 2
6383               && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6384         {
6385           i.error = invalid_dest_and_src_register_set;
6386           return 1;
6387         }
6388     }
6389
6390   /* Check if broadcast is supported by the instruction and is applied
6391      to the memory operand.  */
6392   if (i.broadcast.type || i.broadcast.bytes)
6393     {
6394       i386_operand_type type, overlap;
6395
6396       /* Check if specified broadcast is supported in this instruction,
6397          and its broadcast bytes match the memory operand.  */
6398       op = i.broadcast.operand;
6399       if (!t->opcode_modifier.broadcast
6400           || !(i.flags[op] & Operand_Mem)
6401           || (!i.types[op].bitfield.unspecified
6402               && !match_broadcast_size (t, op)))
6403         {
              /* Shared error exit; also the target of the gotos further
                 down in this branch.  */
6404         bad_broadcast:
6405           i.error = unsupported_broadcast;
6406           return 1;
6407         }
6408
            /* Record the overall broadcast size.  Presumably
               1 << (broadcast - 1) is the element size in bytes and
               i.broadcast.type the element count -- TODO confirm against the
               *_BROADCAST definitions.  */
6409       if (i.broadcast.type)
6410         i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
6411                              * i.broadcast.type);
            /* Translate the byte count reported by get_broadcast_bytes ()
               into the matching operand size bit; any other count is
               rejected.  */
6412       operand_type_set (&type, 0);
6413       switch (get_broadcast_bytes (t, false))
6414         {
6415         case 2:
6416           type.bitfield.word = 1;
6417           break;
6418         case 4:
6419           type.bitfield.dword = 1;
6420           break;
6421         case 8:
6422           type.bitfield.qword = 1;
6423           break;
6424         case 16:
6425           type.bitfield.xmmword = 1;
6426           break;
6427         case 32:
6428           type.bitfield.ymmword = 1;
6429           break;
6430         case 64:
6431           type.bitfield.zmmword = 1;
6432           break;
6433         default:
6434           goto bad_broadcast;
6435         }
6436
            /* The size determined above has to be permitted by the template
               operand.  If that operand is a SIMD register allowing more
               than one of byte/word/dword/qword, the vector size bits are
               dropped from the overlap first, so that they alone cannot make
               the broadcast acceptable.  */
6437       overlap = operand_type_and (type, t->operand_types[op]);
6438       if (t->operand_types[op].bitfield.class == RegSIMD
6439           && t->operand_types[op].bitfield.byte
6440              + t->operand_types[op].bitfield.word
6441              + t->operand_types[op].bitfield.dword
6442              + t->operand_types[op].bitfield.qword > 1)
6443         {
6444           overlap.bitfield.xmmword = 0;
6445           overlap.bitfield.ymmword = 0;
6446           overlap.bitfield.zmmword = 0;
6447         }
6448       if (operand_type_all_zero (&overlap))
6449           goto bad_broadcast;
6450
            /* When the template asks for operand size checking, every other
               operand has to pass operand_type_register_match () against the
               broadcast type determined above.  */
6451       if (t->opcode_modifier.checkoperandsize)
6452         {
6453           unsigned int j;
6454
6455           type.bitfield.baseindex = 1;
6456           for (j = 0; j < i.operands; ++j)
6457             {
6458               if (j != op
6459                   && !operand_type_register_match(i.types[j],
6460                                                   t->operand_types[j],
6461                                                   type,
6462                                                   t->operand_types[op]))
6463                 goto bad_broadcast;
6464             }
6465         }
6466     }
6467   /* If broadcast is supported in this instruction, we need to check if
6468      operand of one-element size isn't specified without broadcast.  */
6469   else if (t->opcode_modifier.broadcast && i.mem_operands)
6470     {
6471       /* Find memory operand.  */
6472       for (op = 0; op < i.operands; op++)
6473         if (i.flags[op] & Operand_Mem)
6474           break;
6475       gas_assert (op < i.operands);
6476       /* Check size of the memory operand.  */
6477       if (match_broadcast_size (t, op))
6478         {
6479           i.error = broadcast_needed;
6480           return 1;
6481         }
6482     }
6483   else
6484     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
6485
6486   /* Check if requested masking is supported.  */
6487   if (i.mask.reg)
6488     {
6489       switch (t->opcode_modifier.masking)
6490         {
6491         case BOTH_MASKING:
6492           break;
6493         case MERGING_MASKING:
6494           if (i.mask.zeroing)
6495             {
              /* The "case 0" label (template with a masking attribute of
                 zero) deliberately sits inside this if body, to share the
                 error path with zeroing-masking being requested on a
                 merging-only template.  */
6496         case 0:
6497               i.error = unsupported_masking;
6498               return 1;
6499             }
6500           break;
6501         case DYNAMIC_MASKING:
6502           /* Memory destinations allow only merging masking.  */
6503           if (i.mask.zeroing && i.mem_operands)
6504             {
6505               /* Find memory operand.  */
6506               for (op = 0; op < i.operands; op++)
6507                 if (i.flags[op] & Operand_Mem)
6508                   break;
6509               gas_assert (op < i.operands);
                    /* The memory operand is the last operand, i.e. the
                       destination.  */
6510               if (op == i.operands - 1)
6511                 {
6512                   i.error = unsupported_masking;
6513                   return 1;
6514                 }
6515             }
6516           break;
6517         default:
6518           abort ();
6519         }
6520     }
6521
6522   /* Check if masking is applied to dest operand.  */
6523   if (i.mask.reg && (i.mask.operand != i.operands - 1))
6524     {
6525       i.error = mask_not_on_destination;
6526       return 1;
6527     }
6528
6529   /* Check RC/SAE.  */
6530   if (i.rounding.type != rc_none)
6531     {
            /* Reject if the template lacks the sae attribute, if the request
               (a rounding mode as opposed to SAE only) disagrees with the
               template's staticrounding attribute, or if there is a memory
               operand.  */
6532       if (!t->opcode_modifier.sae
6533           || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6534           || i.mem_operands)
6535         {
6536           i.error = unsupported_rc_sae;
6537           return 1;
6538         }
6539
6540       /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6541          operand.  */
6542       if (t->opcode_modifier.evex != EVEXLIG)
6543         {
6544           for (op = 0; op < t->operands; ++op)
6545             if (i.types[op].bitfield.zmmword)
6546               break;
6547           if (op >= t->operands)
6548             {
6549               i.error = operand_size_mismatch;
6550               return 1;
6551             }
6552         }
6553     }
6554
6555   /* Check the special Imm4 cases; must be the first operand.  */
6556   if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6557     {
6558       if (i.op[0].imms->X_op != O_constant
6559           || !fits_in_imm4 (i.op[0].imms->X_add_number))
6560         {
6561           i.error = bad_imm4;
6562           return 1;
6563         }
6564
6565       /* Turn off Imm<N> so that update_imm won't complain.  */
6566       operand_type_set (&i.types[0], 0);
6567     }
6568
6569   /* Check vector Disp8 operand.  */
6570   if (t->opcode_modifier.disp8memshift
6571       && i.disp_encoding <= disp_encoding_8bit)
6572     {
            /* Determine i.memshift: from the broadcast attribute when
               broadcast is in use, else from a fixed shift given by the
               template, else (DISP8_SHIFT_VL) from the operand sizes.  */
6573       if (i.broadcast.bytes)
6574         i.memshift = t->opcode_modifier.broadcast - 1;
6575       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6576         i.memshift = t->opcode_modifier.disp8memshift;
6577       else
6578         {
6579           const i386_operand_type *type = NULL, *fallback = NULL;
6580
6581           i.memshift = 0;
6582           for (op = 0; op < i.operands; op++)
6583             if (i.flags[op] & Operand_Mem)
6584               {
                      /* Memory operand.  EVEX.LIG templates use a shift of 2,
                         or 3 with a QWORD mnemonic suffix.  Otherwise prefer
                         the template's operand type if it permits at most
                         one vector size, then the size specified on the
                         actual operand, and only then remember the
                         template's type as a fallback.  */
6585                 if (t->opcode_modifier.evex == EVEXLIG)
6586                   i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6587                 else if (t->operand_types[op].bitfield.xmmword
6588                          + t->operand_types[op].bitfield.ymmword
6589                          + t->operand_types[op].bitfield.zmmword <= 1)
6590                   type = &t->operand_types[op];
6591                 else if (!i.types[op].bitfield.unspecified)
6592                   type = &i.types[op];
6593                 else /* Ambiguities get resolved elsewhere.  */
6594                   fallback = &t->operand_types[op];
6595               }
6596             else if (i.types[op].bitfield.class == RegSIMD
6597                      && t->opcode_modifier.evex != EVEXLIG)
6598               {
                      /* Vector register operand: keep track of the largest
                         register size seen so far.  */
6599                 if (i.types[op].bitfield.zmmword)
6600                   i.memshift = 6;
6601                 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6602                   i.memshift = 5;
6603                 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6604                   i.memshift = 4;
6605               }
6606
                /* The fallback is used only if neither a definite memory
                   operand type nor any shift was found in the loop.  A
                   definite type, when present, overrides what was derived
                   from register operands.  */
6607           if (!type && !i.memshift)
6608             type = fallback;
6609           if (type)
6610             {
6611               if (type->bitfield.zmmword)
6612                 i.memshift = 6;
6613               else if (type->bitfield.ymmword)
6614                 i.memshift = 5;
6615               else if (type->bitfield.xmmword)
6616                 i.memshift = 4;
6617             }
6618
6619           /* For the check in fits_in_disp8().  */
6620           if (i.memshift == 0)
6621             i.memshift = -1;
6622         }
6623
            /* Look at constant displacements: the first one accepted by
               fits_in_disp8 () (presumably taking i.memshift into account)
               gets its disp8 bit set, and the function returns with
               i.memshift left in place.  Displacements not fitting have
               their disp8 bit cleared.  */
6624       for (op = 0; op < i.operands; op++)
6625         if (operand_type_check (i.types[op], disp)
6626             && i.op[op].disps->X_op == O_constant)
6627           {
6628             if (fits_in_disp8 (i.op[op].disps->X_add_number))
6629               {
6630                 i.types[op].bitfield.disp8 = 1;
6631                 return 0;
6632               }
6633             i.types[op].bitfield.disp8 = 0;
6634           }
6635     }
6636
        /* Reached when no displacement was marked Disp8 above, or when the
           Disp8 check did not apply at all: reset the shift.  */
6637   i.memshift = 0;
6638
6639   return 0;
6640 }
6641
6642 /* Check if encoding requirements are met by the instruction.  */
6643
6644 static int
6645 VEX_check_encoding (const insn_template *t)
6646 {
6647   if (i.vec_encoding == vex_encoding_error)
6648     {
6649       i.error = unsupported;
6650       return 1;
6651     }
6652
6653   if (i.vec_encoding == vex_encoding_evex)
6654     {
6655       /* This instruction must be encoded with EVEX prefix.  */
6656       if (!is_evex_encoding (t))
6657         {
6658           i.error = unsupported;
6659           return 1;
6660         }
6661       return 0;
6662     }
6663
6664   if (!t->opcode_modifier.vex)
6665     {
6666       /* This instruction template doesn't have VEX prefix.  */
6667       if (i.vec_encoding != vex_encoding_default)
6668         {
6669           i.error = unsupported;
6670           return 1;
6671         }
6672       return 0;
6673     }
6674
6675   return 0;
6676 }
6677
6678 /* Helper function for the progress() macro in match_template().  */
6679 static INLINE enum i386_error progress (enum i386_error new,
6680                                         enum i386_error last,
6681                                         unsigned int line, unsigned int *line_p)
6682 {
6683   if (line <= *line_p)
6684     return last;
6685   *line_p = line;
6686   return new;
6687 }
6688
6689 static const insn_template *
6690 match_template (char mnem_suffix)
6691 {
6692   /* Points to template once we've found it.  */
6693   const insn_template *t;
6694   i386_operand_type overlap0, overlap1, overlap2, overlap3;
6695   i386_operand_type overlap4;
6696   unsigned int found_reverse_match;
6697   i386_operand_type operand_types [MAX_OPERANDS];
6698   int addr_prefix_disp;
6699   unsigned int j, size_match, check_register, errline = __LINE__;
6700   enum i386_error specific_error = number_of_operands_mismatch;
6701 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6702
6703 #if MAX_OPERANDS != 5
6704 # error "MAX_OPERANDS must be 5."
6705 #endif
6706
6707   found_reverse_match = 0;
6708   addr_prefix_disp = -1;
6709
6710   for (t = current_templates->start; t < current_templates->end; t++)
6711     {
6712       addr_prefix_disp = -1;
6713       found_reverse_match = 0;
6714
6715       /* Must have right number of operands.  */
6716       if (i.operands != t->operands)
6717         continue;
6718
6719       /* Check processor support.  */
6720       specific_error = progress (unsupported);
6721       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6722         continue;
6723
6724       /* Check AT&T mnemonic.   */
6725       specific_error = progress (unsupported_with_intel_mnemonic);
6726       if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6727         continue;
6728
6729       /* Check AT&T/Intel syntax.  */
6730       specific_error = progress (unsupported_syntax);
6731       if ((intel_syntax && t->opcode_modifier.attsyntax)
6732           || (!intel_syntax && t->opcode_modifier.intelsyntax))
6733         continue;
6734
6735       /* Check Intel64/AMD64 ISA.   */
6736       switch (isa64)
6737         {
6738         default:
6739           /* Default: Don't accept Intel64.  */
6740           if (t->opcode_modifier.isa64 == INTEL64)
6741             continue;
6742           break;
6743         case amd64:
6744           /* -mamd64: Don't accept Intel64 and Intel64 only.  */
6745           if (t->opcode_modifier.isa64 >= INTEL64)
6746             continue;
6747           break;
6748         case intel64:
6749           /* -mintel64: Don't accept AMD64.  */
6750           if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6751             continue;
6752           break;
6753         }
6754
6755       /* Check the suffix.  */
6756       specific_error = progress (invalid_instruction_suffix);
6757       if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
6758           || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
6759           || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
6760           || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
6761           || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
6762         continue;
6763
6764       specific_error = progress (operand_size_mismatch);
6765       size_match = operand_size_match (t);
6766       if (!size_match)
6767         continue;
6768
6769       /* This is intentionally not
6770
6771          if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6772
6773          as the case of a missing * on the operand is accepted (perhaps with
6774          a warning, issued further down).  */
6775       specific_error = progress (operand_type_mismatch);
6776       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6777         continue;
6778
6779       /* In Intel syntax, normally we can check for memory operand size when
6780          there is no mnemonic suffix.  But jmp and call have 2 different
6781          encodings with Dword memory operand size.  Skip the "near" one
6782          (permitting a register operand) when "far" was requested.  */
6783       if (i.far_branch
6784           && t->opcode_modifier.jump == JUMP_ABSOLUTE
6785           && t->operand_types[0].bitfield.class == Reg)
6786         continue;
6787
6788       for (j = 0; j < MAX_OPERANDS; j++)
6789         operand_types[j] = t->operand_types[j];
6790
6791       /* In general, don't allow 32-bit operands on pre-386.  */
6792       specific_error = progress (mnem_suffix ? invalid_instruction_suffix
6793                                              : operand_size_mismatch);
6794       j = i.imm_operands + (t->operands > i.imm_operands + 1);
6795       if (i.suffix == LONG_MNEM_SUFFIX
6796           && !cpu_arch_flags.bitfield.cpui386
6797           && (intel_syntax
6798               ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6799                  && !intel_float_operand (insn_name (t)))
6800               : intel_float_operand (insn_name (t)) != 2)
6801           && (t->operands == i.imm_operands
6802               || (operand_types[i.imm_operands].bitfield.class != RegMMX
6803                && operand_types[i.imm_operands].bitfield.class != RegSIMD
6804                && operand_types[i.imm_operands].bitfield.class != RegMask)
6805               || (operand_types[j].bitfield.class != RegMMX
6806                   && operand_types[j].bitfield.class != RegSIMD
6807                   && operand_types[j].bitfield.class != RegMask))
6808           && !t->opcode_modifier.sib)
6809         continue;
6810
6811       /* Do not verify operands when there are none.  */
6812       if (!t->operands)
6813         {
6814           if (VEX_check_encoding (t))
6815             {
6816               specific_error = progress (i.error);
6817               continue;
6818             }
6819
6820           /* We've found a match; break out of loop.  */
6821           break;
6822         }
6823
6824       if (!t->opcode_modifier.jump
6825           || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6826         {
6827           /* There should be only one Disp operand.  */
6828           for (j = 0; j < MAX_OPERANDS; j++)
6829             if (operand_type_check (operand_types[j], disp))
6830               break;
6831           if (j < MAX_OPERANDS)
6832             {
6833               bool override = (i.prefix[ADDR_PREFIX] != 0);
6834
6835               addr_prefix_disp = j;
6836
6837               /* Address size prefix will turn Disp64 operand into Disp32 and
6838                  Disp32/Disp16 one into Disp16/Disp32 respectively.  */
6839               switch (flag_code)
6840                 {
6841                 case CODE_16BIT:
6842                   override = !override;
6843                   /* Fall through.  */
6844                 case CODE_32BIT:
6845                   if (operand_types[j].bitfield.disp32
6846                       && operand_types[j].bitfield.disp16)
6847                     {
6848                       operand_types[j].bitfield.disp16 = override;
6849                       operand_types[j].bitfield.disp32 = !override;
6850                     }
6851                   gas_assert (!operand_types[j].bitfield.disp64);
6852                   break;
6853
6854                 case CODE_64BIT:
6855                   if (operand_types[j].bitfield.disp64)
6856                     {
6857                       gas_assert (!operand_types[j].bitfield.disp32);
6858                       operand_types[j].bitfield.disp32 = override;
6859                       operand_types[j].bitfield.disp64 = !override;
6860                     }
6861                   operand_types[j].bitfield.disp16 = 0;
6862                   break;
6863                 }
6864             }
6865         }
6866
6867       /* We check register size if needed.  */
6868       if (t->opcode_modifier.checkoperandsize)
6869         {
6870           check_register = (1 << t->operands) - 1;
6871           if (i.broadcast.type || i.broadcast.bytes)
6872             check_register &= ~(1 << i.broadcast.operand);
6873         }
6874       else
6875         check_register = 0;
6876
6877       overlap0 = operand_type_and (i.types[0], operand_types[0]);
6878       switch (t->operands)
6879         {
6880         case 1:
6881           if (!operand_type_match (overlap0, i.types[0]))
6882             continue;
6883           break;
6884         case 2:
6885           /* xchg %eax, %eax is a special case. It is an alias for nop
6886              only in 32bit mode and we can use opcode 0x90.  In 64bit
6887              mode, we can't use 0x90 for xchg %eax, %eax since it should
6888              zero-extend %eax to %rax.  */
6889           if (flag_code == CODE_64BIT
6890               && t->base_opcode == 0x90
6891               && t->opcode_modifier.opcodespace == SPACE_BASE
6892               && i.types[0].bitfield.instance == Accum
6893               && i.types[0].bitfield.dword
6894               && i.types[1].bitfield.instance == Accum)
6895             continue;
6896
6897           if (t->base_opcode == MOV_AX_DISP32
6898               && t->opcode_modifier.opcodespace == SPACE_BASE)
6899             {
6900               /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
6901               if (i.reloc[0] == BFD_RELOC_386_GOT32)
6902                 continue;
6903
6904               /* xrelease mov %eax, <disp> is another special case. It must not
6905                  match the accumulator-only encoding of mov.  */
6906               if (i.hle_prefix)
6907                 continue;
6908             }
6909           /* Fall through.  */
6910
6911         case 3:
6912           if (!(size_match & MATCH_STRAIGHT))
6913             goto check_reverse;
6914           /* Reverse direction of operands if swapping is possible in the first
6915              place (operands need to be symmetric) and
6916              - the load form is requested, and the template is a store form,
6917              - the store form is requested, and the template is a load form,
6918              - the non-default (swapped) form is requested.  */
6919           overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6920           if (t->opcode_modifier.d && i.reg_operands == i.operands
6921               && !operand_type_all_zero (&overlap1))
6922             switch (i.dir_encoding)
6923               {
6924               case dir_encoding_load:
6925                 if (operand_type_check (operand_types[i.operands - 1], anymem)
6926                     || t->opcode_modifier.regmem)
6927                   goto check_reverse;
6928                 break;
6929
6930               case dir_encoding_store:
6931                 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6932                     && !t->opcode_modifier.regmem)
6933                   goto check_reverse;
6934                 break;
6935
6936               case dir_encoding_swap:
6937                 goto check_reverse;
6938
6939               case dir_encoding_default:
6940                 break;
6941               }
6942           /* If we want store form, we skip the current load.  */
6943           if ((i.dir_encoding == dir_encoding_store
6944                || i.dir_encoding == dir_encoding_swap)
6945               && i.mem_operands == 0
6946               && t->opcode_modifier.load)
6947             continue;
6948           /* Fall through.  */
6949         case 4:
6950         case 5:
6951           overlap1 = operand_type_and (i.types[1], operand_types[1]);
6952           if (!operand_type_match (overlap0, i.types[0])
6953               || !operand_type_match (overlap1, i.types[1])
6954               || ((check_register & 3) == 3
6955                   && !operand_type_register_match (i.types[0],
6956                                                    operand_types[0],
6957                                                    i.types[1],
6958                                                    operand_types[1])))
6959             {
6960               specific_error = progress (i.error);
6961
6962               /* Check if other direction is valid ...  */
6963               if (!t->opcode_modifier.d)
6964                 continue;
6965
6966             check_reverse:
6967               if (!(size_match & MATCH_REVERSE))
6968                 continue;
6969               /* Try reversing direction of operands.  */
6970               j = t->opcode_modifier.vexsources ? 1 : i.operands - 1;
6971               overlap0 = operand_type_and (i.types[0], operand_types[j]);
6972               overlap1 = operand_type_and (i.types[j], operand_types[0]);
6973               overlap2 = operand_type_and (i.types[1], operand_types[1]);
6974               gas_assert (t->operands != 3 || !check_register);
6975               if (!operand_type_match (overlap0, i.types[0])
6976                   || !operand_type_match (overlap1, i.types[j])
6977                   || (t->operands == 3
6978                       && !operand_type_match (overlap2, i.types[1]))
6979                   || (check_register
6980                       && !operand_type_register_match (i.types[0],
6981                                                        operand_types[j],
6982                                                        i.types[j],
6983                                                        operand_types[0])))
6984                 {
6985                   /* Does not match either direction.  */
6986                   specific_error = progress (i.error);
6987                   continue;
6988                 }
6989               /* found_reverse_match holds which variant of D
6990                  we've found.  */
6991               if (!t->opcode_modifier.d)
6992                 found_reverse_match = 0;
6993               else if (operand_types[0].bitfield.tbyte)
6994                 {
6995                   if (t->opcode_modifier.operandconstraint != UGH)
6996                     found_reverse_match = Opcode_FloatD;
6997                   /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped.  */
6998                   if ((t->base_opcode & 0x20)
6999                       && (intel_syntax || intel_mnemonic))
7000                     found_reverse_match |= Opcode_FloatR;
7001                 }
7002               else if (t->opcode_modifier.vexsources)
7003                 {
7004                   found_reverse_match = Opcode_VexW;
7005                   goto check_operands_345;
7006                 }
7007               else if (t->opcode_modifier.opcodespace != SPACE_BASE
7008                        && (t->opcode_modifier.opcodespace != SPACE_0F
7009                            /* MOV to/from CR/DR/TR, as an exception, follow
7010                               the base opcode space encoding model.  */
7011                            || (t->base_opcode | 7) != 0x27))
7012                 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
7013                                       ? Opcode_ExtD : Opcode_SIMD_IntD;
7014               else if (!t->opcode_modifier.commutative)
7015                 found_reverse_match = Opcode_D;
7016               else
7017                 found_reverse_match = ~0;
7018             }
7019           else
7020             {
7021               /* Found a forward 2 operand match here.  */
7022             check_operands_345:
7023               switch (t->operands)
7024                 {
7025                 case 5:
7026                   overlap4 = operand_type_and (i.types[4], operand_types[4]);
7027                   if (!operand_type_match (overlap4, i.types[4])
7028                       || !operand_type_register_match (i.types[3],
7029                                                        operand_types[3],
7030                                                        i.types[4],
7031                                                        operand_types[4]))
7032                     {
7033                       specific_error = progress (i.error);
7034                       continue;
7035                     }
7036                   /* Fall through.  */
7037                 case 4:
7038                   overlap3 = operand_type_and (i.types[3], operand_types[3]);
7039                   if (!operand_type_match (overlap3, i.types[3])
7040                       || ((check_register & 0xa) == 0xa
7041                           && !operand_type_register_match (i.types[1],
7042                                                             operand_types[1],
7043                                                             i.types[3],
7044                                                             operand_types[3]))
7045                       || ((check_register & 0xc) == 0xc
7046                           && !operand_type_register_match (i.types[2],
7047                                                             operand_types[2],
7048                                                             i.types[3],
7049                                                             operand_types[3])))
7050                     {
7051                       specific_error = progress (i.error);
7052                       continue;
7053                     }
7054                   /* Fall through.  */
7055                 case 3:
7056                   overlap2 = operand_type_and (i.types[2], operand_types[2]);
7057                   if (!operand_type_match (overlap2, i.types[2])
7058                       || ((check_register & 5) == 5
7059                           && !operand_type_register_match (i.types[0],
7060                                                             operand_types[0],
7061                                                             i.types[2],
7062                                                             operand_types[2]))
7063                       || ((check_register & 6) == 6
7064                           && !operand_type_register_match (i.types[1],
7065                                                             operand_types[1],
7066                                                             i.types[2],
7067                                                             operand_types[2])))
7068                     {
7069                       specific_error = progress (i.error);
7070                       continue;
7071                     }
7072                   break;
7073                 }
7074             }
7075           /* Found either forward/reverse 2, 3 or 4 operand match here:
7076              slip through to break.  */
7077         }
7078
7079       /* Check if VEX/EVEX encoding requirements can be satisfied.  */
7080       if (VEX_check_encoding (t))
7081         {
7082           specific_error = progress (i.error);
7083           continue;
7084         }
7085
7086       /* Check if vector operands are valid.  */
7087       if (check_VecOperands (t))
7088         {
7089           specific_error = progress (i.error);
7090           continue;
7091         }
7092
7093       /* We've found a match; break out of loop.  */
7094       break;
7095     }
7096
7097 #undef progress
7098
7099   if (t == current_templates->end)
7100     {
7101       /* We found no match.  */
7102       i.error = specific_error;
7103       return NULL;
7104     }
7105
7106   if (!quiet_warnings)
7107     {
7108       if (!intel_syntax
7109           && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
7110         as_warn (_("indirect %s without `*'"), insn_name (t));
7111
7112       if (t->opcode_modifier.isprefix
7113           && t->opcode_modifier.mnemonicsize == IGNORESIZE)
7114         {
7115           /* Warn them that a data or address size prefix doesn't
7116              affect assembly of the next line of code.  */
7117           as_warn (_("stand-alone `%s' prefix"), insn_name (t));
7118         }
7119     }
7120
7121   /* Copy the template we found.  */
7122   install_template (t);
7123
7124   if (addr_prefix_disp != -1)
7125     i.tm.operand_types[addr_prefix_disp]
7126       = operand_types[addr_prefix_disp];
7127
7128   switch (found_reverse_match)
7129     {
7130     case 0:
7131       break;
7132
7133     default:
7134       /* If we found a reverse match we must alter the opcode direction
7135          bit and clear/flip the regmem modifier one.  found_reverse_match
7136          holds bits to change (different for int & float insns).  */
7137
7138       i.tm.base_opcode ^= found_reverse_match;
7139
7140       /* Certain SIMD insns have their load forms specified in the opcode
7141          table, and hence we need to _set_ RegMem instead of clearing it.
7142          We need to avoid setting the bit though on insns like KMOVW.  */
7143       i.tm.opcode_modifier.regmem
7144         = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7145           && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7146           && !i.tm.opcode_modifier.regmem;
7147
7148       /* Fall through.  */
7149     case ~0:
7150       i.tm.operand_types[0] = operand_types[i.operands - 1];
7151       i.tm.operand_types[i.operands - 1] = operand_types[0];
7152       break;
7153
7154     case Opcode_VexW:
7155       /* Only the first two register operands need reversing, alongside
7156          flipping VEX.W.  */
7157       i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7158
7159       j = i.tm.operand_types[0].bitfield.imm8;
7160       i.tm.operand_types[j] = operand_types[j + 1];
7161       i.tm.operand_types[j + 1] = operand_types[j];
7162       break;
7163     }
7164
7165   return t;
7166 }
7167
7168 static int
7169 check_string (void)
7170 {
7171   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7172   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7173
7174   if (i.seg[op] != NULL && i.seg[op] != reg_es)
7175     {
7176       as_bad (_("`%s' operand %u must use `%ses' segment"),
7177               insn_name (&i.tm),
7178               intel_syntax ? i.tm.operands - es_op : es_op + 1,
7179               register_prefix);
7180       return 0;
7181     }
7182
7183   /* There's only ever one segment override allowed per instruction.
7184      This instruction possibly has a legal segment override on the
7185      second operand, so copy the segment to where non-string
7186      instructions store it, allowing common code.  */
7187   i.seg[op] = i.seg[1];
7188
7189   return 1;
7190 }
7191
/* Settle the operand-size suffix (i.suffix) for the matched template
   held in i.tm, and apply what follows from it to the instruction
   being assembled.

   Steps, in the order they appear below:
   - choose i.suffix: from the template's Size16/32/64 modifier; else
     from general register operands (an explicitly given suffix is
     validated through check_{byte,long,qword,word}_reg); else from
     stackop_size for DEFAULTSIZE templates; else from flag_code for
     certain jumps and the 0F 01 /0 ... /3 group;
   - if there is still no suffix, diagnose an ambiguous operand size or
     pick a default, depending on operand_check;
   - for movsx/movzx, make the suffix describe the destination;
   - adjust i.tm.base_opcode, add a data/address size prefix and set
     REX.W as the suffix requires;
   - for ADDR_PREFIX_OP_REG templates, add an address size prefix where
     needed and verify the sizes of register operands.

   Returns 1 on success.  Returns 0 after an error has been reported
   with as_bad, or when add_prefix or one of the check_*_reg helpers
   returns 0.  */
7192 static int
7193 process_suffix (void)
7194 {
7195   bool is_crc32 = false, is_movx = false;
7196
7197   /* If matched instruction specifies an explicit instruction mnemonic
7198      suffix, use it.  */
7199   if (i.tm.opcode_modifier.size == SIZE16)
7200     i.suffix = WORD_MNEM_SUFFIX;
7201   else if (i.tm.opcode_modifier.size == SIZE32)
7202     i.suffix = LONG_MNEM_SUFFIX;
7203   else if (i.tm.opcode_modifier.size == SIZE64)
7204     i.suffix = QWORD_MNEM_SUFFIX;
7205   else if (i.reg_operands
7206            && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7207            && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7208     {
7209       unsigned int numop = i.operands;
7210
7211       /* MOVSX/MOVZX */
7212       is_movx = (i.tm.opcode_modifier.opcodespace == SPACE_0F
7213                  && (i.tm.base_opcode | 8) == 0xbe)
7214                 || (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7215                     && i.tm.base_opcode == 0x63
7216                     && i.tm.cpu_flags.bitfield.cpu64);
7217
7218       /* CRC32 */
7219       is_crc32 = (i.tm.base_opcode == 0xf0
7220                   && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7221                   && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);
7222
7223       /* movsx/movzx want only their source operand considered here, for the
7224          ambiguity checking below.  The suffix will be replaced afterwards
7225          to represent the destination (register).  */
7226       if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7227         --i.operands;
7228
7229       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
7230       if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
7231         i.rex |= REX_W;
7232
7233       /* If there's no instruction mnemonic suffix we try to invent one
7234          based on GPR operands.  */
7235       if (!i.suffix)
7236         {
7237           /* We take i.suffix from the last register operand specified,
7238              Destination register type is more significant than source
7239              register type.  crc32 in SSE4.2 prefers source register
7240              type. */
7241           unsigned int op = is_crc32 ? 1 : i.operands;
7242
7243           while (op--)
7244             if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7245                 || i.tm.operand_types[op].bitfield.instance == Accum)
7246               {
7247                 if (i.types[op].bitfield.class != Reg)
7248                   continue;
7249                 if (i.types[op].bitfield.byte)
7250                   i.suffix = BYTE_MNEM_SUFFIX;
7251                 else if (i.types[op].bitfield.word)
7252                   i.suffix = WORD_MNEM_SUFFIX;
7253                 else if (i.types[op].bitfield.dword)
7254                   i.suffix = LONG_MNEM_SUFFIX;
7255                 else if (i.types[op].bitfield.qword)
7256                   i.suffix = QWORD_MNEM_SUFFIX;
7257                 else
7258                   continue;
7259                 break;
7260               }
7261
7262           /* As an exception, movsx/movzx silently default to a byte source
7263              in AT&T mode.  */
7264           if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7265             i.suffix = BYTE_MNEM_SUFFIX;
7266         }
7267       else if (i.suffix == BYTE_MNEM_SUFFIX)
7268         {
7269           if (!check_byte_reg ())
7270             return 0;
7271         }
7272       else if (i.suffix == LONG_MNEM_SUFFIX)
7273         {
7274           if (!check_long_reg ())
7275             return 0;
7276         }
7277       else if (i.suffix == QWORD_MNEM_SUFFIX)
7278         {
7279           if (!check_qword_reg ())
7280             return 0;
7281         }
7282       else if (i.suffix == WORD_MNEM_SUFFIX)
7283         {
7284           if (!check_word_reg ())
7285             return 0;
7286         }
7287       else if (intel_syntax
7288                && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7289         /* Do nothing if the instruction is going to ignore the prefix.  */
7290         ;
7291       else
7292         abort ();
7293
7294       /* Undo the movsx/movzx change done above.  */
7295       i.operands = numop;
7296     }
7297   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7298            && !i.suffix)
7299     {
7300       i.suffix = stackop_size;
7301       if (stackop_size == LONG_MNEM_SUFFIX)
7302         {
7303           /* stackop_size is set to LONG_MNEM_SUFFIX for the
7304              .code16gcc directive to support 16-bit mode with
7305              32-bit address.  For IRET without a suffix, generate
7306              16-bit IRET (opcode 0xcf) to return from an interrupt
7307              handler.  */
7308           if (i.tm.base_opcode == 0xcf)
7309             {
7310               i.suffix = WORD_MNEM_SUFFIX;
7311               as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7312             }
7313           /* Warn about changed behavior for segment register push/pop.  */
7314           else if ((i.tm.base_opcode | 1) == 0x07)
7315             as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7316                      insn_name (&i.tm));
7317         }
7318     }
7319   else if (!i.suffix
7320            && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7321                || i.tm.opcode_modifier.jump == JUMP_BYTE
7322                || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7323                || (i.tm.opcode_modifier.opcodespace == SPACE_0F
7324                    && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7325                    && i.tm.extension_opcode <= 3)))
7326     {
7327       switch (flag_code)
7328         {
7329         case CODE_64BIT:
7330           if (!i.tm.opcode_modifier.no_qsuf)
7331             {
7332               if (i.tm.opcode_modifier.jump == JUMP_BYTE
7333                   || i.tm.opcode_modifier.no_lsuf)
7334                 i.suffix = QWORD_MNEM_SUFFIX;
7335               break;
7336             }
7337           /* Fall through.  */
7338         case CODE_32BIT:
7339           if (!i.tm.opcode_modifier.no_lsuf)
7340             i.suffix = LONG_MNEM_SUFFIX;
7341           break;
7342         case CODE_16BIT:
7343           if (!i.tm.opcode_modifier.no_wsuf)
7344             i.suffix = WORD_MNEM_SUFFIX;
7345           break;
7346         }
7347     }
7348
7349   if (!i.suffix
7350       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7351           /* Also cover lret/retf/iret in 64-bit mode.  */
7352           || (flag_code == CODE_64BIT
7353               && !i.tm.opcode_modifier.no_lsuf
7354               && !i.tm.opcode_modifier.no_qsuf))
7355       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7356       /* Explicit sizing prefixes are assumed to disambiguate insns.  */
7357       && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7358       /* Accept FLDENV et al without suffix.  */
7359       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7360     {
7361       unsigned int suffixes, evex = 0;
7362
           /* Collect the permitted sizes as a bit mask: bit 0 byte, bit 1
              word, bit 2 long, bit 4 short, bit 5 qword (64-bit code only).
              Bits 6..8 are added further down for xmmword / ymmword /
              zmmword memory operands.  */
7363       suffixes = !i.tm.opcode_modifier.no_bsuf;
7364       if (!i.tm.opcode_modifier.no_wsuf)
7365         suffixes |= 1 << 1;
7366       if (!i.tm.opcode_modifier.no_lsuf)
7367         suffixes |= 1 << 2;
7368       if (!i.tm.opcode_modifier.no_ssuf)
7369         suffixes |= 1 << 4;
7370       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7371         suffixes |= 1 << 5;
7372
7373       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
7374          also suitable for AT&T syntax mode, it was requested that this be
7375          restricted to just Intel syntax.  */
7376       if (intel_syntax && is_any_vex_encoding (&i.tm)
7377           && !i.broadcast.type && !i.broadcast.bytes)
7378         {
7379           unsigned int op;
7380
7381           for (op = 0; op < i.tm.operands; ++op)
7382             {
7383               if (is_evex_encoding (&i.tm)
7384                   && !cpu_arch_flags.bitfield.cpuavx512vl)
7385                 {
7386                   if (i.tm.operand_types[op].bitfield.ymmword)
7387                     i.tm.operand_types[op].bitfield.xmmword = 0;
7388                   if (i.tm.operand_types[op].bitfield.zmmword)
7389                     i.tm.operand_types[op].bitfield.ymmword = 0;
7390                   if (!i.tm.opcode_modifier.evex
7391                       || i.tm.opcode_modifier.evex == EVEXDYN)
7392                     i.tm.opcode_modifier.evex = EVEX512;
7393                 }
7394
7395               if (i.tm.operand_types[op].bitfield.xmmword
7396                   + i.tm.operand_types[op].bitfield.ymmword
7397                   + i.tm.operand_types[op].bitfield.zmmword < 2)
7398                 continue;
7399
7400               /* Any properly sized operand disambiguates the insn.  */
7401               if (i.types[op].bitfield.xmmword
7402                   || i.types[op].bitfield.ymmword
7403                   || i.types[op].bitfield.zmmword)
7404                 {
7405                   suffixes &= ~(7 << 6);
7406                   evex = 0;
7407                   break;
7408                 }
7409
7410               if ((i.flags[op] & Operand_Mem)
7411                   && i.tm.operand_types[op].bitfield.unspecified)
7412                 {
7413                   if (i.tm.operand_types[op].bitfield.xmmword)
7414                     suffixes |= 1 << 6;
7415                   if (i.tm.operand_types[op].bitfield.ymmword)
7416                     suffixes |= 1 << 7;
7417                   if (i.tm.operand_types[op].bitfield.zmmword)
7418                     suffixes |= 1 << 8;
7419                   if (is_evex_encoding (&i.tm))
7420                     evex = EVEX512;
7421                 }
7422             }
7423         }
7424
7425       /* Are multiple suffixes / operand sizes allowed?  */
           /* The expression below is non-zero exactly when more than one bit
              of SUFFIXES is set.  */
7426       if (suffixes & (suffixes - 1))
7427         {
7428           if (intel_syntax
7429               && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7430                   || operand_check == check_error))
7431             {
7432               as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
7433               return 0;
7434             }
7435           if (operand_check == check_error)
7436             {
7437               as_bad (_("no instruction mnemonic suffix given and "
7438                         "no register operands; can't size `%s'"), insn_name (&i.tm));
7439               return 0;
7440             }
7441           if (operand_check == check_warning)
7442             as_warn (_("%s; using default for `%s'"),
7443                        intel_syntax
7444                        ? _("ambiguous operand size")
7445                        : _("no instruction mnemonic suffix given and "
7446                            "no register operands"),
7447                        insn_name (&i.tm));
7448
7449           if (i.tm.opcode_modifier.floatmf)
7450             i.suffix = SHORT_MNEM_SUFFIX;
7451           else if (is_movx)
7452             /* handled below */;
7453           else if (evex)
7454             i.tm.opcode_modifier.evex = evex;
7455           else if (flag_code == CODE_16BIT)
7456             i.suffix = WORD_MNEM_SUFFIX;
7457           else if (!i.tm.opcode_modifier.no_lsuf)
7458             i.suffix = LONG_MNEM_SUFFIX;
7459           else
7460             i.suffix = QWORD_MNEM_SUFFIX;
7461         }
7462     }
7463
7464   if (is_movx)
7465     {
7466       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7467          In AT&T syntax, if there is no suffix (warned about above), the default
7468          will be byte extension.  */
7469       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7470         i.tm.base_opcode |= 1;
7471
7472       /* For further processing, the suffix should represent the destination
7473          (register).  This is already the case when one was used with
7474          mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7475          no suffix to begin with.  */
7476       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7477         {
7478           if (i.types[1].bitfield.word)
7479             i.suffix = WORD_MNEM_SUFFIX;
7480           else if (i.types[1].bitfield.qword)
7481             i.suffix = QWORD_MNEM_SUFFIX;
7482           else
7483             i.suffix = LONG_MNEM_SUFFIX;
7484
7485           i.tm.opcode_modifier.w = 0;
7486         }
7487     }
7488
7489   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7490     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7491                    != (i.tm.operand_types[1].bitfield.class == Reg);
7492
7493   /* Change the opcode based on the operand size given by i.suffix.  */
7494   switch (i.suffix)
7495     {
7496     /* Size floating point instruction.  */
7497     case LONG_MNEM_SUFFIX:
7498       if (i.tm.opcode_modifier.floatmf)
7499         {
7500           i.tm.base_opcode ^= 4;
7501           break;
7502         }
7503     /* fall through */
7504     case WORD_MNEM_SUFFIX:
7505     case QWORD_MNEM_SUFFIX:
7506       /* It's not a byte, select word/dword operation.  */
7507       if (i.tm.opcode_modifier.w)
7508         {
7509           if (i.short_form)
7510             i.tm.base_opcode |= 8;
7511           else
7512             i.tm.base_opcode |= 1;
7513         }
7514     /* fall through */
7515     case SHORT_MNEM_SUFFIX:
7516       /* Now select between word & dword operations via the operand
7517          size prefix, except for instructions that will ignore this
7518          prefix anyway.  */
7519       if (i.suffix != QWORD_MNEM_SUFFIX
7520           && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7521           && !i.tm.opcode_modifier.floatmf
7522           && !is_any_vex_encoding (&i.tm)
               /* Next test: an `l' suffix in 16-bit code, or any other
                  suffix getting here outside of 16-bit code.  */
7523           && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7524               || (flag_code == CODE_64BIT
7525                   && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7526         {
7527           unsigned int prefix = DATA_PREFIX_OPCODE;
7528
7529           if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7530             prefix = ADDR_PREFIX_OPCODE;
7531
7532           if (!add_prefix (prefix))
7533             return 0;
7534         }
7535
7536       /* Set mode64 for an operand.  */
7537       if (i.suffix == QWORD_MNEM_SUFFIX
7538           && flag_code == CODE_64BIT
7539           && !i.tm.opcode_modifier.norex64
7540           && !i.tm.opcode_modifier.vexw
7541           /* Special case for xchg %rax,%rax.  It is NOP and doesn't
7542              need rex64. */
7543           && ! (i.operands == 2
7544                 && i.tm.base_opcode == 0x90
7545                 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
7546                 && i.types[0].bitfield.instance == Accum
7547                 && i.types[0].bitfield.qword
7548                 && i.types[1].bitfield.instance == Accum))
7549         i.rex |= REX_W;
7550
7551       break;
7552
7553     case 0:
7554       /* Select word/dword/qword operation with explicit data sizing prefix
7555          when there are no suitable register operands.  */
7556       if (i.tm.opcode_modifier.w
7557           && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7558           && (!i.reg_operands
7559               || (i.reg_operands == 1
7560                       /* ShiftCount */
7561                   && (i.tm.operand_types[0].bitfield.instance == RegC
7562                       /* InOutPortReg */
7563                       || i.tm.operand_types[0].bitfield.instance == RegD
7564                       || i.tm.operand_types[1].bitfield.instance == RegD
7565                       /* CRC32 */
7566                       || is_crc32))))
7567         i.tm.base_opcode |= 1;
7568       break;
7569     }
7570
7571   if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7572     {
7573       gas_assert (!i.suffix);
7574       gas_assert (i.reg_operands);
7575
7576       if (i.tm.operand_types[0].bitfield.instance == Accum
7577           || i.operands == 1)
7578         {
7579           /* The address size override prefix changes the size of the
7580              first operand.  */
7581           if (flag_code == CODE_64BIT
7582               && i.op[0].regs->reg_type.bitfield.word)
7583             {
7584               as_bad (_("16-bit addressing unavailable for `%s'"),
7585                       insn_name (&i.tm));
7586               return 0;
7587             }
7588
7589           if ((flag_code == CODE_32BIT
7590                ? i.op[0].regs->reg_type.bitfield.word
7591                : i.op[0].regs->reg_type.bitfield.dword)
7592               && !add_prefix (ADDR_PREFIX_OPCODE))
7593             return 0;
7594         }
7595       else
7596         {
7597           /* Check invalid register operand when the address size override
7598              prefix changes the size of register operands.  */
7599           unsigned int op;
7600           enum { need_word, need_dword, need_qword } need;
7601
7602           /* Check the register operand for the address size prefix if
7603              the memory operand has no real registers, like symbol, DISP
7604              or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
7605           if (i.mem_operands == 1
7606               && i.reg_operands == 1
7607               && i.operands == 2
7608               && i.types[1].bitfield.class == Reg
7609               && (flag_code == CODE_32BIT
7610                   ? i.op[1].regs->reg_type.bitfield.word
7611                   : i.op[1].regs->reg_type.bitfield.dword)
7612               && ((i.base_reg == NULL && i.index_reg == NULL)
7613 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7614                   || (x86_elf_abi == X86_64_X32_ABI
7615                       && i.base_reg
7616                       && i.base_reg->reg_num == RegIP
7617                       && i.base_reg->reg_type.bitfield.qword))
7618 #else
7619                   || 0)
7620 #endif
7621               && !add_prefix (ADDR_PREFIX_OPCODE))
7622             return 0;
7623
               /* Work out which width every general register operand has to
                  have, from the code mode and from whether an address size
                  prefix is in effect at this point.  */
7624           if (flag_code == CODE_32BIT)
7625             need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7626           else if (i.prefix[ADDR_PREFIX])
7627             need = need_dword;
7628           else
7629             need = flag_code == CODE_64BIT ? need_qword : need_word;
7630
7631           for (op = 0; op < i.operands; op++)
7632             {
7633               if (i.types[op].bitfield.class != Reg)
7634                 continue;
7635
7636               switch (need)
7637                 {
7638                 case need_word:
7639                   if (i.op[op].regs->reg_type.bitfield.word)
7640                     continue;
7641                   break;
7642                 case need_dword:
7643                   if (i.op[op].regs->reg_type.bitfield.dword)
7644                     continue;
7645                   break;
7646                 case need_qword:
7647                   if (i.op[op].regs->reg_type.bitfield.qword)
7648                     continue;
7649                   break;
7650                 }
7651
7652               as_bad (_("invalid register operand size for `%s'"),
7653                       insn_name (&i.tm));
7654               return 0;
7655             }
7656         }
7657     }
7658
7659   return 1;
7660 }
7661
7662 static int
7663 check_byte_reg (void)
7664 {
7665   int op;
7666
7667   for (op = i.operands; --op >= 0;)
7668     {
7669       /* Skip non-register operands. */
7670       if (i.types[op].bitfield.class != Reg)
7671         continue;
7672
7673       /* If this is an eight bit register, it's OK.  If it's the 16 or
7674          32 bit version of an eight bit register, we will just use the
7675          low portion, and that's OK too.  */
7676       if (i.types[op].bitfield.byte)
7677         continue;
7678
7679       /* I/O port address operands are OK too.  */
7680       if (i.tm.operand_types[op].bitfield.instance == RegD
7681           && i.tm.operand_types[op].bitfield.word)
7682         continue;
7683
7684       /* crc32 only wants its source operand checked here.  */
7685       if (i.tm.base_opcode == 0xf0
7686           && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7687           && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7688           && op != 0)
7689         continue;
7690
7691       /* Any other register is bad.  */
7692       as_bad (_("`%s%s' not allowed with `%s%c'"),
7693               register_prefix, i.op[op].regs->reg_name,
7694               insn_name (&i.tm), i.suffix);
7695       return 0;
7696     }
7697   return 1;
7698 }
7699
7700 static int
7701 check_long_reg (void)
7702 {
7703   int op;
7704
7705   for (op = i.operands; --op >= 0;)
7706     /* Skip non-register operands. */
7707     if (i.types[op].bitfield.class != Reg)
7708       continue;
7709     /* Reject eight bit registers, except where the template requires
7710        them. (eg. movzb)  */
7711     else if (i.types[op].bitfield.byte
7712              && (i.tm.operand_types[op].bitfield.class == Reg
7713                  || i.tm.operand_types[op].bitfield.instance == Accum)
7714              && (i.tm.operand_types[op].bitfield.word
7715                  || i.tm.operand_types[op].bitfield.dword))
7716       {
7717         as_bad (_("`%s%s' not allowed with `%s%c'"),
7718                 register_prefix,
7719                 i.op[op].regs->reg_name,
7720                 insn_name (&i.tm),
7721                 i.suffix);
7722         return 0;
7723       }
7724     /* Error if the e prefix on a general reg is missing.  */
7725     else if (i.types[op].bitfield.word
7726              && (i.tm.operand_types[op].bitfield.class == Reg
7727                  || i.tm.operand_types[op].bitfield.instance == Accum)
7728              && i.tm.operand_types[op].bitfield.dword)
7729       {
7730         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7731                 register_prefix, i.op[op].regs->reg_name,
7732                 i.suffix);
7733         return 0;
7734       }
7735     /* Warn if the r prefix on a general reg is present.  */
7736     else if (i.types[op].bitfield.qword
7737              && (i.tm.operand_types[op].bitfield.class == Reg
7738                  || i.tm.operand_types[op].bitfield.instance == Accum)
7739              && i.tm.operand_types[op].bitfield.dword)
7740       {
7741         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7742                 register_prefix, i.op[op].regs->reg_name, i.suffix);
7743         return 0;
7744       }
7745   return 1;
7746 }
7747
7748 static int
7749 check_qword_reg (void)
7750 {
7751   int op;
7752
7753   for (op = i.operands; --op >= 0; )
7754     /* Skip non-register operands. */
7755     if (i.types[op].bitfield.class != Reg)
7756       continue;
7757     /* Reject eight bit registers, except where the template requires
7758        them. (eg. movzb)  */
7759     else if (i.types[op].bitfield.byte
7760              && (i.tm.operand_types[op].bitfield.class == Reg
7761                  || i.tm.operand_types[op].bitfield.instance == Accum)
7762              && (i.tm.operand_types[op].bitfield.word
7763                  || i.tm.operand_types[op].bitfield.dword))
7764       {
7765         as_bad (_("`%s%s' not allowed with `%s%c'"),
7766                 register_prefix,
7767                 i.op[op].regs->reg_name,
7768                 insn_name (&i.tm),
7769                 i.suffix);
7770         return 0;
7771       }
7772     /* Warn if the r prefix on a general reg is missing.  */
7773     else if ((i.types[op].bitfield.word
7774               || i.types[op].bitfield.dword)
7775              && (i.tm.operand_types[op].bitfield.class == Reg
7776                  || i.tm.operand_types[op].bitfield.instance == Accum)
7777              && i.tm.operand_types[op].bitfield.qword)
7778       {
7779         /* Prohibit these changes in the 64bit mode, since the
7780            lowering is more complicated.  */
7781         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7782                 register_prefix, i.op[op].regs->reg_name, i.suffix);
7783         return 0;
7784       }
7785   return 1;
7786 }
7787
7788 static int
7789 check_word_reg (void)
7790 {
7791   int op;
7792   for (op = i.operands; --op >= 0;)
7793     /* Skip non-register operands. */
7794     if (i.types[op].bitfield.class != Reg)
7795       continue;
7796     /* Reject eight bit registers, except where the template requires
7797        them. (eg. movzb)  */
7798     else if (i.types[op].bitfield.byte
7799              && (i.tm.operand_types[op].bitfield.class == Reg
7800                  || i.tm.operand_types[op].bitfield.instance == Accum)
7801              && (i.tm.operand_types[op].bitfield.word
7802                  || i.tm.operand_types[op].bitfield.dword))
7803       {
7804         as_bad (_("`%s%s' not allowed with `%s%c'"),
7805                 register_prefix,
7806                 i.op[op].regs->reg_name,
7807                 insn_name (&i.tm),
7808                 i.suffix);
7809         return 0;
7810       }
7811     /* Error if the e or r prefix on a general reg is present.  */
7812     else if ((i.types[op].bitfield.dword
7813                  || i.types[op].bitfield.qword)
7814              && (i.tm.operand_types[op].bitfield.class == Reg
7815                  || i.tm.operand_types[op].bitfield.instance == Accum)
7816              && i.tm.operand_types[op].bitfield.word)
7817       {
7818         as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7819                 register_prefix, i.op[op].regs->reg_name,
7820                 i.suffix);
7821         return 0;
7822       }
7823   return 1;
7824 }
7825
7826 static int
7827 update_imm (unsigned int j)
7828 {
7829   i386_operand_type overlap = i.types[j];
7830   if (overlap.bitfield.imm8
7831       + overlap.bitfield.imm8s
7832       + overlap.bitfield.imm16
7833       + overlap.bitfield.imm32
7834       + overlap.bitfield.imm32s
7835       + overlap.bitfield.imm64 > 1)
7836     {
7837       static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
7838       static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
7839       static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
7840       static const i386_operand_type imm16_32 = { .bitfield =
7841         { .imm16 = 1, .imm32 = 1 }
7842       };
7843       static const i386_operand_type imm16_32s =  { .bitfield =
7844         { .imm16 = 1, .imm32s = 1 }
7845       };
7846       static const i386_operand_type imm16_32_32s = { .bitfield =
7847         { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
7848       };
7849
7850       if (i.suffix)
7851         {
7852           i386_operand_type temp;
7853
7854           operand_type_set (&temp, 0);
7855           if (i.suffix == BYTE_MNEM_SUFFIX)
7856             {
7857               temp.bitfield.imm8 = overlap.bitfield.imm8;
7858               temp.bitfield.imm8s = overlap.bitfield.imm8s;
7859             }
7860           else if (i.suffix == WORD_MNEM_SUFFIX)
7861             temp.bitfield.imm16 = overlap.bitfield.imm16;
7862           else if (i.suffix == QWORD_MNEM_SUFFIX)
7863             {
7864               temp.bitfield.imm64 = overlap.bitfield.imm64;
7865               temp.bitfield.imm32s = overlap.bitfield.imm32s;
7866             }
7867           else
7868             temp.bitfield.imm32 = overlap.bitfield.imm32;
7869           overlap = temp;
7870         }
7871       else if (operand_type_equal (&overlap, &imm16_32_32s)
7872                || operand_type_equal (&overlap, &imm16_32)
7873                || operand_type_equal (&overlap, &imm16_32s))
7874         {
7875           if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7876             overlap = imm16;
7877           else
7878             overlap = imm32s;
7879         }
7880       else if (i.prefix[REX_PREFIX] & REX_W)
7881         overlap = operand_type_and (overlap, imm32s);
7882       else if (i.prefix[DATA_PREFIX])
7883         overlap = operand_type_and (overlap,
7884                                     flag_code != CODE_16BIT ? imm16 : imm32);
7885       if (overlap.bitfield.imm8
7886           + overlap.bitfield.imm8s
7887           + overlap.bitfield.imm16
7888           + overlap.bitfield.imm32
7889           + overlap.bitfield.imm32s
7890           + overlap.bitfield.imm64 != 1)
7891         {
7892           as_bad (_("no instruction mnemonic suffix given; "
7893                     "can't determine immediate size"));
7894           return 0;
7895         }
7896     }
7897   i.types[j] = overlap;
7898
7899   return 1;
7900 }
7901
7902 static int
7903 finalize_imm (void)
7904 {
7905   unsigned int j, n;
7906
7907   /* Update the first 2 immediate operands.  */
7908   n = i.operands > 2 ? 2 : i.operands;
7909   if (n)
7910     {
7911       for (j = 0; j < n; j++)
7912         if (update_imm (j) == 0)
7913           return 0;
7914
7915       /* The 3rd operand can't be immediate operand.  */
7916       gas_assert (operand_type_check (i.types[2], imm) == 0);
7917     }
7918
7919   return 1;
7920 }
7921
7922 static int
7923 process_operands (void)
7924 {
7925   /* Default segment register this instruction will use for memory
7926      accesses.  0 means unknown.  This is only for optimizing out
7927      unnecessary segment overrides.  */
7928   const reg_entry *default_seg = NULL;
7929
7930   if (i.tm.opcode_modifier.sse2avx)
7931     {
7932       /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7933          need converting.  */
7934       i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7935       i.prefix[REX_PREFIX] = 0;
7936       i.rex_encoding = 0;
7937     }
7938   /* ImmExt should be processed after SSE2AVX.  */
7939   else if (i.tm.opcode_modifier.immext)
7940     process_immext ();
7941
7942   if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7943     {
7944       static const i386_operand_type regxmm = {
7945         .bitfield = { .class = RegSIMD, .xmmword = 1 }
7946       };
7947       unsigned int dupl = i.operands;
7948       unsigned int dest = dupl - 1;
7949       unsigned int j;
7950
7951       /* The destination must be an xmm register.  */
7952       gas_assert (i.reg_operands
7953                   && MAX_OPERANDS > dupl
7954                   && operand_type_equal (&i.types[dest], &regxmm));
7955
7956       if (i.tm.operand_types[0].bitfield.instance == Accum
7957           && i.tm.operand_types[0].bitfield.xmmword)
7958         {
7959           if (i.tm.opcode_modifier.vexsources == VEX3SOURCES)
7960             {
7961               /* Keep xmm0 for instructions with VEX prefix and 3
7962                  sources.  */
7963               i.tm.operand_types[0].bitfield.instance = InstanceNone;
7964               i.tm.operand_types[0].bitfield.class = RegSIMD;
7965               goto duplicate;
7966             }
7967           else
7968             {
7969               /* We remove the first xmm0 and keep the number of
7970                  operands unchanged, which in fact duplicates the
7971                  destination.  */
7972               for (j = 1; j < i.operands; j++)
7973                 {
7974                   i.op[j - 1] = i.op[j];
7975                   i.types[j - 1] = i.types[j];
7976                   i.tm.operand_types[j - 1] = i.tm.operand_types[j];
7977                   i.flags[j - 1] = i.flags[j];
7978                 }
7979             }
7980         }
7981       else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
7982         {
7983           gas_assert ((MAX_OPERANDS - 1) > dupl
7984                       && (i.tm.opcode_modifier.vexsources
7985                           == VEX3SOURCES));
7986
7987           /* Add the implicit xmm0 for instructions with VEX prefix
7988              and 3 sources.  */
7989           for (j = i.operands; j > 0; j--)
7990             {
7991               i.op[j] = i.op[j - 1];
7992               i.types[j] = i.types[j - 1];
7993               i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7994               i.flags[j] = i.flags[j - 1];
7995             }
7996           i.op[0].regs
7997             = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7998           i.types[0] = regxmm;
7999           i.tm.operand_types[0] = regxmm;
8000
8001           i.operands += 2;
8002           i.reg_operands += 2;
8003           i.tm.operands += 2;
8004
8005           dupl++;
8006           dest++;
8007           i.op[dupl] = i.op[dest];
8008           i.types[dupl] = i.types[dest];
8009           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8010           i.flags[dupl] = i.flags[dest];
8011         }
8012       else
8013         {
8014         duplicate:
8015           i.operands++;
8016           i.reg_operands++;
8017           i.tm.operands++;
8018
8019           i.op[dupl] = i.op[dest];
8020           i.types[dupl] = i.types[dest];
8021           i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8022           i.flags[dupl] = i.flags[dest];
8023         }
8024
8025        if (i.tm.opcode_modifier.immext)
8026          process_immext ();
8027     }
8028   else if (i.tm.operand_types[0].bitfield.instance == Accum
8029            && i.tm.operand_types[0].bitfield.xmmword)
8030     {
8031       unsigned int j;
8032
8033       for (j = 1; j < i.operands; j++)
8034         {
8035           i.op[j - 1] = i.op[j];
8036           i.types[j - 1] = i.types[j];
8037
8038           /* We need to adjust fields in i.tm since they are used by
8039              build_modrm_byte.  */
8040           i.tm.operand_types [j - 1] = i.tm.operand_types [j];
8041
8042           i.flags[j - 1] = i.flags[j];
8043         }
8044
8045       i.operands--;
8046       i.reg_operands--;
8047       i.tm.operands--;
8048     }
8049   else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
8050     {
8051       unsigned int regnum, first_reg_in_group, last_reg_in_group;
8052
8053       /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
8054       gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
8055       regnum = register_number (i.op[1].regs);
8056       first_reg_in_group = regnum & ~3;
8057       last_reg_in_group = first_reg_in_group + 3;
8058       if (regnum != first_reg_in_group)
8059         as_warn (_("source register `%s%s' implicitly denotes"
8060                    " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
8061                  register_prefix, i.op[1].regs->reg_name,
8062                  register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
8063                  register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
8064                  insn_name (&i.tm));
8065     }
8066   else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
8067     {
8068       /* The imul $imm, %reg instruction is converted into
8069          imul $imm, %reg, %reg, and the clr %reg instruction
8070          is converted into xor %reg, %reg.  */
8071
8072       unsigned int first_reg_op;
8073
8074       if (operand_type_check (i.types[0], reg))
8075         first_reg_op = 0;
8076       else
8077         first_reg_op = 1;
8078       /* Pretend we saw the extra register operand.  */
8079       gas_assert (i.reg_operands == 1
8080                   && i.op[first_reg_op + 1].regs == 0);
8081       i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
8082       i.types[first_reg_op + 1] = i.types[first_reg_op];
8083       i.operands++;
8084       i.reg_operands++;
8085     }
8086
8087   if (i.tm.opcode_modifier.modrm)
8088     {
8089       /* The opcode is completed (modulo i.tm.extension_opcode which
8090          must be put into the modrm byte).  Now, we make the modrm and
8091          index base bytes based on all the info we've collected.  */
8092
8093       default_seg = build_modrm_byte ();
8094     }
8095   else if (i.types[0].bitfield.class == SReg)
8096     {
8097       if (flag_code != CODE_64BIT
8098           ? i.tm.base_opcode == POP_SEG_SHORT
8099             && i.op[0].regs->reg_num == 1
8100           : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
8101             && i.op[0].regs->reg_num < 4)
8102         {
8103           as_bad (_("you can't `%s %s%s'"),
8104                   insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
8105           return 0;
8106         }
8107       if (i.op[0].regs->reg_num > 3
8108           && i.tm.opcode_modifier.opcodespace == SPACE_BASE )
8109         {
8110           i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
8111           i.tm.opcode_modifier.opcodespace = SPACE_0F;
8112         }
8113       i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
8114     }
8115   else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
8116            && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
8117     {
8118       default_seg = reg_ds;
8119     }
8120   else if (i.tm.opcode_modifier.isstring)
8121     {
8122       /* For the string instructions that allow a segment override
8123          on one of their operands, the default segment is ds.  */
8124       default_seg = reg_ds;
8125     }
8126   else if (i.short_form)
8127     {
8128       /* The register or float register operand is in operand
8129          0 or 1.  */
8130       const reg_entry *r = i.op[0].regs;
8131
8132       if (i.imm_operands
8133           || (r->reg_type.bitfield.instance == Accum && i.op[1].regs))
8134         r = i.op[1].regs;
8135       /* Register goes in low 3 bits of opcode.  */
8136       i.tm.base_opcode |= r->reg_num;
8137       if ((r->reg_flags & RegRex) != 0)
8138         i.rex |= REX_B;
8139       if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8140         {
8141           /* Warn about some common errors, but press on regardless.  */
8142           if (i.operands != 2)
8143             {
8144               /* Extraneous `l' suffix on fp insn.  */
8145               as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
8146                        register_prefix, i.op[0].regs->reg_name);
8147             }
8148           else if (i.op[0].regs->reg_type.bitfield.instance != Accum)
8149             {
8150               /* Reversed arguments on faddp or fmulp.  */
8151               as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
8152                        register_prefix, i.op[!intel_syntax].regs->reg_name,
8153                        register_prefix, i.op[intel_syntax].regs->reg_name);
8154             }
8155         }
8156     }
8157
8158   if ((i.seg[0] || i.prefix[SEG_PREFIX])
8159       && i.tm.base_opcode == 0x8d /* lea */
8160       && i.tm.opcode_modifier.opcodespace == SPACE_BASE
8161       && !is_any_vex_encoding(&i.tm))
8162     {
8163       if (!quiet_warnings)
8164         as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
8165       if (optimize)
8166         {
8167           i.seg[0] = NULL;
8168           i.prefix[SEG_PREFIX] = 0;
8169         }
8170     }
8171
8172   /* If a segment was explicitly specified, and the specified segment
8173      is neither the default nor the one already recorded from a prefix,
8174      use an opcode prefix to select it.  If we never figured out what
8175      the default segment is, then default_seg will be zero at this
8176      point, and the specified segment prefix will always be used.  */
8177   if (i.seg[0]
8178       && i.seg[0] != default_seg
8179       && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8180     {
8181       if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8182         return 0;
8183     }
8184   return 1;
8185 }
8186
8187 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
8188                                  bool do_sse2avx)
8189 {
8190   if (r->reg_flags & RegRex)
8191     {
8192       if (i.rex & rex_bit)
8193         as_bad (_("same type of prefix used twice"));
8194       i.rex |= rex_bit;
8195     }
8196   else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8197     {
8198       gas_assert (i.vex.register_specifier == r);
8199       i.vex.register_specifier += 8;
8200     }
8201
8202   if (r->reg_flags & RegVRex)
8203     i.vrex |= rex_bit;
8204 }
8205
8206 static const reg_entry *
8207 build_modrm_byte (void)
8208 {
8209   const reg_entry *default_seg = NULL;
8210   unsigned int source, dest;
8211   int vex_3_sources;
8212
8213   vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
8214   if (vex_3_sources)
8215     {
8216       unsigned int nds, reg_slot;
8217       expressionS *exp;
8218
8219       dest = i.operands - 1;
8220       nds = dest - 1;
8221
8222       /* There are 2 kinds of instructions:
8223          1. 5 operands: 4 register operands or 3 register operands
8224          plus 1 memory operand plus one Imm4 operand, VexXDS, and
8225          VexW0 or VexW1.  The destination must be either XMM, YMM or
8226          ZMM register.
8227          2. 4 operands: 4 register operands or 3 register operands
8228          plus 1 memory operand, with VexXDS.  */
8229       gas_assert ((i.reg_operands == 4
8230                    || (i.reg_operands == 3 && i.mem_operands == 1))
8231                   && i.tm.opcode_modifier.vexvvvv == VEXXDS
8232                   && i.tm.opcode_modifier.vexw
8233                   && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8234
8235       /* If VexW1 is set, the first non-immediate operand is the source and
8236          the second non-immediate one is encoded in the immediate operand.  */
8237       if (i.tm.opcode_modifier.vexw == VEXW1)
8238         {
8239           source = i.imm_operands;
8240           reg_slot = i.imm_operands + 1;
8241         }
8242       else
8243         {
8244           source = i.imm_operands + 1;
8245           reg_slot = i.imm_operands;
8246         }
8247
8248       if (i.imm_operands == 0)
8249         {
8250           /* When there is no immediate operand, generate an 8bit
8251              immediate operand to encode the first operand.  */
8252           exp = &im_expressions[i.imm_operands++];
8253           i.op[i.operands].imms = exp;
8254           i.types[i.operands].bitfield.imm8 = 1;
8255           i.operands++;
8256
8257           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8258           exp->X_op = O_constant;
8259           exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8260           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8261         }
8262       else
8263         {
8264           gas_assert (i.imm_operands == 1);
8265           gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8266           gas_assert (!i.tm.opcode_modifier.immext);
8267
8268           /* Turn on Imm8 again so that output_imm will generate it.  */
8269           i.types[0].bitfield.imm8 = 1;
8270
8271           gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8272           i.op[0].imms->X_add_number
8273               |= register_number (i.op[reg_slot].regs) << 4;
8274           gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8275         }
8276
8277       gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8278       i.vex.register_specifier = i.op[nds].regs;
8279     }
8280   else
8281     source = dest = 0;
8282
8283   /* i.reg_operands MUST be the number of real register operands;
8284      implicit registers do not count.  If there are 3 register
8285      operands, it must be a instruction with VexNDS.  For a
8286      instruction with VexNDD, the destination register is encoded
8287      in VEX prefix.  If there are 4 register operands, it must be
8288      a instruction with VEX prefix and 3 sources.  */
8289   if (i.mem_operands == 0
8290       && ((i.reg_operands == 2
8291            && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8292           || (i.reg_operands == 3
8293               && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8294           || (i.reg_operands == 4 && vex_3_sources)))
8295     {
8296       switch (i.operands)
8297         {
8298         case 2:
8299           source = 0;
8300           break;
8301         case 3:
8302           /* When there are 3 operands, one of them may be immediate,
8303              which may be the first or the last operand.  Otherwise,
8304              the first operand must be shift count register (cl) or it
8305              is an instruction with VexNDS. */
8306           gas_assert (i.imm_operands == 1
8307                       || (i.imm_operands == 0
8308                           && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8309                               || (i.types[0].bitfield.instance == RegC
8310                                   && i.types[0].bitfield.byte))));
8311           if (operand_type_check (i.types[0], imm)
8312               || (i.types[0].bitfield.instance == RegC
8313                   && i.types[0].bitfield.byte))
8314             source = 1;
8315           else
8316             source = 0;
8317           break;
8318         case 4:
8319           /* When there are 4 operands, the first two must be 8bit
8320              immediate operands. The source operand will be the 3rd
8321              one.
8322
8323              For instructions with VexNDS, if the first operand
8324              an imm8, the source operand is the 2nd one.  If the last
8325              operand is imm8, the source operand is the first one.  */
8326           gas_assert ((i.imm_operands == 2
8327                        && i.types[0].bitfield.imm8
8328                        && i.types[1].bitfield.imm8)
8329                       || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8330                           && i.imm_operands == 1
8331                           && (i.types[0].bitfield.imm8
8332                               || i.types[i.operands - 1].bitfield.imm8)));
8333           if (i.imm_operands == 2)
8334             source = 2;
8335           else
8336             {
8337               if (i.types[0].bitfield.imm8)
8338                 source = 1;
8339               else
8340                 source = 0;
8341             }
8342           break;
8343         case 5:
8344           gas_assert (!is_evex_encoding (&i.tm));
8345           gas_assert (i.imm_operands == 1 && vex_3_sources);
8346           break;
8347         default:
8348           abort ();
8349         }
8350
8351       if (!vex_3_sources)
8352         {
8353           dest = source + 1;
8354
8355           if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8356             {
8357               /* For instructions with VexNDS, the register-only source
8358                  operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8359                  register.  It is encoded in VEX prefix.  */
8360
8361               i386_operand_type op;
8362               unsigned int vvvv;
8363
8364               /* Swap two source operands if needed.  */
8365               if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES)
8366                 {
8367                   vvvv = source;
8368                   source = dest;
8369                 }
8370               else
8371                 vvvv = dest;
8372
8373               op = i.tm.operand_types[vvvv];
8374               if ((dest + 1) >= i.operands
8375                   || ((op.bitfield.class != Reg
8376                        || (!op.bitfield.dword && !op.bitfield.qword))
8377                       && op.bitfield.class != RegSIMD
8378                       && op.bitfield.class != RegMask))
8379                 abort ();
8380               i.vex.register_specifier = i.op[vvvv].regs;
8381               dest++;
8382             }
8383         }
8384
8385       i.rm.mode = 3;
8386       /* One of the register operands will be encoded in the i.rm.reg
8387          field, the other in the combined i.rm.mode and i.rm.regmem
8388          fields.  If no form of this instruction supports a memory
8389          destination operand, then we assume the source operand may
8390          sometimes be a memory operand and so we need to store the
8391          destination in the i.rm.reg field.  */
8392       if (!i.tm.opcode_modifier.regmem
8393           && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8394         {
8395           i.rm.reg = i.op[dest].regs->reg_num;
8396           i.rm.regmem = i.op[source].regs->reg_num;
8397           set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8398           set_rex_vrex (i.op[source].regs, REX_B, false);
8399         }
8400       else
8401         {
8402           i.rm.reg = i.op[source].regs->reg_num;
8403           i.rm.regmem = i.op[dest].regs->reg_num;
8404           set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8405           set_rex_vrex (i.op[source].regs, REX_R, false);
8406         }
8407       if (flag_code != CODE_64BIT && (i.rex & REX_R))
8408         {
8409           if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8410             abort ();
8411           i.rex &= ~REX_R;
8412           add_prefix (LOCK_PREFIX_OPCODE);
8413         }
8414     }
8415   else
8416     {                   /* If it's not 2 reg operands...  */
8417       unsigned int mem;
8418
8419       if (i.mem_operands)
8420         {
8421           unsigned int fake_zero_displacement = 0;
8422           unsigned int op;
8423
8424           for (op = 0; op < i.operands; op++)
8425             if (i.flags[op] & Operand_Mem)
8426               break;
8427           gas_assert (op < i.operands);
8428
8429           if (i.tm.opcode_modifier.sib)
8430             {
8431               /* The index register of VSIB shouldn't be RegIZ.  */
8432               if (i.tm.opcode_modifier.sib != SIBMEM
8433                   && i.index_reg->reg_num == RegIZ)
8434                 abort ();
8435
8436               i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8437               if (!i.base_reg)
8438                 {
8439                   i.sib.base = NO_BASE_REGISTER;
8440                   i.sib.scale = i.log2_scale_factor;
8441                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8442                   i.types[op].bitfield.disp32 = 1;
8443                 }
8444
8445               /* Since the mandatory SIB always has index register, so
8446                  the code logic remains unchanged. The non-mandatory SIB
8447                  without index register is allowed and will be handled
8448                  later.  */
8449               if (i.index_reg)
8450                 {
8451                   if (i.index_reg->reg_num == RegIZ)
8452                     i.sib.index = NO_INDEX_REGISTER;
8453                   else
8454                     i.sib.index = i.index_reg->reg_num;
8455                   set_rex_vrex (i.index_reg, REX_X, false);
8456                 }
8457             }
8458
8459           default_seg = reg_ds;
8460
8461           if (i.base_reg == 0)
8462             {
8463               i.rm.mode = 0;
8464               if (!i.disp_operands)
8465                 fake_zero_displacement = 1;
8466               if (i.index_reg == 0)
8467                 {
8468                   /* Both check for VSIB and mandatory non-vector SIB. */
8469                   gas_assert (!i.tm.opcode_modifier.sib
8470                               || i.tm.opcode_modifier.sib == SIBMEM);
8471                   /* Operand is just <disp>  */
8472                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8473                   if (flag_code == CODE_64BIT)
8474                     {
8475                       /* 64bit mode overwrites the 32bit absolute
8476                          addressing by RIP relative addressing and
8477                          absolute addressing is encoded by one of the
8478                          redundant SIB forms.  */
8479                       i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8480                       i.sib.base = NO_BASE_REGISTER;
8481                       i.sib.index = NO_INDEX_REGISTER;
8482                       i.types[op].bitfield.disp32 = 1;
8483                     }
8484                   else if ((flag_code == CODE_16BIT)
8485                            ^ (i.prefix[ADDR_PREFIX] != 0))
8486                     {
8487                       i.rm.regmem = NO_BASE_REGISTER_16;
8488                       i.types[op].bitfield.disp16 = 1;
8489                     }
8490                   else
8491                     {
8492                       i.rm.regmem = NO_BASE_REGISTER;
8493                       i.types[op].bitfield.disp32 = 1;
8494                     }
8495                 }
8496               else if (!i.tm.opcode_modifier.sib)
8497                 {
8498                   /* !i.base_reg && i.index_reg  */
8499                   if (i.index_reg->reg_num == RegIZ)
8500                     i.sib.index = NO_INDEX_REGISTER;
8501                   else
8502                     i.sib.index = i.index_reg->reg_num;
8503                   i.sib.base = NO_BASE_REGISTER;
8504                   i.sib.scale = i.log2_scale_factor;
8505                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8506                   i.types[op] = operand_type_and_not (i.types[op], anydisp);
8507                   i.types[op].bitfield.disp32 = 1;
8508                   if ((i.index_reg->reg_flags & RegRex) != 0)
8509                     i.rex |= REX_X;
8510                 }
8511             }
8512           /* RIP addressing for 64bit mode.  */
8513           else if (i.base_reg->reg_num == RegIP)
8514             {
8515               gas_assert (!i.tm.opcode_modifier.sib);
8516               i.rm.regmem = NO_BASE_REGISTER;
8517               i.types[op].bitfield.disp8 = 0;
8518               i.types[op].bitfield.disp16 = 0;
8519               i.types[op].bitfield.disp32 = 1;
8520               i.types[op].bitfield.disp64 = 0;
8521               i.flags[op] |= Operand_PCrel;
8522               if (! i.disp_operands)
8523                 fake_zero_displacement = 1;
8524             }
8525           else if (i.base_reg->reg_type.bitfield.word)
8526             {
8527               gas_assert (!i.tm.opcode_modifier.sib);
8528               switch (i.base_reg->reg_num)
8529                 {
8530                 case 3: /* (%bx)  */
8531                   if (i.index_reg == 0)
8532                     i.rm.regmem = 7;
8533                   else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
8534                     i.rm.regmem = i.index_reg->reg_num - 6;
8535                   break;
8536                 case 5: /* (%bp)  */
8537                   default_seg = reg_ss;
8538                   if (i.index_reg == 0)
8539                     {
8540                       i.rm.regmem = 6;
8541                       if (operand_type_check (i.types[op], disp) == 0)
8542                         {
8543                           /* fake (%bp) into 0(%bp)  */
8544                           if (i.disp_encoding == disp_encoding_16bit)
8545                             i.types[op].bitfield.disp16 = 1;
8546                           else
8547                             i.types[op].bitfield.disp8 = 1;
8548                           fake_zero_displacement = 1;
8549                         }
8550                     }
8551                   else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
8552                     i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8553                   break;
8554                 default: /* (%si) -> 4 or (%di) -> 5  */
8555                   i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8556                 }
8557               if (!fake_zero_displacement
8558                   && !i.disp_operands
8559                   && i.disp_encoding)
8560                 {
8561                   fake_zero_displacement = 1;
8562                   if (i.disp_encoding == disp_encoding_8bit)
8563                     i.types[op].bitfield.disp8 = 1;
8564                   else
8565                     i.types[op].bitfield.disp16 = 1;
8566                 }
8567               i.rm.mode = mode_from_disp_size (i.types[op]);
8568             }
8569           else /* i.base_reg and 32/64 bit mode  */
8570             {
8571               if (operand_type_check (i.types[op], disp))
8572                 {
8573                   i.types[op].bitfield.disp16 = 0;
8574                   i.types[op].bitfield.disp64 = 0;
8575                   i.types[op].bitfield.disp32 = 1;
8576                 }
8577
8578               if (!i.tm.opcode_modifier.sib)
8579                 i.rm.regmem = i.base_reg->reg_num;
8580               if ((i.base_reg->reg_flags & RegRex) != 0)
8581                 i.rex |= REX_B;
8582               i.sib.base = i.base_reg->reg_num;
8583               /* x86-64 ignores REX prefix bit here to avoid decoder
8584                  complications.  */
8585               if (!(i.base_reg->reg_flags & RegRex)
8586                   && (i.base_reg->reg_num == EBP_REG_NUM
8587                    || i.base_reg->reg_num == ESP_REG_NUM))
8588                   default_seg = reg_ss;
8589               if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8590                 {
8591                   fake_zero_displacement = 1;
8592                   if (i.disp_encoding == disp_encoding_32bit)
8593                     i.types[op].bitfield.disp32 = 1;
8594                   else
8595                     i.types[op].bitfield.disp8 = 1;
8596                 }
8597               i.sib.scale = i.log2_scale_factor;
8598               if (i.index_reg == 0)
8599                 {
8600                   /* Only check for VSIB. */
8601                   gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8602                               && i.tm.opcode_modifier.sib != VECSIB256
8603                               && i.tm.opcode_modifier.sib != VECSIB512);
8604
8605                   /* <disp>(%esp) becomes two byte modrm with no index
8606                      register.  We've already stored the code for esp
8607                      in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8608                      Any base register besides %esp will not use the
8609                      extra modrm byte.  */
8610                   i.sib.index = NO_INDEX_REGISTER;
8611                 }
8612               else if (!i.tm.opcode_modifier.sib)
8613                 {
8614                   if (i.index_reg->reg_num == RegIZ)
8615                     i.sib.index = NO_INDEX_REGISTER;
8616                   else
8617                     i.sib.index = i.index_reg->reg_num;
8618                   i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8619                   if ((i.index_reg->reg_flags & RegRex) != 0)
8620                     i.rex |= REX_X;
8621                 }
8622
8623               if (i.disp_operands
8624                   && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8625                       || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8626                 i.rm.mode = 0;
8627               else
8628                 {
8629                   if (!fake_zero_displacement
8630                       && !i.disp_operands
8631                       && i.disp_encoding)
8632                     {
8633                       fake_zero_displacement = 1;
8634                       if (i.disp_encoding == disp_encoding_8bit)
8635                         i.types[op].bitfield.disp8 = 1;
8636                       else
8637                         i.types[op].bitfield.disp32 = 1;
8638                     }
8639                   i.rm.mode = mode_from_disp_size (i.types[op]);
8640                 }
8641             }
8642
8643           if (fake_zero_displacement)
8644             {
8645               /* Fakes a zero displacement assuming that i.types[op]
8646                  holds the correct displacement size.  */
8647               expressionS *exp;
8648
8649               gas_assert (i.op[op].disps == 0);
8650               exp = &disp_expressions[i.disp_operands++];
8651               i.op[op].disps = exp;
8652               exp->X_op = O_constant;
8653               exp->X_add_number = 0;
8654               exp->X_add_symbol = (symbolS *) 0;
8655               exp->X_op_symbol = (symbolS *) 0;
8656             }
8657
8658           mem = op;
8659         }
8660       else
8661         mem = ~0;
8662
8663       if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8664         {
8665           if (operand_type_check (i.types[0], imm))
8666             i.vex.register_specifier = NULL;
8667           else
8668             {
8669               /* VEX.vvvv encodes one of the sources when the first
8670                  operand is not an immediate.  */
8671               if (i.tm.opcode_modifier.vexw == VEXW0)
8672                 i.vex.register_specifier = i.op[0].regs;
8673               else
8674                 i.vex.register_specifier = i.op[1].regs;
8675             }
8676
8677           /* Destination is a XMM register encoded in the ModRM.reg
8678              and VEX.R bit.  */
8679           i.rm.reg = i.op[2].regs->reg_num;
8680           if ((i.op[2].regs->reg_flags & RegRex) != 0)
8681             i.rex |= REX_R;
8682
8683           /* ModRM.rm and VEX.B encodes the other source.  */
8684           if (!i.mem_operands)
8685             {
8686               i.rm.mode = 3;
8687
8688               if (i.tm.opcode_modifier.vexw == VEXW0)
8689                 i.rm.regmem = i.op[1].regs->reg_num;
8690               else
8691                 i.rm.regmem = i.op[0].regs->reg_num;
8692
8693               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8694                 i.rex |= REX_B;
8695             }
8696         }
8697       else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8698         {
8699           i.vex.register_specifier = i.op[2].regs;
8700           if (!i.mem_operands)
8701             {
8702               i.rm.mode = 3;
8703               i.rm.regmem = i.op[1].regs->reg_num;
8704               if ((i.op[1].regs->reg_flags & RegRex) != 0)
8705                 i.rex |= REX_B;
8706             }
8707         }
8708       /* Fill in i.rm.reg or i.rm.regmem field with register operand
8709          (if any) based on i.tm.extension_opcode.  Again, we must be
8710          careful to make sure that segment/control/debug/test/MMX
8711          registers are coded into the i.rm.reg field.  */
8712       else if (i.reg_operands)
8713         {
8714           unsigned int op;
8715           unsigned int vex_reg = ~0;
8716
8717           for (op = 0; op < i.operands; op++)
8718             if (i.types[op].bitfield.class == Reg
8719                 || i.types[op].bitfield.class == RegBND
8720                 || i.types[op].bitfield.class == RegMask
8721                 || i.types[op].bitfield.class == SReg
8722                 || i.types[op].bitfield.class == RegCR
8723                 || i.types[op].bitfield.class == RegDR
8724                 || i.types[op].bitfield.class == RegTR
8725                 || i.types[op].bitfield.class == RegSIMD
8726                 || i.types[op].bitfield.class == RegMMX)
8727               break;
8728
8729           if (vex_3_sources)
8730             op = dest;
8731           else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8732             {
8733               /* For instructions with VexNDS, the register-only
8734                  source operand is encoded in VEX prefix. */
8735               gas_assert (mem != (unsigned int) ~0);
8736
8737               if (op > mem || i.tm.cpu_flags.bitfield.cpucmpccxadd)
8738                 {
8739                   vex_reg = op++;
8740                   gas_assert (op < i.operands);
8741                 }
8742               else
8743                 {
8744                   /* Check register-only source operand when two source
8745                      operands are swapped.  */
8746                   if (!i.tm.operand_types[op].bitfield.baseindex
8747                       && i.tm.operand_types[op + 1].bitfield.baseindex)
8748                     {
8749                       vex_reg = op;
8750                       op += 2;
8751                       gas_assert (mem == (vex_reg + 1)
8752                                   && op < i.operands);
8753                     }
8754                   else
8755                     {
8756                       vex_reg = op + 1;
8757                       gas_assert (vex_reg < i.operands);
8758                     }
8759                 }
8760             }
8761           else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8762             {
8763               /* For instructions with VexNDD, the register destination
8764                  is encoded in VEX prefix.  */
8765               if (i.mem_operands == 0)
8766                 {
8767                   /* There is no memory operand.  */
8768                   gas_assert ((op + 2) == i.operands);
8769                   vex_reg = op + 1;
8770                 }
8771               else
8772                 {
8773                   /* There are only 2 non-immediate operands.  */
8774                   gas_assert (op < i.imm_operands + 2
8775                               && i.operands == i.imm_operands + 2);
8776                   vex_reg = i.imm_operands + 1;
8777                 }
8778             }
8779           else
8780             gas_assert (op < i.operands);
8781
8782           if (vex_reg != (unsigned int) ~0)
8783             {
8784               i386_operand_type *type = &i.tm.operand_types[vex_reg];
8785
8786               if ((type->bitfield.class != Reg
8787                    || (!type->bitfield.dword && !type->bitfield.qword))
8788                   && type->bitfield.class != RegSIMD
8789                   && type->bitfield.class != RegMask)
8790                 abort ();
8791
8792               i.vex.register_specifier = i.op[vex_reg].regs;
8793             }
8794
8795           /* Don't set OP operand twice.  */
8796           if (vex_reg != op)
8797             {
8798               /* If there is an extension opcode to put here, the
8799                  register number must be put into the regmem field.  */
8800               if (i.tm.extension_opcode != None)
8801                 {
8802                   i.rm.regmem = i.op[op].regs->reg_num;
8803                   set_rex_vrex (i.op[op].regs, REX_B,
8804                                 i.tm.opcode_modifier.sse2avx);
8805                 }
8806               else
8807                 {
8808                   i.rm.reg = i.op[op].regs->reg_num;
8809                   set_rex_vrex (i.op[op].regs, REX_R,
8810                                 i.tm.opcode_modifier.sse2avx);
8811                 }
8812             }
8813
8814           /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8815              must set it to 3 to indicate this is a register operand
8816              in the regmem field.  */
8817           if (!i.mem_operands)
8818             i.rm.mode = 3;
8819         }
8820
8821       /* Fill in i.rm.reg field with extension opcode (if any).  */
8822       if (i.tm.extension_opcode != None)
8823         i.rm.reg = i.tm.extension_opcode;
8824     }
8825   return default_seg;
8826 }
8827
8828 static INLINE void
8829 frag_opcode_byte (unsigned char byte)
8830 {
8831   if (now_seg != absolute_section)
8832     FRAG_APPEND_1_CHAR (byte);
8833   else
8834     ++abs_section_offset;
8835 }
8836
8837 static unsigned int
8838 flip_code16 (unsigned int code16)
8839 {
8840   gas_assert (i.tm.operands == 1);
8841
8842   return !(i.prefix[REX_PREFIX] & REX_W)
8843          && (code16 ? i.tm.operand_types[0].bitfield.disp32
8844                     : i.tm.operand_types[0].bitfield.disp16)
8845          ? CODE16 : 0;
8846 }
8847
8848 static void
8849 output_branch (void)
8850 {
8851   char *p;
8852   int size;
8853   int code16;
8854   int prefix;
8855   relax_substateT subtype;
8856   symbolS *sym;
8857   offsetT off;
8858
8859   if (now_seg == absolute_section)
8860     {
8861       as_bad (_("relaxable branches not supported in absolute section"));
8862       return;
8863     }
8864
8865   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8866   size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8867
8868   prefix = 0;
8869   if (i.prefix[DATA_PREFIX] != 0)
8870     {
8871       prefix = 1;
8872       i.prefixes -= 1;
8873       code16 ^= flip_code16(code16);
8874     }
8875   /* Pentium4 branch hints.  */
8876   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8877       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8878     {
8879       prefix++;
8880       i.prefixes--;
8881     }
8882   if (i.prefix[REX_PREFIX] != 0)
8883     {
8884       prefix++;
8885       i.prefixes--;
8886     }
8887
8888   /* BND prefixed jump.  */
8889   if (i.prefix[BND_PREFIX] != 0)
8890     {
8891       prefix++;
8892       i.prefixes--;
8893     }
8894
8895   if (i.prefixes != 0)
8896     as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
8897
8898   /* It's always a symbol;  End frag & setup for relax.
8899      Make sure there is enough room in this frag for the largest
8900      instruction we may generate in md_convert_frag.  This is 2
8901      bytes for the opcode and room for the prefix and largest
8902      displacement.  */
8903   frag_grow (prefix + 2 + 4);
8904   /* Prefix and 1 opcode byte go in fr_fix.  */
8905   p = frag_more (prefix + 1);
8906   if (i.prefix[DATA_PREFIX] != 0)
8907     *p++ = DATA_PREFIX_OPCODE;
8908   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8909       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8910     *p++ = i.prefix[SEG_PREFIX];
8911   if (i.prefix[BND_PREFIX] != 0)
8912     *p++ = BND_PREFIX_OPCODE;
8913   if (i.prefix[REX_PREFIX] != 0)
8914     *p++ = i.prefix[REX_PREFIX];
8915   *p = i.tm.base_opcode;
8916
8917   if ((unsigned char) *p == JUMP_PC_RELATIVE)
8918     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8919   else if (cpu_arch_flags.bitfield.cpui386)
8920     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8921   else
8922     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8923   subtype |= code16;
8924
8925   sym = i.op[0].disps->X_add_symbol;
8926   off = i.op[0].disps->X_add_number;
8927
8928   if (i.op[0].disps->X_op != O_constant
8929       && i.op[0].disps->X_op != O_symbol)
8930     {
8931       /* Handle complex expressions.  */
8932       sym = make_expr_symbol (i.op[0].disps);
8933       off = 0;
8934     }
8935
8936   frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8937
8938   /* 1 possible extra opcode + 4 byte displacement go in var part.
8939      Pass reloc in fr_var.  */
8940   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8941 }
8942
8943 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8944 /* Return TRUE iff PLT32 relocation should be used for branching to
8945    symbol S.  */
8946
8947 static bool
8948 need_plt32_p (symbolS *s)
8949 {
8950   /* PLT32 relocation is ELF only.  */
8951   if (!IS_ELF)
8952     return false;
8953
8954 #ifdef TE_SOLARIS
8955   /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8956      krtld support it.  */
8957   return false;
8958 #endif
8959
8960   /* Since there is no need to prepare for PLT branch on x86-64, we
8961      can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
8962      be used as a marker for 32-bit PC-relative branches.  */
8963   if (!object_64bit)
8964     return false;
8965
8966   if (s == NULL)
8967     return false;
8968
8969   /* Weak or undefined symbol need PLT32 relocation.  */
8970   if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8971     return true;
8972
8973   /* Non-global symbol doesn't need PLT32 relocation.  */
8974   if (! S_IS_EXTERNAL (s))
8975     return false;
8976
8977   /* Other global symbols need PLT32 relocation.  NB: Symbol with
8978      non-default visibilities are treated as normal global symbol
8979      so that PLT32 relocation can be used as a marker for 32-bit
8980      PC-relative branches.  It is useful for linker relaxation.  */
8981   return true;
8982 }
8983 #endif
8984
8985 static void
8986 output_jump (void)
8987 {
8988   char *p;
8989   int size;
8990   fixS *fixP;
8991   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8992
8993   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8994     {
8995       /* This is a loop or jecxz type instruction.  */
8996       size = 1;
8997       if (i.prefix[ADDR_PREFIX] != 0)
8998         {
8999           frag_opcode_byte (ADDR_PREFIX_OPCODE);
9000           i.prefixes -= 1;
9001         }
9002       /* Pentium4 branch hints.  */
9003       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
9004           || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
9005         {
9006           frag_opcode_byte (i.prefix[SEG_PREFIX]);
9007           i.prefixes--;
9008         }
9009     }
9010   else
9011     {
9012       int code16;
9013
9014       code16 = 0;
9015       if (flag_code == CODE_16BIT)
9016         code16 = CODE16;
9017
9018       if (i.prefix[DATA_PREFIX] != 0)
9019         {
9020           frag_opcode_byte (DATA_PREFIX_OPCODE);
9021           i.prefixes -= 1;
9022           code16 ^= flip_code16(code16);
9023         }
9024
9025       size = 4;
9026       if (code16)
9027         size = 2;
9028     }
9029
9030   /* BND prefixed jump.  */
9031   if (i.prefix[BND_PREFIX] != 0)
9032     {
9033       frag_opcode_byte (i.prefix[BND_PREFIX]);
9034       i.prefixes -= 1;
9035     }
9036
9037   if (i.prefix[REX_PREFIX] != 0)
9038     {
9039       frag_opcode_byte (i.prefix[REX_PREFIX]);
9040       i.prefixes -= 1;
9041     }
9042
9043   if (i.prefixes != 0)
9044     as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9045
9046   if (now_seg == absolute_section)
9047     {
9048       abs_section_offset += i.opcode_length + size;
9049       return;
9050     }
9051
9052   p = frag_more (i.opcode_length + size);
9053   switch (i.opcode_length)
9054     {
9055     case 2:
9056       *p++ = i.tm.base_opcode >> 8;
9057       /* Fall through.  */
9058     case 1:
9059       *p++ = i.tm.base_opcode;
9060       break;
9061     default:
9062       abort ();
9063     }
9064
9065 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9066   if (flag_code == CODE_64BIT && size == 4
9067       && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
9068       && need_plt32_p (i.op[0].disps->X_add_symbol))
9069     jump_reloc = BFD_RELOC_X86_64_PLT32;
9070 #endif
9071
9072   jump_reloc = reloc (size, 1, 1, jump_reloc);
9073
9074   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9075                       i.op[0].disps, 1, jump_reloc);
9076
9077   /* All jumps handled here are signed, but don't unconditionally use a
9078      signed limit check for 32 and 16 bit jumps as we want to allow wrap
9079      around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
9080      respectively.  */
9081   switch (size)
9082     {
9083     case 1:
9084       fixP->fx_signed = 1;
9085       break;
9086
9087     case 2:
9088       if (i.tm.base_opcode == 0xc7f8)
9089         fixP->fx_signed = 1;
9090       break;
9091
9092     case 4:
9093       if (flag_code == CODE_64BIT)
9094         fixP->fx_signed = 1;
9095       break;
9096     }
9097 }
9098
9099 static void
9100 output_interseg_jump (void)
9101 {
9102   char *p;
9103   int size;
9104   int prefix;
9105   int code16;
9106
9107   code16 = 0;
9108   if (flag_code == CODE_16BIT)
9109     code16 = CODE16;
9110
9111   prefix = 0;
9112   if (i.prefix[DATA_PREFIX] != 0)
9113     {
9114       prefix = 1;
9115       i.prefixes -= 1;
9116       code16 ^= CODE16;
9117     }
9118
9119   gas_assert (!i.prefix[REX_PREFIX]);
9120
9121   size = 4;
9122   if (code16)
9123     size = 2;
9124
9125   if (i.prefixes != 0)
9126     as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9127
9128   if (now_seg == absolute_section)
9129     {
9130       abs_section_offset += prefix + 1 + 2 + size;
9131       return;
9132     }
9133
9134   /* 1 opcode; 2 segment; offset  */
9135   p = frag_more (prefix + 1 + 2 + size);
9136
9137   if (i.prefix[DATA_PREFIX] != 0)
9138     *p++ = DATA_PREFIX_OPCODE;
9139
9140   if (i.prefix[REX_PREFIX] != 0)
9141     *p++ = i.prefix[REX_PREFIX];
9142
9143   *p++ = i.tm.base_opcode;
9144   if (i.op[1].imms->X_op == O_constant)
9145     {
9146       offsetT n = i.op[1].imms->X_add_number;
9147
9148       if (size == 2
9149           && !fits_in_unsigned_word (n)
9150           && !fits_in_signed_word (n))
9151         {
9152           as_bad (_("16-bit jump out of range"));
9153           return;
9154         }
9155       md_number_to_chars (p, n, size);
9156     }
9157   else
9158     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9159                  i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9160
9161   p += size;
9162   if (i.op[0].imms->X_op == O_constant)
9163     md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9164   else
9165     fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9166                  i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9167 }
9168
9169 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9170 void
9171 x86_cleanup (void)
9172 {
9173   char *p;
9174   asection *seg = now_seg;
9175   subsegT subseg = now_subseg;
9176   asection *sec;
9177   unsigned int alignment, align_size_1;
9178   unsigned int isa_1_descsz, feature_2_descsz, descsz;
9179   unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9180   unsigned int padding;
9181
9182   if (!IS_ELF || !x86_used_note)
9183     return;
9184
9185   x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9186
9187   /* The .note.gnu.property section layout:
9188
9189      Field      Length          Contents
9190      ----       ----            ----
9191      n_namsz    4               4
9192      n_descsz   4               The note descriptor size
9193      n_type     4               NT_GNU_PROPERTY_TYPE_0
9194      n_name     4               "GNU"
9195      n_desc     n_descsz        The program property array
9196      ....       ....            ....
9197    */
9198
9199   /* Create the .note.gnu.property section.  */
9200   sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9201   bfd_set_section_flags (sec,
9202                          (SEC_ALLOC
9203                           | SEC_LOAD
9204                           | SEC_DATA
9205                           | SEC_HAS_CONTENTS
9206                           | SEC_READONLY));
9207
9208   if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9209     {
9210       align_size_1 = 7;
9211       alignment = 3;
9212     }
9213   else
9214     {
9215       align_size_1 = 3;
9216       alignment = 2;
9217     }
9218
9219   bfd_set_section_alignment (sec, alignment);
9220   elf_section_type (sec) = SHT_NOTE;
9221
9222   /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9223                                   + 4-byte data  */
9224   isa_1_descsz_raw = 4 + 4 + 4;
9225   /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
9226   isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9227
9228   feature_2_descsz_raw = isa_1_descsz;
9229   /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9230                                       + 4-byte data  */
9231   feature_2_descsz_raw += 4 + 4 + 4;
9232   /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
9233   feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9234                       & ~align_size_1);
9235
9236   descsz = feature_2_descsz;
9237   /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
9238   p = frag_more (4 + 4 + 4 + 4 + descsz);
9239
9240   /* Write n_namsz.  */
9241   md_number_to_chars (p, (valueT) 4, 4);
9242
9243   /* Write n_descsz.  */
9244   md_number_to_chars (p + 4, (valueT) descsz, 4);
9245
9246   /* Write n_type.  */
9247   md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9248
9249   /* Write n_name.  */
9250   memcpy (p + 4 * 3, "GNU", 4);
9251
9252   /* Write 4-byte type.  */
9253   md_number_to_chars (p + 4 * 4,
9254                       (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9255
9256   /* Write 4-byte data size.  */
9257   md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9258
9259   /* Write 4-byte data.  */
9260   md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9261
9262   /* Zero out paddings.  */
9263   padding = isa_1_descsz - isa_1_descsz_raw;
9264   if (padding)
9265     memset (p + 4 * 7, 0, padding);
9266
9267   /* Write 4-byte type.  */
9268   md_number_to_chars (p + isa_1_descsz + 4 * 4,
9269                       (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9270
9271   /* Write 4-byte data size.  */
9272   md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9273
9274   /* Write 4-byte data.  */
9275   md_number_to_chars (p + isa_1_descsz + 4 * 6,
9276                       (valueT) x86_feature_2_used, 4);
9277
9278   /* Zero out paddings.  */
9279   padding = feature_2_descsz - feature_2_descsz_raw;
9280   if (padding)
9281     memset (p + isa_1_descsz + 4 * 7, 0, padding);
9282
9283   /* We probably can't restore the current segment, for there likely
9284      isn't one yet...  */
9285   if (seg && subseg)
9286     subseg_set (seg, subseg);
9287 }
9288
9289 bool
9290 x86_support_sframe_p (void)
9291 {
9292   /* At this time, SFrame unwind is supported for AMD64 ABI only.  */
9293   return (x86_elf_abi == X86_64_ABI);
9294 }
9295
/* Return whether the return address needs to be tracked explicitly in
   SFrame Frame Row Entries.  It does not: on AMD64 the return address
   always lives on the stack at a fixed offset from the CFA (the one
   reported by x86_sframe_cfa_ra_offset ()).  */

bool
x86_sframe_ra_tracking_p (void)
{
  const bool track_ra = false;

  return track_ra;
}
9304
9305 offsetT
9306 x86_sframe_cfa_ra_offset (void)
9307 {
9308   gas_assert (x86_elf_abi == X86_64_ABI);
9309   return (offsetT) -8;
9310 }
9311
9312 unsigned char
9313 x86_sframe_get_abi_arch (void)
9314 {
9315   unsigned char sframe_abi_arch = 0;
9316
9317   if (x86_support_sframe_p ())
9318     {
9319       gas_assert (!target_big_endian);
9320       sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9321     }
9322
9323   return sframe_abi_arch;
9324 }
9325
9326 #endif
9327
9328 static unsigned int
9329 encoding_length (const fragS *start_frag, offsetT start_off,
9330                  const char *frag_now_ptr)
9331 {
9332   unsigned int len = 0;
9333
9334   if (start_frag != frag_now)
9335     {
9336       const fragS *fr = start_frag;
9337
9338       do {
9339         len += fr->fr_fix;
9340         fr = fr->fr_next;
9341       } while (fr && fr != frag_now);
9342     }
9343
9344   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9345 }
9346
9347 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9348    be macro-fused with conditional jumps.
9349    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
9350    or is one of the following format:
9351
9352     cmp m, imm
9353     add m, imm
9354     sub m, imm
9355    test m, imm
9356     and m, imm
9357     inc m
9358     dec m
9359
9360    it is unfusible.  */
9361
9362 static int
9363 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9364 {
9365   /* No RIP address.  */
9366   if (i.base_reg && i.base_reg->reg_num == RegIP)
9367     return 0;
9368
9369   /* No opcodes outside of base encoding space.  */
9370   if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9371     return 0;
9372
9373   /* add, sub without add/sub m, imm.  */
9374   if (i.tm.base_opcode <= 5
9375       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9376       || ((i.tm.base_opcode | 3) == 0x83
9377           && (i.tm.extension_opcode == 0x5
9378               || i.tm.extension_opcode == 0x0)))
9379     {
9380       *mf_cmp_p = mf_cmp_alu_cmp;
9381       return !(i.mem_operands && i.imm_operands);
9382     }
9383
9384   /* and without and m, imm.  */
9385   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9386       || ((i.tm.base_opcode | 3) == 0x83
9387           && i.tm.extension_opcode == 0x4))
9388     {
9389       *mf_cmp_p = mf_cmp_test_and;
9390       return !(i.mem_operands && i.imm_operands);
9391     }
9392
9393   /* test without test m imm.  */
9394   if ((i.tm.base_opcode | 1) == 0x85
9395       || (i.tm.base_opcode | 1) == 0xa9
9396       || ((i.tm.base_opcode | 1) == 0xf7
9397           && i.tm.extension_opcode == 0))
9398     {
9399       *mf_cmp_p = mf_cmp_test_and;
9400       return !(i.mem_operands && i.imm_operands);
9401     }
9402
9403   /* cmp without cmp m, imm.  */
9404   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9405       || ((i.tm.base_opcode | 3) == 0x83
9406           && (i.tm.extension_opcode == 0x7)))
9407     {
9408       *mf_cmp_p = mf_cmp_alu_cmp;
9409       return !(i.mem_operands && i.imm_operands);
9410     }
9411
9412   /* inc, dec without inc/dec m.   */
9413   if ((i.tm.cpu_flags.bitfield.cpuno64
9414        && (i.tm.base_opcode | 0xf) == 0x4f)
9415       || ((i.tm.base_opcode | 1) == 0xff
9416           && i.tm.extension_opcode <= 0x1))
9417     {
9418       *mf_cmp_p = mf_cmp_incdec;
9419       return !i.mem_operands;
9420     }
9421
9422   return 0;
9423 }
9424
9425 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
9426
9427 static int
9428 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9429 {
9430   /* NB: Don't work with COND_JUMP86 without i386.  */
9431   if (!align_branch_power
9432       || now_seg == absolute_section
9433       || !cpu_arch_flags.bitfield.cpui386
9434       || !(align_branch & align_branch_fused_bit))
9435     return 0;
9436
9437   if (maybe_fused_with_jcc_p (mf_cmp_p))
9438     {
9439       if (last_insn.kind == last_insn_other
9440           || last_insn.seg != now_seg)
9441         return 1;
9442       if (flag_debug)
9443         as_warn_where (last_insn.file, last_insn.line,
9444                        _("`%s` skips -malign-branch-boundary on `%s`"),
9445                        last_insn.name, insn_name (&i.tm));
9446     }
9447
9448   return 0;
9449 }
9450
9451 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
9452
9453 static int
9454 add_branch_prefix_frag_p (void)
9455 {
9456   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
9457      to PadLock instructions since they include prefixes in opcode.  */
9458   if (!align_branch_power
9459       || !align_branch_prefix_size
9460       || now_seg == absolute_section
9461       || i.tm.cpu_flags.bitfield.cpupadlock
9462       || !cpu_arch_flags.bitfield.cpui386)
9463     return 0;
9464
9465   /* Don't add prefix if it is a prefix or there is no operand in case
9466      that segment prefix is special.  */
9467   if (!i.operands || i.tm.opcode_modifier.isprefix)
9468     return 0;
9469
9470   if (last_insn.kind == last_insn_other
9471       || last_insn.seg != now_seg)
9472     return 1;
9473
9474   if (flag_debug)
9475     as_warn_where (last_insn.file, last_insn.line,
9476                    _("`%s` skips -malign-branch-boundary on `%s`"),
9477                    last_insn.name, insn_name (&i.tm));
9478
9479   return 0;
9480 }
9481
9482 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
9483
9484 static int
9485 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9486                            enum mf_jcc_kind *mf_jcc_p)
9487 {
9488   int add_padding;
9489
9490   /* NB: Don't work with COND_JUMP86 without i386.  */
9491   if (!align_branch_power
9492       || now_seg == absolute_section
9493       || !cpu_arch_flags.bitfield.cpui386
9494       || i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9495     return 0;
9496
9497   add_padding = 0;
9498
9499   /* Check for jcc and direct jmp.  */
9500   if (i.tm.opcode_modifier.jump == JUMP)
9501     {
9502       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9503         {
9504           *branch_p = align_branch_jmp;
9505           add_padding = align_branch & align_branch_jmp_bit;
9506         }
9507       else
9508         {
9509           /* Because J<cc> and JN<cc> share same group in macro-fusible table,
9510              igore the lowest bit.  */
9511           *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9512           *branch_p = align_branch_jcc;
9513           if ((align_branch & align_branch_jcc_bit))
9514             add_padding = 1;
9515         }
9516     }
9517   else if ((i.tm.base_opcode | 1) == 0xc3)
9518     {
9519       /* Near ret.  */
9520       *branch_p = align_branch_ret;
9521       if ((align_branch & align_branch_ret_bit))
9522         add_padding = 1;
9523     }
9524   else
9525     {
9526       /* Check for indirect jmp, direct and indirect calls.  */
9527       if (i.tm.base_opcode == 0xe8)
9528         {
9529           /* Direct call.  */
9530           *branch_p = align_branch_call;
9531           if ((align_branch & align_branch_call_bit))
9532             add_padding = 1;
9533         }
9534       else if (i.tm.base_opcode == 0xff
9535                && (i.tm.extension_opcode == 2
9536                    || i.tm.extension_opcode == 4))
9537         {
9538           /* Indirect call and jmp.  */
9539           *branch_p = align_branch_indirect;
9540           if ((align_branch & align_branch_indirect_bit))
9541             add_padding = 1;
9542         }
9543
9544       if (add_padding
9545           && i.disp_operands
9546           && tls_get_addr
9547           && (i.op[0].disps->X_op == O_symbol
9548               || (i.op[0].disps->X_op == O_subtract
9549                   && i.op[0].disps->X_op_symbol == GOT_symbol)))
9550         {
9551           symbolS *s = i.op[0].disps->X_add_symbol;
9552           /* No padding to call to global or undefined tls_get_addr.  */
9553           if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9554               && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9555             return 0;
9556         }
9557     }
9558
9559   if (add_padding
9560       && last_insn.kind != last_insn_other
9561       && last_insn.seg == now_seg)
9562     {
9563       if (flag_debug)
9564         as_warn_where (last_insn.file, last_insn.line,
9565                        _("`%s` skips -malign-branch-boundary on `%s`"),
9566                        last_insn.name, insn_name (&i.tm));
9567       return 0;
9568     }
9569
9570   return add_padding;
9571 }
9572
/* Emit the encoding of the instruction currently described by the
   file-scope state `i'.  Takes no arguments and returns nothing.

   In order, this function:
   - for ELF output with x86_used_note set and outside the absolute
     section, folds this instruction's register state and CPU flags into
     x86_feature_2_used and x86_isa_1_used;
   - calls dwarf2_emit_insn and remembers the frag and offset at which
     the instruction starts;
   - if add_branch_padding_frag_p says so, opens a BRANCH_PADDING
     machine-dependent frag in front of the instruction;
   - hands jumps to output_branch, output_jump or output_interseg_jump,
     and for everything else emits prefixes (or the VEX bytes), the
     opcode, the ModRM/SIB bytes, displacements and immediates itself;
   - for non-jump instructions outside the absolute section, warns when
     the encoding is longer than 15 bytes, and otherwise finalizes the
     limits of any FUSED_JCC_PADDING / BRANCH_PREFIX frag it opened;
   - when align_branch_power is set (and the other conditions at the end
     hold), closes the current frag.

   Where this function reserves bytes directly with frag_more, the
   absolute section is handled by advancing abs_section_offset
   instead.  */
9573 static void
9574 output_insn (void)
9575 {
9576   fragS *insn_start_frag;
9577   offsetT insn_start_off;
9578   fragS *fragP = NULL;
9579   enum align_branch_kind branch = align_branch_none;
9580   /* The initializer is arbitrary and only avoids an uninitialized-use
9581      error: mf_jcc is either assigned in add_branch_padding_frag_p
9582      or never used.  */
9583   enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9584
9585 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  /* Accumulate what this instruction uses into x86_feature_2_used and
     x86_isa_1_used.  Note that some of the tests below first update
     i.xstate and later tests read the updated value.  */
9586   if (IS_ELF && x86_used_note && now_seg != absolute_section)
9587     {
9588       if ((i.xstate & xstate_tmm) == xstate_tmm
9589           || i.tm.cpu_flags.bitfield.cpuamx_tile)
9590         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9591
9592       if (i.tm.cpu_flags.bitfield.cpu8087
9593           || i.tm.cpu_flags.bitfield.cpu287
9594           || i.tm.cpu_flags.bitfield.cpu387
9595           || i.tm.cpu_flags.bitfield.cpu687
9596           || i.tm.cpu_flags.bitfield.cpufisttp)
9597         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9598
9599       if ((i.xstate & xstate_mmx)
9600           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9601               && !is_any_vex_encoding (&i.tm)
9602               && (i.tm.base_opcode == 0x77 /* emms */
9603                   || i.tm.base_opcode == 0x0e /* femms */)))
9604         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9605
      /* A vector index register contributes to the vector state used,
         widest register class first.  */
9606       if (i.index_reg)
9607         {
9608           if (i.index_reg->reg_type.bitfield.zmmword)
9609             i.xstate |= xstate_zmm;
9610           else if (i.index_reg->reg_type.bitfield.ymmword)
9611             i.xstate |= xstate_ymm;
9612           else if (i.index_reg->reg_type.bitfield.xmmword)
9613             i.xstate |= xstate_xmm;
9614         }
9615
9616       /* vzeroall / vzeroupper */
9617       if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9618         i.xstate |= xstate_ymm;
9619
9620       if ((i.xstate & xstate_xmm)
9621           /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9622           || (i.tm.base_opcode == 0xae
9623               && (i.tm.cpu_flags.bitfield.cpusse
9624                   || i.tm.cpu_flags.bitfield.cpuavx))
9625           || i.tm.cpu_flags.bitfield.cpuwidekl
9626           || i.tm.cpu_flags.bitfield.cpukl)
9627         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9628
9629       if ((i.xstate & xstate_ymm) == xstate_ymm)
9630         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9631       if ((i.xstate & xstate_zmm) == xstate_zmm)
9632         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9633       if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9634         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9635       if (i.tm.cpu_flags.bitfield.cpufxsr)
9636         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9637       if (i.tm.cpu_flags.bitfield.cpuxsave)
9638         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9639       if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9640         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9641       if (i.tm.cpu_flags.bitfield.cpuxsavec)
9642         x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9643
      /* The ISA level tests below partly depend on the feature bits
         accumulated so far (in this and earlier instructions).  */
9644       if (x86_feature_2_used
9645           || i.tm.cpu_flags.bitfield.cpucmov
9646           || i.tm.cpu_flags.bitfield.cpusyscall
9647           || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9648               && i.tm.base_opcode == 0xc7
9649               && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
9650               && i.tm.extension_opcode == 1) /* cmpxchg8b */)
9651         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9652       if (i.tm.cpu_flags.bitfield.cpusse3
9653           || i.tm.cpu_flags.bitfield.cpussse3
9654           || i.tm.cpu_flags.bitfield.cpusse4_1
9655           || i.tm.cpu_flags.bitfield.cpusse4_2
9656           || i.tm.cpu_flags.bitfield.cpucx16
9657           || i.tm.cpu_flags.bitfield.cpupopcnt
9658           /* LAHF-SAHF insns in 64-bit mode.  */
9659           || (flag_code == CODE_64BIT
9660               && (i.tm.base_opcode | 1) == 0x9f
9661               && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
9662         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9663       if (i.tm.cpu_flags.bitfield.cpuavx
9664           || i.tm.cpu_flags.bitfield.cpuavx2
9665           /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
9666              XOP, FMA4, LWP, TBM, and AMX.  */
9667           || (i.tm.opcode_modifier.vex
9668               && !i.tm.cpu_flags.bitfield.cpuavx512f
9669               && !i.tm.cpu_flags.bitfield.cpuavx512bw
9670               && !i.tm.cpu_flags.bitfield.cpuavx512dq
9671               && !i.tm.cpu_flags.bitfield.cpuxop
9672               && !i.tm.cpu_flags.bitfield.cpufma4
9673               && !i.tm.cpu_flags.bitfield.cpulwp
9674               && !i.tm.cpu_flags.bitfield.cputbm
9675               && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9676           || i.tm.cpu_flags.bitfield.cpuf16c
9677           || i.tm.cpu_flags.bitfield.cpufma
9678           || i.tm.cpu_flags.bitfield.cpulzcnt
9679           || i.tm.cpu_flags.bitfield.cpumovbe
9680           || i.tm.cpu_flags.bitfield.cpuxsaves
9681           || (x86_feature_2_used
9682               & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9683                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9684                  | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9685         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9686       if (i.tm.cpu_flags.bitfield.cpuavx512f
9687           || i.tm.cpu_flags.bitfield.cpuavx512bw
9688           || i.tm.cpu_flags.bitfield.cpuavx512dq
9689           || i.tm.cpu_flags.bitfield.cpuavx512vl
9690           /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9691              AVX512-4FMAPS, and AVX512-4VNNIW.  */
9692           || (i.tm.opcode_modifier.evex
9693               && !i.tm.cpu_flags.bitfield.cpuavx512er
9694               && !i.tm.cpu_flags.bitfield.cpuavx512pf
9695               && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
9696               && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9697         x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9698     }
9699 #endif
9700
9701   /* Tie dwarf2 debug info to the address at the start of the insn.
9702      We can't do this after the insn has been output as the current
9703      frag may have been closed off.  eg. by frag_var.  */
9704   dwarf2_emit_insn (0);
9705
  /* Remember where the instruction starts.  These are passed to
     output_disp, output_imm and encoding_length below, and are used
     for the "pseudo prefix without instruction" diagnostic.  */
9706   insn_start_frag = frag_now;
9707   insn_start_off = frag_now_fix ();
9708
9709   if (add_branch_padding_frag_p (&branch, &mf_jcc))
9710     {
9711       char *p;
9712       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
9713       unsigned int max_branch_padding_size = 14;
9714
9715       /* Align section to boundary.  */
9716       record_alignment (now_seg, align_branch_power);
9717
9718       /* Make room for padding.  */
9719       frag_grow (max_branch_padding_size);
9720
9721       /* Start of the padding.  */
9722       p = frag_more (0);
9723
      /* Capture the frag before frag_var is called, so that the
         tc_frag_data fields below are set on the padding frag.  */
9724       fragP = frag_now;
9725
9726       frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9727                 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9728                 NULL, 0, p);
9729
9730       fragP->tc_frag_data.mf_type = mf_jcc;
9731       fragP->tc_frag_data.branch_type = branch;
9732       fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9733     }
9734
  /* Warn, once only, when the selected CPU lacks the cpui386 flag while
     the current code mode is not 16-bit.  */
9735   if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9736       && !pre_386_16bit_warned)
9737     {
9738       as_warn (_("use .code16 to ensure correct addressing mode"));
9739       pre_386_16bit_warned = true;
9740     }
9741
9742   /* Output jumps.  */
9743   if (i.tm.opcode_modifier.jump == JUMP)
9744     output_branch ();
9745   else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9746            || i.tm.opcode_modifier.jump == JUMP_DWORD)
9747     output_jump ();
9748   else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9749     output_interseg_jump ();
9750   else
9751     {
9752       /* Output normal instructions here.  */
9753       char *p;
9754       unsigned char *q;
9755       unsigned int j;
9756       enum mf_cmp_kind mf_cmp;
9757
9758       if (avoid_fence
9759           && (i.tm.base_opcode == 0xaee8
9760               || i.tm.base_opcode == 0xaef0
9761               || i.tm.base_opcode == 0xaef8))
9762         {
9763           /* Encode lfence, mfence, and sfence as
9764              f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
9765           if (flag_code == CODE_16BIT)
9766             as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
9767           else if (omit_lock_prefix)
9768             as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9769                     insn_name (&i.tm));
9770           else if (now_seg != absolute_section)
9771             {
              /* VAL carries the first four bytes of the encoding above,
                 lowest byte first; writing five bytes appends the zero
                 immediate.  */
9772               offsetT val = 0x240483f0ULL;
9773
9774               p = frag_more (5);
9775               md_number_to_chars (p, val, 5);
9776             }
9777           else
9778             abs_section_offset += 5;
          /* Nothing else is emitted for the replaced fence; note that
             this also skips the frag termination at the end of this
             function.  */
9779           return;
9780         }
9781
9782       /* Some processors fail on LOCK prefix. This option makes the
9783          assembler ignore LOCK prefix and serves as a workaround.  */
9784       if (omit_lock_prefix)
9785         {
          /* A LOCK written as an instruction of its own is dropped
             entirely; a LOCK attached to another instruction is just
             cleared from the prefix table.  */
9786           if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9787               && i.tm.opcode_modifier.isprefix)
9788             return;
9789           i.prefix[LOCK_PREFIX] = 0;
9790         }
9791
9792       if (branch)
9793         /* Skip if this is a branch.  */
9794         ;
9795       else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9796         {
9797           /* Make room for padding.  */
9798           frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9799           p = frag_more (0);
9800
9801           fragP = frag_now;
9802
9803           frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9804                     ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9805                     NULL, 0, p);
9806
9807           fragP->tc_frag_data.mf_type = mf_cmp;
9808           fragP->tc_frag_data.branch_type = align_branch_fused;
9809           fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9810         }
9811       else if (add_branch_prefix_frag_p ())
9812         {
9813           unsigned int max_prefix_size = align_branch_prefix_size;
9814
9815           /* Make room for padding.  */
9816           frag_grow (max_prefix_size);
9817           p = frag_more (0);
9818
9819           fragP = frag_now;
9820
9821           frag_var (rs_machine_dependent, max_prefix_size, 0,
9822                     ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9823                     NULL, 0, p);
9824
          /* Provisional limit; it is tightened once the instruction's
             encoded length is known (see the end of this branch).  */
9825           fragP->tc_frag_data.max_bytes = max_prefix_size;
9826         }
9827
9828       /* Since the VEX/EVEX prefix contains the implicit prefix, we
9829          don't need the explicit prefix.  */
9830       if (!is_any_vex_encoding (&i.tm))
9831         {
9832           switch (i.tm.opcode_modifier.opcodeprefix)
9833             {
9834             case PREFIX_0X66:
9835               add_prefix (0x66);
9836               break;
9837             case PREFIX_0XF2:
9838               add_prefix (0xf2);
9839               break;
9840             case PREFIX_0XF3:
              /* For cpupadlock templates, don't add 0xf3 again when the
                 REP prefix slot already holds it.  */
9841               if (!i.tm.cpu_flags.bitfield.cpupadlock
9842                   || (i.prefix[REP_PREFIX] != 0xf3))
9843                 add_prefix (0xf3);
9844               break;
9845             case PREFIX_NONE:
9846               switch (i.opcode_length)
9847                 {
9848                 case 2:
9849                   break;
9850                 case 1:
9851                   /* Check for pseudo prefixes.  */
9852                   if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9853                     break;
9854                   as_bad_where (insn_start_frag->fr_file,
9855                                 insn_start_frag->fr_line,
9856                                 _("pseudo prefix without instruction"));
9857                   return;
9858                 default:
9859                   abort ();
9860                 }
9861               break;
9862             default:
9863               abort ();
9864             }
9865
9866 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9867           /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9868              R_X86_64_GOTTPOFF relocation so that linker can safely
9869              perform IE->LE optimization.  A dummy REX_OPCODE prefix
9870              is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9871              relocation for GDesc -> IE/LE optimization.  */
9872           if (x86_elf_abi == X86_64_X32_ABI
9873               && i.operands == 2
9874               && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9875                   || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9876               && i.prefix[REX_PREFIX] == 0)
9877             add_prefix (REX_OPCODE);
9878 #endif
9879
9880           /* The prefix bytes.  */
          /* q walks i.prefix from its first slot onwards while j merely
             counts down the remaining slots; every non-zero slot is
             emitted.  */
9881           for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9882             if (*q)
9883               frag_opcode_byte (*q);
9884         }
9885       else
9886         {
9887           for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9888             if (*q)
9889               switch (j)
9890                 {
9891                 case SEG_PREFIX:
9892                 case ADDR_PREFIX:
9893                   frag_opcode_byte (*q);
9894                   break;
9895                 default:
9896                   /* There should be no other prefixes for instructions
9897                      with VEX prefix.  */
9898                   abort ();
9899                 }
9900
9901           /* For EVEX instructions i.vrex should become 0 after
9902              build_evex_prefix.  For VEX instructions upper 16 registers
9903              aren't available, so VREX should be 0.  */
9904           if (i.vrex)
9905             abort ();
9906           /* Now the VEX prefix.  */
9907           if (now_seg != absolute_section)
9908             {
9909               p = frag_more (i.vex.length);
9910               for (j = 0; j < i.vex.length; j++)
9911                 p[j] = i.vex.bytes[j];
9912             }
9913           else
9914             abs_section_offset += i.vex.length;
9915         }
9916
9917       /* Now the opcode; be careful about word order here!  */
      /* j becomes the number of opcode bytes to emit: i.opcode_length
         plus, when i.vex.length is zero, one escape byte for SPACE_0F
         or two for SPACE_0F38 / SPACE_0F3A.  */
9918       j = i.opcode_length;
9919       if (!i.vex.length)
9920         switch (i.tm.opcode_modifier.opcodespace)
9921           {
9922           case SPACE_BASE:
9923             break;
9924           case SPACE_0F:
9925             ++j;
9926             break;
9927           case SPACE_0F38:
9928           case SPACE_0F3A:
9929             j += 2;
9930             break;
9931           default:
9932             abort ();
9933           }
9934
9935       if (now_seg == absolute_section)
9936         abs_section_offset += j;
9937       else if (j == 1)
9938         {
9939           FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9940         }
9941       else
9942         {
9943           p = frag_more (j);
          /* Escape byte(s) first: 0x0f, then 0x38 or 0x3a for the
             three-byte opcode maps.  */
9944           if (!i.vex.length
9945               && i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9946             {
9947               *p++ = 0x0f;
9948               if (i.tm.opcode_modifier.opcodespace != SPACE_0F)
9949                 *p++ = i.tm.opcode_modifier.opcodespace == SPACE_0F38
9950                        ? 0x38 : 0x3a;
9951             }
9952
9953           switch (i.opcode_length)
9954             {
9955             case 2:
9956               /* Put out high byte first: can't use md_number_to_chars!  */
9957               *p++ = (i.tm.base_opcode >> 8) & 0xff;
9958               /* Fall through.  */
9959             case 1:
9960               *p = i.tm.base_opcode & 0xff;
9961               break;
9962             default:
9963               abort ();
9964               break;
9965             }
9966
9967         }
9968
9969       /* Now the modrm byte and sib byte (if present).  */
9970       if (i.tm.opcode_modifier.modrm)
9971         {
9972           frag_opcode_byte ((i.rm.regmem << 0)
9973                              | (i.rm.reg << 3)
9974                              | (i.rm.mode << 6));
9975           /* If i.rm.regmem == ESP (4)
9976              && i.rm.mode != (Register mode)
9977              && not 16 bit
9978              ==> need second modrm byte.  */
9979           if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9980               && i.rm.mode != 3
9981               && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9982             frag_opcode_byte ((i.sib.base << 0)
9983                               | (i.sib.index << 3)
9984                               | (i.sib.scale << 6));
9985         }
9986
9987       if (i.disp_operands)
9988         output_disp (insn_start_frag, insn_start_off);
9989
9990       if (i.imm_operands)
9991         output_imm (insn_start_frag, insn_start_off);
9992
9993       /*
9994        * frag_now_fix () returning plain abs_section_offset when we're in the
9995        * absolute section, and abs_section_offset not getting updated as data
9996        * gets added to the frag breaks the logic below.
9997        */
9998       if (now_seg != absolute_section)
9999         {
          /* From here on j is the encoded length of the instruction,
             measured from its recorded start to the current position.  */
10000           j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
10001           if (j > 15)
10002             as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
10003                      j);
10004           else if (fragP)
10005             {
10006               /* NB: Don't add prefix with GOTPC relocation since
10007                  output_disp() above depends on the fixed encoding
10008                  length.  Can't add prefix with TLS relocation since
10009                  it breaks TLS linker optimization.  */
10010               unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
10011               /* Prefix count on the current instruction.  */
10012               unsigned int count = i.vex.length;
10013               unsigned int k;
10014               for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
10015                 /* REX byte is encoded in VEX/EVEX prefix.  */
10016                 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
10017                   count++;
10018
10019               /* Count prefixes for extended opcode maps.  */
10020               if (!i.vex.length)
10021                 switch (i.tm.opcode_modifier.opcodespace)
10022                   {
10023                   case SPACE_BASE:
10024                     break;
10025                   case SPACE_0F:
10026                     count++;
10027                     break;
10028                   case SPACE_0F38:
10029                   case SPACE_0F3A:
10030                     count += 2;
10031                     break;
10032                   default:
10033                     abort ();
10034                   }
10035
10036               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
10037                   == BRANCH_PREFIX)
10038                 {
10039                   /* Set the maximum prefix size in BRANCH_PREFIX
10040                      frag.  */
                  /* That is min (max_bytes, max) reduced by the bytes
                     already counted above, clamped at zero.  */
10041                   if (fragP->tc_frag_data.max_bytes > max)
10042                     fragP->tc_frag_data.max_bytes = max;
10043                   if (fragP->tc_frag_data.max_bytes > count)
10044                     fragP->tc_frag_data.max_bytes -= count;
10045                   else
10046                     fragP->tc_frag_data.max_bytes = 0;
10047                 }
10048               else
10049                 {
10050                   /* Remember the maximum prefix size in FUSED_JCC_PADDING
10051                      frag.  */
10052                   unsigned int max_prefix_size;
10053                   if (align_branch_prefix_size > max)
10054                     max_prefix_size = max;
10055                   else
10056                     max_prefix_size = align_branch_prefix_size;
                  /* max_prefix_length is left untouched when nothing
                     remains after subtracting count.  */
10057                   if (max_prefix_size > count)
10058                     fragP->tc_frag_data.max_prefix_length
10059                       = max_prefix_size - count;
10060                 }
10061
10062               /* Use existing segment prefix if possible.  Use CS
10063                  segment prefix in 64-bit mode.  In 32-bit mode, use SS
10064                  segment prefix with ESP/EBP base register and use DS
10065                  segment prefix without ESP/EBP base register.  */
10066               if (i.prefix[SEG_PREFIX])
10067                 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
10068               else if (flag_code == CODE_64BIT)
10069                 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
10070               else if (i.base_reg
10071                        && (i.base_reg->reg_num == 4
10072                            || i.base_reg->reg_num == 5))
10073                 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
10074               else
10075                 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
10076             }
10077         }
10078     }
10079
10080   /* NB: Don't work with COND_JUMP86 without i386.  */
10081   if (align_branch_power
10082       && now_seg != absolute_section
10083       && cpu_arch_flags.bitfield.cpui386)
10084     {
10085       /* Terminate each frag so that we can add prefix and check for
10086          fused jcc.  */
10087       frag_wane (frag_now);
10088       frag_new (0);
10089     }
10090
10091 #ifdef DEBUG386
10092   if (flag_debug)
10093     {
10094       pi ("" /*line*/, &i);
10095     }
10096 #endif /* DEBUG386  */
10097 }
10098
10099 /* Return the size of the displacement operand N.  */
10100
10101 static int
10102 disp_size (unsigned int n)
10103 {
10104   int size = 4;
10105
10106   if (i.types[n].bitfield.disp64)
10107     size = 8;
10108   else if (i.types[n].bitfield.disp8)
10109     size = 1;
10110   else if (i.types[n].bitfield.disp16)
10111     size = 2;
10112   return size;
10113 }
10114
10115 /* Return the size of the immediate operand N.  */
10116
10117 static int
10118 imm_size (unsigned int n)
10119 {
10120   int size = 4;
10121   if (i.types[n].bitfield.imm64)
10122     size = 8;
10123   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
10124     size = 1;
10125   else if (i.types[n].bitfield.imm16)
10126     size = 2;
10127   return size;
10128 }
10129
10130 static void
10131 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
10132 {
10133   char *p;
10134   unsigned int n;
10135
10136   for (n = 0; n < i.operands; n++)
10137     {
10138       if (operand_type_check (i.types[n], disp))
10139         {
10140           int size = disp_size (n);
10141
10142           if (now_seg == absolute_section)
10143             abs_section_offset += size;
10144           else if (i.op[n].disps->X_op == O_constant)
10145             {
10146               offsetT val = i.op[n].disps->X_add_number;
10147
10148               val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10149                                      size);
10150               p = frag_more (size);
10151               md_number_to_chars (p, val, size);
10152             }
10153           else
10154             {
10155               enum bfd_reloc_code_real reloc_type;
10156               bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10157               bool sign = (flag_code == CODE_64BIT && size == 4
10158                            && (!want_disp32 (&i.tm)
10159                                || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10160                                    && !i.types[n].bitfield.baseindex)))
10161                           || pcrel;
10162               fixS *fixP;
10163
10164               /* We can't have 8 bit displacement here.  */
10165               gas_assert (!i.types[n].bitfield.disp8);
10166
10167               /* The PC relative address is computed relative
10168                  to the instruction boundary, so in case immediate
10169                  fields follows, we need to adjust the value.  */
10170               if (pcrel && i.imm_operands)
10171                 {
10172                   unsigned int n1;
10173                   int sz = 0;
10174
10175                   for (n1 = 0; n1 < i.operands; n1++)
10176                     if (operand_type_check (i.types[n1], imm))
10177                       {
10178                         /* Only one immediate is allowed for PC
10179                            relative address.  */
10180                         gas_assert (sz == 0);
10181                         sz = imm_size (n1);
10182                         i.op[n].disps->X_add_number -= sz;
10183                       }
10184                   /* We should find the immediate.  */
10185                   gas_assert (sz != 0);
10186                 }
10187
10188               p = frag_more (size);
10189               reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10190               if (GOT_symbol
10191                   && GOT_symbol == i.op[n].disps->X_add_symbol
10192                   && (((reloc_type == BFD_RELOC_32
10193                         || reloc_type == BFD_RELOC_X86_64_32S
10194                         || (reloc_type == BFD_RELOC_64
10195                             && object_64bit))
10196                        && (i.op[n].disps->X_op == O_symbol
10197                            || (i.op[n].disps->X_op == O_add
10198                                && ((symbol_get_value_expression
10199                                     (i.op[n].disps->X_op_symbol)->X_op)
10200                                    == O_subtract))))
10201                       || reloc_type == BFD_RELOC_32_PCREL))
10202                 {
10203                   if (!object_64bit)
10204                     {
10205                       reloc_type = BFD_RELOC_386_GOTPC;
10206                       i.has_gotpc_tls_reloc = true;
10207                       i.op[n].disps->X_add_number +=
10208                         encoding_length (insn_start_frag, insn_start_off, p);
10209                     }
10210                   else if (reloc_type == BFD_RELOC_64)
10211                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10212                   else
10213                     /* Don't do the adjustment for x86-64, as there
10214                        the pcrel addressing is relative to the _next_
10215                        insn, and that is taken care of in other code.  */
10216                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10217                 }
10218               else if (align_branch_power)
10219                 {
10220                   switch (reloc_type)
10221                     {
10222                     case BFD_RELOC_386_TLS_GD:
10223                     case BFD_RELOC_386_TLS_LDM:
10224                     case BFD_RELOC_386_TLS_IE:
10225                     case BFD_RELOC_386_TLS_IE_32:
10226                     case BFD_RELOC_386_TLS_GOTIE:
10227                     case BFD_RELOC_386_TLS_GOTDESC:
10228                     case BFD_RELOC_386_TLS_DESC_CALL:
10229                     case BFD_RELOC_X86_64_TLSGD:
10230                     case BFD_RELOC_X86_64_TLSLD:
10231                     case BFD_RELOC_X86_64_GOTTPOFF:
10232                     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10233                     case BFD_RELOC_X86_64_TLSDESC_CALL:
10234                       i.has_gotpc_tls_reloc = true;
10235                     default:
10236                       break;
10237                     }
10238                 }
10239               fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10240                                   size, i.op[n].disps, pcrel,
10241                                   reloc_type);
10242
10243               if (flag_code == CODE_64BIT && size == 4 && pcrel
10244                   && !i.prefix[ADDR_PREFIX])
10245                 fixP->fx_signed = 1;
10246
10247               /* Check for "call/jmp *mem", "mov mem, %reg",
10248                  "test %reg, mem" and "binop mem, %reg" where binop
10249                  is one of adc, add, and, cmp, or, sbb, sub, xor
10250                  instructions without data prefix.  Always generate
10251                  R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
10252               if (i.prefix[DATA_PREFIX] == 0
10253                   && (generate_relax_relocations
10254                       || (!object_64bit
10255                           && i.rm.mode == 0
10256                           && i.rm.regmem == 5))
10257                   && (i.rm.mode == 2
10258                       || (i.rm.mode == 0 && i.rm.regmem == 5))
10259                   && i.tm.opcode_modifier.opcodespace == SPACE_BASE
10260                   && ((i.operands == 1
10261                        && i.tm.base_opcode == 0xff
10262                        && (i.rm.reg == 2 || i.rm.reg == 4))
10263                       || (i.operands == 2
10264                           && (i.tm.base_opcode == 0x8b
10265                               || i.tm.base_opcode == 0x85
10266                               || (i.tm.base_opcode & ~0x38) == 0x03))))
10267                 {
10268                   if (object_64bit)
10269                     {
10270                       fixP->fx_tcbit = i.rex != 0;
10271                       if (i.base_reg
10272                           && (i.base_reg->reg_num == RegIP))
10273                       fixP->fx_tcbit2 = 1;
10274                     }
10275                   else
10276                     fixP->fx_tcbit2 = 1;
10277                 }
10278             }
10279         }
10280     }
10281 }
10282
10283 static void
10284 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10285 {
10286   char *p;
10287   unsigned int n;
10288
10289   for (n = 0; n < i.operands; n++)
10290     {
10291       if (operand_type_check (i.types[n], imm))
10292         {
10293           int size = imm_size (n);
10294
10295           if (now_seg == absolute_section)
10296             abs_section_offset += size;
10297           else if (i.op[n].imms->X_op == O_constant)
10298             {
10299               offsetT val;
10300
10301               val = offset_in_range (i.op[n].imms->X_add_number,
10302                                      size);
10303               p = frag_more (size);
10304               md_number_to_chars (p, val, size);
10305             }
10306           else
10307             {
10308               /* Not absolute_section.
10309                  Need a 32-bit fixup (don't support 8bit
10310                  non-absolute imms).  Try to support other
10311                  sizes ...  */
10312               enum bfd_reloc_code_real reloc_type;
10313               int sign;
10314
10315               if (i.types[n].bitfield.imm32s
10316                   && (i.suffix == QWORD_MNEM_SUFFIX
10317                       || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10318                 sign = 1;
10319               else
10320                 sign = 0;
10321
10322               p = frag_more (size);
10323               reloc_type = reloc (size, 0, sign, i.reloc[n]);
10324
10325               /*   This is tough to explain.  We end up with this one if we
10326                * have operands that look like
10327                * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
10328                * obtain the absolute address of the GOT, and it is strongly
10329                * preferable from a performance point of view to avoid using
10330                * a runtime relocation for this.  The actual sequence of
10331                * instructions often look something like:
10332                *
10333                *        call    .L66
10334                * .L66:
10335                *        popl    %ebx
10336                *        addl    $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10337                *
10338                *   The call and pop essentially return the absolute address
10339                * of the label .L66 and store it in %ebx.  The linker itself
10340                * will ultimately change the first operand of the addl so
10341                * that %ebx points to the GOT, but to keep things simple, the
10342                * .o file must have this operand set so that it generates not
10343                * the absolute address of .L66, but the absolute address of
10344                * itself.  This allows the linker itself simply treat a GOTPC
10345                * relocation as asking for a pcrel offset to the GOT to be
10346                * added in, and the addend of the relocation is stored in the
10347                * operand field for the instruction itself.
10348                *
10349                *   Our job here is to fix the operand so that it would add
10350                * the correct offset so that %ebx would point to itself.  The
10351                * thing that is tricky is that .-.L66 will point to the
10352                * beginning of the instruction, so we need to further modify
10353                * the operand so that it will point to itself.  There are
10354                * other cases where you have something like:
10355                *
10356                *        .long   $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10357                *
10358                * and here no correction would be required.  Internally in
10359                * the assembler we treat operands of this form as not being
10360                * pcrel since the '.' is explicitly mentioned, and I wonder
10361                * whether it would simplify matters to do it this way.  Who
10362                * knows.  In earlier versions of the PIC patches, the
10363                * pcrel_adjust field was used to store the correction, but
10364                * since the expression is not pcrel, I felt it would be
10365                * confusing to do it this way.  */
10366
10367               if ((reloc_type == BFD_RELOC_32
10368                    || reloc_type == BFD_RELOC_X86_64_32S
10369                    || reloc_type == BFD_RELOC_64)
10370                   && GOT_symbol
10371                   && GOT_symbol == i.op[n].imms->X_add_symbol
10372                   && (i.op[n].imms->X_op == O_symbol
10373                       || (i.op[n].imms->X_op == O_add
10374                           && ((symbol_get_value_expression
10375                                (i.op[n].imms->X_op_symbol)->X_op)
10376                               == O_subtract))))
10377                 {
10378                   if (!object_64bit)
10379                     reloc_type = BFD_RELOC_386_GOTPC;
10380                   else if (size == 4)
10381                     reloc_type = BFD_RELOC_X86_64_GOTPC32;
10382                   else if (size == 8)
10383                     reloc_type = BFD_RELOC_X86_64_GOTPC64;
10384                   i.has_gotpc_tls_reloc = true;
10385                   i.op[n].imms->X_add_number +=
10386                     encoding_length (insn_start_frag, insn_start_off, p);
10387                 }
10388               fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10389                            i.op[n].imms, 0, reloc_type);
10390             }
10391         }
10392     }
10393 }
10394 \f
10395 /* x86_cons_fix_new is called via the expression parsing code when a
10396    reloc is needed.  We use this hook to get the correct .got reloc.  */
10397 static int cons_sign = -1;
10398
10399 void
10400 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10401                   expressionS *exp, bfd_reloc_code_real_type r)
10402 {
10403   r = reloc (len, 0, cons_sign, r);
10404
10405 #ifdef TE_PE
10406   if (exp->X_op == O_secrel)
10407     {
10408       exp->X_op = O_symbol;
10409       r = BFD_RELOC_32_SECREL;
10410     }
10411   else if (exp->X_op == O_secidx)
10412     r = BFD_RELOC_16_SECIDX;
10413 #endif
10414
10415   fix_new_exp (frag, off, len, exp, 0, r);
10416 }
10417
10418 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10419    purpose of the `.dc.a' internal pseudo-op.  */
10420
10421 int
10422 x86_address_bytes (void)
10423 {
10424   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10425     return 4;
10426   return stdoutput->arch_info->bits_per_address / 8;
10427 }
10428
#if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
     || defined (LEX_AT)) && !defined (TE_PE)
# define lex_got(reloc, adjust, types) NULL
#else
/* Recognize an `@<reloc>' operator, as in
   <symbol>@GOTOFF+<nnn>
   or a similar .plt or .got reference, in the text starting at
   input_line_pointer.

   The scan gives up, returning NULL, at the first end-of-line character
   or comma met before any '@'.  When the text following the '@' begins
   with one of the names in the table below (compared without regard to
   case) and that name has a relocation for the current output format:

     - *REL receives that relocation;
     - if TYPES is non-null it is updated with the operand types the
       relocation may be combined with;
     - the return value is a newly allocated copy of the input, minus
       the `@<reloc>' token, for the calling routine to parse (the
       callers in this file free it afterwards);
     - if ADJUST is non-null it receives the number of characters by
       which that copy is shorter than the original text.

   NULL is also returned when the name is known but has no relocation
   for the current output format (after a diagnostic), and when the
   name is not in the table at all (silently).  */
static char *
lex_got (enum bfd_reloc_code_real *rel,
         int *adjust,
         i386_operand_type *types)
{
  /* Some of the relocations depend on the size of what field is to
     be relocated.  But in our callers i386_immediate and i386_displacement
     we don't yet know the operand size (this will be set by insn
     matching).  Hence we record the word32 relocation here,
     and adjust the reloc according to the real size in reloc().  */
  static const struct
  {
    const char *str;
    int len;
    const enum bfd_reloc_code_real rel[2];
    const i386_operand_type types64;
    bool need_GOT_symbol;
  }
    gotrel[] =
  {

#define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
#define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
#define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
  { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
#define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
  { .imm64 = 1, .disp64 = 1 } }

    /* Each entry: name and its length, the relocation for 32-bit and
       for 64-bit output (in that order, a dummy value meaning "not
       available"), the operand types used in 64-bit code, and whether
       the GOT symbol must exist.  Entries are tried in order and the
       comparison is a prefix match, so a name has to come before any
       other name it starts with.  */
#ifndef TE_PE
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
                                        BFD_RELOC_SIZE32 },
      { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
#endif
    { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_PLTOFF64 },
      { .bitfield = { .imm64 = 1 } }, true },
    { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
                                       BFD_RELOC_X86_64_PLT32    },
      OPERAND_TYPE_IMM32_32S_DISP32, false },
    { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_GOTPLT64 },
      OPERAND_TYPE_IMM64_DISP64, true },
    { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
                                       BFD_RELOC_X86_64_GOTOFF64 },
      OPERAND_TYPE_IMM64_DISP64, true },
    { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_GOTPCREL },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
                                       BFD_RELOC_X86_64_TLSGD    },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
                                       BFD_RELOC_X86_64_TLSLD    },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
                                       BFD_RELOC_X86_64_GOTTPOFF },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
                                       BFD_RELOC_X86_64_TPOFF32  },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
    { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
                                       BFD_RELOC_X86_64_DTPOFF32 },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
    { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
                                       _dummy_first_bfd_reloc_code_real },
      OPERAND_TYPE_NONE, true },
    { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
                                       BFD_RELOC_X86_64_GOT32    },
      OPERAND_TYPE_IMM32_32S_64_DISP32, true },
    { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
                                       BFD_RELOC_X86_64_GOTPC32_TLSDESC },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
    { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
                                       BFD_RELOC_X86_64_TLSDESC_CALL },
      OPERAND_TYPE_IMM32_32S_DISP32, true },
#else /* TE_PE */
    { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
                                       BFD_RELOC_32_SECREL },
      OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
#endif

#undef OPERAND_TYPE_IMM32_32S_DISP32
#undef OPERAND_TYPE_IMM32_32S_64_DISP32
#undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
#undef OPERAND_TYPE_IMM64_DISP64

  };
  char *at;
  unsigned int idx;

#if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
  if (!IS_ELF)
    return NULL;
#endif

  /* Look for the '@' that introduces the operator; the operand ends at
     an end-of-line character or a comma.  */
  at = input_line_pointer;
  while (*at != '@')
    {
      if (is_end_of_line[(unsigned char) *at] || *at == ',')
        return NULL;
      ++at;
    }

  for (idx = 0; idx < ARRAY_SIZE (gotrel); idx++)
    {
      int len = gotrel[idx].len;
      int first, second;
      char *tmpbuf, *past_reloc, *tail;

      if (strncasecmp (at + 1, gotrel[idx].str, len) != 0)
        continue;

      if (gotrel[idx].rel[object_64bit] == 0)
        {
          as_bad (_("@%s reloc is not supported with %d-bit output format"),
                  gotrel[idx].str, 1 << (5 + object_64bit));
          return NULL;
        }

      *rel = gotrel[idx].rel[object_64bit];

      if (types)
        {
          if (flag_code == CODE_64BIT)
            *types = gotrel[idx].types64;
          else
            {
              types->bitfield.imm32 = 1;
              types->bitfield.disp32 = 1;
            }
        }

      if (gotrel[idx].need_GOT_symbol && GOT_symbol == NULL)
        GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);

      /* The length of the first part of our input line.  */
      first = at - input_line_pointer;

      /* The second part goes from after the reloc token until
         (and including) an end_of_line char or comma.  */
      past_reloc = at + 1 + len;
      tail = past_reloc;
      while (!is_end_of_line[(unsigned char) *tail] && *tail != ',')
        ++tail;
      second = tail + 1 - past_reloc;

      /* Allocate and copy string.  The trailing NUL shouldn't
         be necessary, but be safe.  */
      tmpbuf = XNEWVEC (char, first + second + 2);
      memcpy (tmpbuf, input_line_pointer, first);
      if (second != 0 && *past_reloc != ' ')
        {
          /* Replace the relocation token with ' ', so that
             errors like foo@GOTOFF1 will be detected.  */
          tmpbuf[first] = ' ';
          first++;
        }
      else
        {
          /* Increment length by 1 if the relocation token is
             removed.  */
          len++;
        }
      if (adjust)
        *adjust = len;
      memcpy (tmpbuf + first, past_reloc, second);
      tmpbuf[first + second] = '\0';
      return tmpbuf;
    }

  /* Might be a symbol version string.  Don't as_bad here.  */
  return NULL;
}
#endif
10619
10620 bfd_reloc_code_real_type
10621 x86_cons (expressionS *exp, int size)
10622 {
10623   bfd_reloc_code_real_type got_reloc = NO_RELOC;
10624
10625 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10626       && !defined (LEX_AT)) \
10627     || defined (TE_PE)
10628   intel_syntax = -intel_syntax;
10629
10630   exp->X_md = 0;
10631   if (size == 4 || (object_64bit && size == 8))
10632     {
10633       /* Handle @GOTOFF and the like in an expression.  */
10634       char *save;
10635       char *gotfree_input_line;
10636       int adjust = 0;
10637
10638       save = input_line_pointer;
10639       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10640       if (gotfree_input_line)
10641         input_line_pointer = gotfree_input_line;
10642
10643       expression (exp);
10644
10645       if (gotfree_input_line)
10646         {
10647           /* expression () has merrily parsed up to the end of line,
10648              or a comma - in the wrong buffer.  Transfer how far
10649              input_line_pointer has moved to the right buffer.  */
10650           input_line_pointer = (save
10651                                 + (input_line_pointer - gotfree_input_line)
10652                                 + adjust);
10653           free (gotfree_input_line);
10654           if (exp->X_op == O_constant
10655               || exp->X_op == O_absent
10656               || exp->X_op == O_illegal
10657               || exp->X_op == O_register
10658               || exp->X_op == O_big)
10659             {
10660               char c = *input_line_pointer;
10661               *input_line_pointer = 0;
10662               as_bad (_("missing or invalid expression `%s'"), save);
10663               *input_line_pointer = c;
10664             }
10665           else if ((got_reloc == BFD_RELOC_386_PLT32
10666                     || got_reloc == BFD_RELOC_X86_64_PLT32)
10667                    && exp->X_op != O_symbol)
10668             {
10669               char c = *input_line_pointer;
10670               *input_line_pointer = 0;
10671               as_bad (_("invalid PLT expression `%s'"), save);
10672               *input_line_pointer = c;
10673             }
10674         }
10675     }
10676   else
10677     expression (exp);
10678
10679   intel_syntax = -intel_syntax;
10680
10681   if (intel_syntax)
10682     i386_intel_simplify (exp);
10683 #else
10684   expression (exp);
10685 #endif
10686
10687   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
10688   if (size == 4 && exp->X_op == O_constant && !object_64bit)
10689     exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10690
10691   return got_reloc;
10692 }
10693
10694 static void
10695 signed_cons (int size)
10696 {
10697   if (object_64bit)
10698     cons_sign = 1;
10699   cons (size);
10700   cons_sign = -1;
10701 }
10702
10703 #ifdef TE_PE
10704 static void
10705 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10706 {
10707   expressionS exp;
10708
10709   do
10710     {
10711       expression (&exp);
10712       if (exp.X_op == O_symbol)
10713         exp.X_op = O_secrel;
10714
10715       emit_expr (&exp, 4);
10716     }
10717   while (*input_line_pointer++ == ',');
10718
10719   input_line_pointer--;
10720   demand_empty_rest_of_line ();
10721 }
10722
10723 static void
10724 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
10725 {
10726   expressionS exp;
10727
10728   do
10729     {
10730       expression (&exp);
10731       if (exp.X_op == O_symbol)
10732         exp.X_op = O_secidx;
10733
10734       emit_expr (&exp, 2);
10735     }
10736   while (*input_line_pointer++ == ',');
10737
10738   input_line_pointer--;
10739   demand_empty_rest_of_line ();
10740 }
10741 #endif
10742
10743 /* Handle Rounding Control / SAE specifiers.  */
10744
10745 static char *
10746 RC_SAE_specifier (const char *pstr)
10747 {
10748   unsigned int j;
10749
10750   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10751     {
10752       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10753         {
10754           if (i.rounding.type != rc_none)
10755             {
10756               as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
10757               return NULL;
10758             }
10759
10760           i.rounding.type = RC_NamesTable[j].type;
10761
10762           return (char *)(pstr + RC_NamesTable[j].len);
10763         }
10764     }
10765
10766   return NULL;
10767 }
10768
10769 /* Handle Vector operations.  */
10770
10771 static char *
10772 check_VecOperations (char *op_string)
10773 {
10774   const reg_entry *mask;
10775   const char *saved;
10776   char *end_op;
10777
10778   while (*op_string)
10779     {
10780       saved = op_string;
10781       if (*op_string == '{')
10782         {
10783           op_string++;
10784
10785           /* Check broadcasts.  */
10786           if (startswith (op_string, "1to"))
10787             {
10788               unsigned int bcst_type;
10789
10790               if (i.broadcast.type)
10791                 goto duplicated_vec_op;
10792
10793               op_string += 3;
10794               if (*op_string == '8')
10795                 bcst_type = 8;
10796               else if (*op_string == '4')
10797                 bcst_type = 4;
10798               else if (*op_string == '2')
10799                 bcst_type = 2;
10800               else if (*op_string == '1'
10801                        && *(op_string+1) == '6')
10802                 {
10803                   bcst_type = 16;
10804                   op_string++;
10805                 }
10806               else if (*op_string == '3'
10807                        && *(op_string+1) == '2')
10808                 {
10809                   bcst_type = 32;
10810                   op_string++;
10811                 }
10812               else
10813                 {
10814                   as_bad (_("Unsupported broadcast: `%s'"), saved);
10815                   return NULL;
10816                 }
10817               op_string++;
10818
10819               i.broadcast.type = bcst_type;
10820               i.broadcast.operand = this_operand;
10821             }
10822           /* Check masking operation.  */
10823           else if ((mask = parse_register (op_string, &end_op)) != NULL)
10824             {
10825               if (mask == &bad_reg)
10826                 return NULL;
10827
10828               /* k0 can't be used for write mask.  */
10829               if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10830                 {
10831                   as_bad (_("`%s%s' can't be used for write mask"),
10832                           register_prefix, mask->reg_name);
10833                   return NULL;
10834                 }
10835
10836               if (!i.mask.reg)
10837                 {
10838                   i.mask.reg = mask;
10839                   i.mask.operand = this_operand;
10840                 }
10841               else if (i.mask.reg->reg_num)
10842                 goto duplicated_vec_op;
10843               else
10844                 {
10845                   i.mask.reg = mask;
10846
10847                   /* Only "{z}" is allowed here.  No need to check
10848                      zeroing mask explicitly.  */
10849                   if (i.mask.operand != (unsigned int) this_operand)
10850                     {
10851                       as_bad (_("invalid write mask `%s'"), saved);
10852                       return NULL;
10853                     }
10854                 }
10855
10856               op_string = end_op;
10857             }
10858           /* Check zeroing-flag for masking operation.  */
10859           else if (*op_string == 'z')
10860             {
10861               if (!i.mask.reg)
10862                 {
10863                   i.mask.reg = reg_k0;
10864                   i.mask.zeroing = 1;
10865                   i.mask.operand = this_operand;
10866                 }
10867               else
10868                 {
10869                   if (i.mask.zeroing)
10870                     {
10871                     duplicated_vec_op:
10872                       as_bad (_("duplicated `%s'"), saved);
10873                       return NULL;
10874                     }
10875
10876                   i.mask.zeroing = 1;
10877
10878                   /* Only "{%k}" is allowed here.  No need to check mask
10879                      register explicitly.  */
10880                   if (i.mask.operand != (unsigned int) this_operand)
10881                     {
10882                       as_bad (_("invalid zeroing-masking `%s'"),
10883                               saved);
10884                       return NULL;
10885                     }
10886                 }
10887
10888               op_string++;
10889             }
10890           else if (intel_syntax
10891                    && (op_string = RC_SAE_specifier (op_string)) != NULL)
10892             i.rounding.modifier = true;
10893           else
10894             goto unknown_vec_op;
10895
10896           if (*op_string != '}')
10897             {
10898               as_bad (_("missing `}' in `%s'"), saved);
10899               return NULL;
10900             }
10901           op_string++;
10902
10903           /* Strip whitespace since the addition of pseudo prefixes
10904              changed how the scrubber treats '{'.  */
10905           if (is_space_char (*op_string))
10906             ++op_string;
10907
10908           continue;
10909         }
10910     unknown_vec_op:
10911       /* We don't know this one.  */
10912       as_bad (_("unknown vector operation: `%s'"), saved);
10913       return NULL;
10914     }
10915
10916   if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
10917     {
10918       as_bad (_("zeroing-masking only allowed with write mask"));
10919       return NULL;
10920     }
10921
10922   return op_string;
10923 }
10924
10925 static int
10926 i386_immediate (char *imm_start)
10927 {
10928   char *save_input_line_pointer;
10929   char *gotfree_input_line;
10930   segT exp_seg = 0;
10931   expressionS *exp;
10932   i386_operand_type types;
10933
10934   operand_type_set (&types, ~0);
10935
10936   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10937     {
10938       as_bad (_("at most %d immediate operands are allowed"),
10939               MAX_IMMEDIATE_OPERANDS);
10940       return 0;
10941     }
10942
10943   exp = &im_expressions[i.imm_operands++];
10944   i.op[this_operand].imms = exp;
10945
10946   if (is_space_char (*imm_start))
10947     ++imm_start;
10948
10949   save_input_line_pointer = input_line_pointer;
10950   input_line_pointer = imm_start;
10951
10952   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10953   if (gotfree_input_line)
10954     input_line_pointer = gotfree_input_line;
10955
10956   exp_seg = expression (exp);
10957
10958   SKIP_WHITESPACE ();
10959   if (*input_line_pointer)
10960     as_bad (_("junk `%s' after expression"), input_line_pointer);
10961
10962   input_line_pointer = save_input_line_pointer;
10963   if (gotfree_input_line)
10964     {
10965       free (gotfree_input_line);
10966
10967       if (exp->X_op == O_constant)
10968         exp->X_op = O_illegal;
10969     }
10970
10971   if (exp_seg == reg_section)
10972     {
10973       as_bad (_("illegal immediate register operand %s"), imm_start);
10974       return 0;
10975     }
10976
10977   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10978 }
10979
10980 static int
10981 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10982                          i386_operand_type types, const char *imm_start)
10983 {
10984   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10985     {
10986       if (imm_start)
10987         as_bad (_("missing or invalid immediate expression `%s'"),
10988                 imm_start);
10989       return 0;
10990     }
10991   else if (exp->X_op == O_constant)
10992     {
10993       /* Size it properly later.  */
10994       i.types[this_operand].bitfield.imm64 = 1;
10995
10996       /* If not 64bit, sign/zero extend val, to account for wraparound
10997          when !BFD64.  */
10998       if (flag_code != CODE_64BIT)
10999         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11000     }
11001 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11002   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11003            && exp_seg != absolute_section
11004            && exp_seg != text_section
11005            && exp_seg != data_section
11006            && exp_seg != bss_section
11007            && exp_seg != undefined_section
11008            && !bfd_is_com_section (exp_seg))
11009     {
11010       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11011       return 0;
11012     }
11013 #endif
11014   else
11015     {
11016       /* This is an address.  The size of the address will be
11017          determined later, depending on destination register,
11018          suffix, or the default for the section.  */
11019       i.types[this_operand].bitfield.imm8 = 1;
11020       i.types[this_operand].bitfield.imm16 = 1;
11021       i.types[this_operand].bitfield.imm32 = 1;
11022       i.types[this_operand].bitfield.imm32s = 1;
11023       i.types[this_operand].bitfield.imm64 = 1;
11024       i.types[this_operand] = operand_type_and (i.types[this_operand],
11025                                                 types);
11026     }
11027
11028   return 1;
11029 }
11030
11031 static char *
11032 i386_scale (char *scale)
11033 {
11034   offsetT val;
11035   char *save = input_line_pointer;
11036
11037   input_line_pointer = scale;
11038   val = get_absolute_expression ();
11039
11040   switch (val)
11041     {
11042     case 1:
11043       i.log2_scale_factor = 0;
11044       break;
11045     case 2:
11046       i.log2_scale_factor = 1;
11047       break;
11048     case 4:
11049       i.log2_scale_factor = 2;
11050       break;
11051     case 8:
11052       i.log2_scale_factor = 3;
11053       break;
11054     default:
11055       {
11056         char sep = *input_line_pointer;
11057
11058         *input_line_pointer = '\0';
11059         as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
11060                 scale);
11061         *input_line_pointer = sep;
11062         input_line_pointer = save;
11063         return NULL;
11064       }
11065     }
11066   if (i.log2_scale_factor != 0 && i.index_reg == 0)
11067     {
11068       as_warn (_("scale factor of %d without an index register"),
11069                1 << i.log2_scale_factor);
11070       i.log2_scale_factor = 0;
11071     }
11072   scale = input_line_pointer;
11073   input_line_pointer = save;
11074   return scale;
11075 }
11076
11077 static int
11078 i386_displacement (char *disp_start, char *disp_end)
11079 {
11080   expressionS *exp;
11081   segT exp_seg = 0;
11082   char *save_input_line_pointer;
11083   char *gotfree_input_line;
11084   int override;
11085   i386_operand_type bigdisp, types = anydisp;
11086   int ret;
11087
11088   if (i.disp_operands == MAX_MEMORY_OPERANDS)
11089     {
11090       as_bad (_("at most %d displacement operands are allowed"),
11091               MAX_MEMORY_OPERANDS);
11092       return 0;
11093     }
11094
11095   operand_type_set (&bigdisp, 0);
11096   if (i.jumpabsolute
11097       || i.types[this_operand].bitfield.baseindex
11098       || (current_templates->start->opcode_modifier.jump != JUMP
11099           && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
11100     {
11101       i386_addressing_mode ();
11102       override = (i.prefix[ADDR_PREFIX] != 0);
11103       if (flag_code == CODE_64BIT)
11104         {
11105           bigdisp.bitfield.disp32 = 1;
11106           if (!override)
11107             bigdisp.bitfield.disp64 = 1;
11108         }
11109       else if ((flag_code == CODE_16BIT) ^ override)
11110           bigdisp.bitfield.disp16 = 1;
11111       else
11112           bigdisp.bitfield.disp32 = 1;
11113     }
11114   else
11115     {
11116       /* For PC-relative branches, the width of the displacement may be
11117          dependent upon data size, but is never dependent upon address size.
11118          Also make sure to not unintentionally match against a non-PC-relative
11119          branch template.  */
11120       static templates aux_templates;
11121       const insn_template *t = current_templates->start;
11122       bool has_intel64 = false;
11123
11124       aux_templates.start = t;
11125       while (++t < current_templates->end)
11126         {
11127           if (t->opcode_modifier.jump
11128               != current_templates->start->opcode_modifier.jump)
11129             break;
11130           if ((t->opcode_modifier.isa64 >= INTEL64))
11131             has_intel64 = true;
11132         }
11133       if (t < current_templates->end)
11134         {
11135           aux_templates.end = t;
11136           current_templates = &aux_templates;
11137         }
11138
11139       override = (i.prefix[DATA_PREFIX] != 0);
11140       if (flag_code == CODE_64BIT)
11141         {
11142           if ((override || i.suffix == WORD_MNEM_SUFFIX)
11143               && (!intel64 || !has_intel64))
11144             bigdisp.bitfield.disp16 = 1;
11145           else
11146             bigdisp.bitfield.disp32 = 1;
11147         }
11148       else
11149         {
11150           if (!override)
11151             override = (i.suffix == (flag_code != CODE_16BIT
11152                                      ? WORD_MNEM_SUFFIX
11153                                      : LONG_MNEM_SUFFIX));
11154           bigdisp.bitfield.disp32 = 1;
11155           if ((flag_code == CODE_16BIT) ^ override)
11156             {
11157               bigdisp.bitfield.disp32 = 0;
11158               bigdisp.bitfield.disp16 = 1;
11159             }
11160         }
11161     }
11162   i.types[this_operand] = operand_type_or (i.types[this_operand],
11163                                            bigdisp);
11164
11165   exp = &disp_expressions[i.disp_operands];
11166   i.op[this_operand].disps = exp;
11167   i.disp_operands++;
11168   save_input_line_pointer = input_line_pointer;
11169   input_line_pointer = disp_start;
11170   END_STRING_AND_SAVE (disp_end);
11171
11172 #ifndef GCC_ASM_O_HACK
11173 #define GCC_ASM_O_HACK 0
11174 #endif
11175 #if GCC_ASM_O_HACK
11176   END_STRING_AND_SAVE (disp_end + 1);
11177   if (i.types[this_operand].bitfield.baseIndex
11178       && displacement_string_end[-1] == '+')
11179     {
11180       /* This hack is to avoid a warning when using the "o"
11181          constraint within gcc asm statements.
11182          For instance:
11183
11184          #define _set_tssldt_desc(n,addr,limit,type) \
11185          __asm__ __volatile__ ( \
11186          "movw %w2,%0\n\t" \
11187          "movw %w1,2+%0\n\t" \
11188          "rorl $16,%1\n\t" \
11189          "movb %b1,4+%0\n\t" \
11190          "movb %4,5+%0\n\t" \
11191          "movb $0,6+%0\n\t" \
11192          "movb %h1,7+%0\n\t" \
11193          "rorl $16,%1" \
11194          : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
11195
11196          This works great except that the output assembler ends
11197          up looking a bit weird if it turns out that there is
11198          no offset.  You end up producing code that looks like:
11199
11200          #APP
11201          movw $235,(%eax)
11202          movw %dx,2+(%eax)
11203          rorl $16,%edx
11204          movb %dl,4+(%eax)
11205          movb $137,5+(%eax)
11206          movb $0,6+(%eax)
11207          movb %dh,7+(%eax)
11208          rorl $16,%edx
11209          #NO_APP
11210
11211          So here we provide the missing zero.  */
11212
11213       *displacement_string_end = '0';
11214     }
11215 #endif
11216   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11217   if (gotfree_input_line)
11218     input_line_pointer = gotfree_input_line;
11219
11220   exp_seg = expression (exp);
11221
11222   SKIP_WHITESPACE ();
11223   if (*input_line_pointer)
11224     as_bad (_("junk `%s' after expression"), input_line_pointer);
11225 #if GCC_ASM_O_HACK
11226   RESTORE_END_STRING (disp_end + 1);
11227 #endif
11228   input_line_pointer = save_input_line_pointer;
11229   if (gotfree_input_line)
11230     {
11231       free (gotfree_input_line);
11232
11233       if (exp->X_op == O_constant || exp->X_op == O_register)
11234         exp->X_op = O_illegal;
11235     }
11236
11237   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
11238
11239   RESTORE_END_STRING (disp_end);
11240
11241   return ret;
11242 }
11243
      /* Finish processing the displacement expression EXP that was parsed
         for operand THIS_OPERAND.  EXP_SEG is only consulted by the a.out
         segment check below, TYPES is AND-ed into the operand's type when
         the operand has no base/index part, and DISP_START is the operand
         text quoted in the diagnostic.  May rewrite *EXP,
         i.reloc[this_operand] and i.types[this_operand].  Returns 1 on
         success and 0 once an error has been reported through as_bad ().  */
11244 static int
11245 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11246                             i386_operand_type types, const char *disp_start)
11247 {
11248   int ret = 1;
11249
11250   /* We do this to make sure that the section symbol is in
11251      the symbol table.  We will ultimately change the relocation
11252      to be relative to the beginning of the section.  */
11253   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11254       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11255       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11256     {
            /* Anything but a plain symbol is rejected for these relocations.  */
11257       if (exp->X_op != O_symbol)
11258         goto inv_disp;
11259
11260       if (S_IS_LOCAL (exp->X_add_symbol)
11261           && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11262           && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11263         section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
            /* Turn the expression into `symbol - GOT_symbol' and replace the
               GOT-specific relocation by a generic one.  */
11264       exp->X_op = O_subtract;
11265       exp->X_op_symbol = GOT_symbol;
11266       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11267         i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11268       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11269         i.reloc[this_operand] = BFD_RELOC_64;
11270       else
11271         i.reloc[this_operand] = BFD_RELOC_32;
11272     }
11273
11274   else if (exp->X_op == O_absent
11275            || exp->X_op == O_illegal
11276            || exp->X_op == O_big)
11277     {
11278     inv_disp:  /* Also entered from the GOT relocation case above.  */
11279       as_bad (_("missing or invalid displacement expression `%s'"),
11280               disp_start);
11281       ret = 0;
11282     }
11283
11284   else if (exp->X_op == O_constant)
11285     {
11286       /* Sizing gets taken care of by optimize_disp().
11287
11288          If not 64bit, sign/zero extend val, to account for wraparound
11289          when !BFD64.  */
11290       if (flag_code != CODE_64BIT)
11291         exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11292     }
11293
11294 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11295   else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11296            && exp_seg != absolute_section
11297            && exp_seg != text_section
11298            && exp_seg != data_section
11299            && exp_seg != bss_section
11300            && exp_seg != undefined_section
11301            && !bfd_is_com_section (exp_seg))
11302     {
11303       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11304       ret = 0;
11305     }
11306 #endif
11307
        /* Only reached when none of the cases above applied: a JUMP_BYTE
           template gets Disp8 set on the operand.  */
11308   else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
11309     i.types[this_operand].bitfield.disp8 = 1;
11310
11311   /* Check if this is a displacement only operand.  */
        /* Going by the helper names, the new type is
           (type & ~anydisp) | (type & TYPES).  */
11312   if (!i.types[this_operand].bitfield.baseindex)
11313     i.types[this_operand] =
11314       operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
11315                        operand_type_and (i.types[this_operand], types));
11316
11317   return ret;
11318 }
11319
11320 /* Return the active addressing mode, taking address override and
11321    registers forming the address into consideration.  Update the
11322    address override prefix if necessary.
      
         The result is an enum flag_code value.  As side effects this may
         set i.prefix[ADDR_PREFIX], increment i.prefixes, and swap the
         Disp16/Disp32 bits of i.types[this_operand].  */
11323
11324 static enum flag_code
11325 i386_addressing_mode (void)
11326 {
11327   enum flag_code addr_mode;
11328
        /* With an address size prefix already recorded, 32-bit code uses
           16-bit addressing and every other code size uses 32-bit
           addressing.  */
11329   if (i.prefix[ADDR_PREFIX])
11330     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11331   else if (flag_code == CODE_16BIT
11332            && current_templates->start->cpu_flags.bitfield.cpumpx
11333            /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11334               from md_assemble() by "is not a valid base/index expression"
11335               when there is a base and/or index.  */
11336            && !i.types[this_operand].bitfield.baseindex)
11337     {
11338       /* MPX insn memory operands with neither base nor index must be forced
11339          to use 32-bit addressing in 16-bit mode.  */
11340       addr_mode = CODE_32BIT;
11341       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11342       ++i.prefixes;
11343       gas_assert (!i.types[this_operand].bitfield.disp16);
11344       gas_assert (!i.types[this_operand].bitfield.disp32);
11345     }
11346   else
11347     {
11348       addr_mode = flag_code;
11349
11350 #if INFER_ADDR_PREFIX
11351       if (i.mem_operands == 0)
11352         {
11353           /* Infer address prefix from the first memory operand.  */
11354           const reg_entry *addr_reg = i.base_reg;
11355
                /* Without a base register the index register decides.  */
11356           if (addr_reg == NULL)
11357             addr_reg = i.index_reg;
11358
11359           if (addr_reg)
11360             {
11361               if (addr_reg->reg_type.bitfield.dword)
11362                 addr_mode = CODE_32BIT;
11363               else if (flag_code != CODE_64BIT
11364                        && addr_reg->reg_type.bitfield.word)
11365                 addr_mode = CODE_16BIT;
11366
11367               if (addr_mode != flag_code)
11368                 {
11369                   i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11370                   i.prefixes += 1;
11371                   /* Change the size of any displacement too.  At most one
11372                      of Disp16 or Disp32 is set.
11373                      FIXME.  There doesn't seem to be any real need for
11374                      separate Disp16 and Disp32 flags.  The same goes for
11375                      Imm16 and Imm32.  Removing them would probably clean
11376                      up the code quite a lot.  */
11377                   if (flag_code != CODE_64BIT
11378                       && (i.types[this_operand].bitfield.disp16
11379                           || i.types[this_operand].bitfield.disp32))
11380                     {
11381                       static const i386_operand_type disp16_32 = {
11382                         .bitfield = { .disp16 = 1, .disp32 = 1 }
11383                       };
11384
                            /* XOR with both bits set flips each of them, so
                               the one bit that was set moves to the other
                               size.  */
11385                       i.types[this_operand]
11386                         = operand_type_xor (i.types[this_operand], disp16_32);
11387                     }
11388                 }
11389             }
11390         }
11391 #endif
11392     }
11393
11394   return addr_mode;
11395 }
11396
11397 /* Make sure the memory operand we've been dealt is valid.
11398    Return 1 on success, 0 on a failure.
      
         OPERAND_STRING is the operand text, used only in diagnostics.  The
         addressing mode is obtained from i386_addressing_mode (), so that
         function's side effects (address size prefix inference) happen here
         too.  For string insns a base register other than the expected one
         is only warned about (the result is still 1), unless a previous
         memory operand exists and the base register's size does not fit the
         addressing mode, which is an error.  */
11399
11400 static int
11401 i386_index_check (const char *operand_string)
11402 {
11403   const char *kind = "base/index";
11404   enum flag_code addr_mode = i386_addressing_mode ();
        /* The string insn test looks at the last template of the current
           set; the other checks switch to the first one further down.  */
11405   const insn_template *t = current_templates->end - 1;
11406
11407   if (t->opcode_modifier.isstring)
11408     {
11409       /* Memory operands of string insns are special in that they only allow
11410          a single register (rDI, rSI, or rBX) as their memory address.  */
11411       const reg_entry *expected_reg;
11412       static const char *di_si[][2] =
11413         {
11414           { "esi", "edi" },
11415           { "si", "di" },
11416           { "rsi", "rdi" }
11417         };
11418       static const char *bx[] = { "ebx", "bx", "rbx" };
            /* Both tables are indexed by ADDR_MODE.  The row order (32-bit,
               16-bit, 64-bit names) presumably mirrors the values of enum
               flag_code -- confirm against its declaration.  */
11419
11420       kind = "string address";
11421
11422       if (t->opcode_modifier.prefixok == PrefixRep)
11423         {
11424           int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
11425           int op = 0;
11426
11427           if (!t->operand_types[0].bitfield.baseindex
11428               || ((!i.mem_operands != !intel_syntax)
11429                   && t->operand_types[1].bitfield.baseindex))
11430             op = 1;
                /* Column 1 (the rDI name) is picked when OP is the operand
                   designated by ES_OP, column 0 (rSI) otherwise.  */
11431           expected_reg
11432             = (const reg_entry *) str_hash_find (reg_hash,
11433                                                  di_si[addr_mode][op == es_op]);
11434         }
11435       else
11436         expected_reg
11437           = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
11438
11439       if (i.base_reg != expected_reg
11440           || i.index_reg
11441           || operand_type_check (i.types[this_operand], disp))
11442         {
11443           /* The second memory operand must have the same size as
11444              the first one.  */
11445           if (i.mem_operands
11446               && i.base_reg
11447               && !((addr_mode == CODE_64BIT
11448                     && i.base_reg->reg_type.bitfield.qword)
11449                    || (addr_mode == CODE_32BIT
11450                        ? i.base_reg->reg_type.bitfield.dword
11451                        : i.base_reg->reg_type.bitfield.word)))
11452             goto bad_address;
11453
11454           as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
11455                    operand_string,
11456                    intel_syntax ? '[' : '(',
11457                    register_prefix,
11458                    expected_reg->reg_name,
11459                    intel_syntax ? ']' : ')');
11460           return 1;
11461         }
11462       else
11463         return 1;
11464
11465     bad_address:  /* Also the target of gotos in the non-string branch.  */
11466       as_bad (_("`%s' is not a valid %s expression"),
11467               operand_string, kind);
11468       return 0;
11469     }
11470   else
11471     {
11472       t = current_templates->start;
11473
11474       if (addr_mode != CODE_16BIT)
11475         {
11476           /* 32-bit/64-bit checks.  */
11477           if (i.disp_encoding == disp_encoding_16bit)
11478             {
11479             bad_disp:  /* Shared with the 16-bit checks below.  */
11480               as_bad (_("invalid `%s' prefix"),
11481                       addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
11482               return 0;
11483             }
11484
                /* Base and (non-vector) index must match the address size;
                   a RegIP base may not be combined with an index and RegIZ
                   is not accepted as base.  */
11485           if ((i.base_reg
11486                && ((addr_mode == CODE_64BIT
11487                     ? !i.base_reg->reg_type.bitfield.qword
11488                     : !i.base_reg->reg_type.bitfield.dword)
11489                    || (i.index_reg && i.base_reg->reg_num == RegIP)
11490                    || i.base_reg->reg_num == RegIZ))
11491               || (i.index_reg
11492                   && !i.index_reg->reg_type.bitfield.xmmword
11493                   && !i.index_reg->reg_type.bitfield.ymmword
11494                   && !i.index_reg->reg_type.bitfield.zmmword
11495                   && ((addr_mode == CODE_64BIT
11496                        ? !i.index_reg->reg_type.bitfield.qword
11497                        : !i.index_reg->reg_type.bitfield.dword)
11498                       || !i.index_reg->reg_type.bitfield.baseindex)))
11499             goto bad_address;
11500
11501           /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
11502           if ((t->opcode_modifier.opcodeprefix == PREFIX_0XF3
11503                && t->opcode_modifier.opcodespace == SPACE_0F
11504                && t->base_opcode == 0x1b)
11505               || (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11506                   && t->opcode_modifier.opcodespace == SPACE_0F
11507                   && (t->base_opcode & ~1) == 0x1a)
11508               || t->opcode_modifier.sib == SIBMEM)
11509             {
11510               /* They cannot use RIP-relative addressing. */
11511               if (i.base_reg && i.base_reg->reg_num == RegIP)
11512                 {
11513                   as_bad (_("`%s' cannot be used here"), operand_string);
11514                   return 0;
11515                 }
11516
11517               /* bndldx and bndstx ignore their scale factor. */
11518               if (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11519                   && t->opcode_modifier.opcodespace == SPACE_0F
11520                   && (t->base_opcode & ~1) == 0x1a
11521                   && i.log2_scale_factor)
11522                 as_warn (_("register scaling is being ignored here"));
11523             }
11524         }
11525       else
11526         {
11527           /* 16-bit checks.  */
11528           if (i.disp_encoding == disp_encoding_32bit)
11529             goto bad_disp;
11530
                /* An index is only accepted unscaled and together with a
                   base whose reg_num is below 6 while its own reg_num is 6
                   or more (presumably %bx/%bp paired with %si/%di --
                   confirm against the register table).  */
11531           if ((i.base_reg
11532                && (!i.base_reg->reg_type.bitfield.word
11533                    || !i.base_reg->reg_type.bitfield.baseindex))
11534               || (i.index_reg
11535                   && (!i.index_reg->reg_type.bitfield.word
11536                       || !i.index_reg->reg_type.bitfield.baseindex
11537                       || !(i.base_reg
11538                            && i.base_reg->reg_num < 6
11539                            && i.index_reg->reg_num >= 6
11540                            && i.log2_scale_factor == 0))))
11541             goto bad_address;
11542         }
11543     }
11544   return 1;
11545 }
11546
11547 /* Handle vector immediates.
      
         IMM_START is the operand text.  Returns 1 when it consists of
         exactly `{', a specifier accepted by RC_SAE_specifier (), and `}';
         in that case i.operands is decremented.  Returns 0 otherwise: with
         no diagnostic issued here when the text does not start with `{' or
         RC_SAE_specifier () returns NULL, and after as_bad () when the
         closing brace is missing or anything follows it.  */
11548
11549 static int
11550 RC_SAE_immediate (const char *imm_start)
11551 {
11552   const char *pstr = imm_start;
11553
11554   if (*pstr != '{')
11555     return 0;
11556
11557   pstr = RC_SAE_specifier (pstr + 1);
11558   if (pstr == NULL)
11559     return 0;
11560
11561   if (*pstr++ != '}')
11562     {
11563       as_bad (_("Missing '}': '%s'"), imm_start);
11564       return 0;
11565     }
11566   /* RC/SAE immediate string should contain nothing more.  */;
11567   if (*pstr != 0)
11568     {
11569       as_bad (_("Junk after '}': '%s'"), imm_start);
11570       return 0;
11571     }
11572
11573   /* Internally this doesn't count as an operand.  */
11574   --i.operands;
11575
11576   return 1;
11577 }
11578
      /* Return true if C may begin a memory operand: a digit, an identifier
         character, or one of the characters in the literal below (opening
         parenthesis or bracket, double quote, `+', `-', `!', `~').
         NOTE(review): strchr () treats the terminating NUL as part of the
         string, so C == '\0' also yields true -- confirm that accepting an
         empty operand tail here is intended.  */
11579 static INLINE bool starts_memory_operand (char c)
11580 {
11581   return ISDIGIT (c)
11582          || is_identifier_char (c)
11583          || strchr ("([\"+-!~", c);
11584 }
11585
11586 /* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
11587    on error.
      
         After an optional ABSOLUTE_PREFIX the operand is classified, in this
         order, as: a register (a segment register followed by `:' instead
         introduces a memory reference); an immediate introduced by
         IMMEDIATE_PREFIX; an RC/SAE specifier; or a memory reference made of
         a displacement and/or a trailing parenthesized base, index and
         scale group.  Results are recorded in I for operand THIS_OPERAND,
         and the matching operand counter (i.reg_operands, i.mem_operands,
         ...) is updated.  OPERAND_STRING may be modified in place when a bad
         register name inside a base/index group is reported.  */
11588
11589 static int
11590 i386_att_operand (char *operand_string)
11591 {
11592   const reg_entry *r;
11593   char *end_op;
11594   char *op_string = operand_string;
11595
11596   if (is_space_char (*op_string))
11597     ++op_string;
11598
11599   /* We check for an absolute prefix (differentiating,
11600      for example, 'jmp pc_relative_label' from 'jmp *absolute_label').  */
11601   if (*op_string == ABSOLUTE_PREFIX)
11602     {
11603       ++op_string;
11604       if (is_space_char (*op_string))
11605         ++op_string;
11606       i.jumpabsolute = true;
11607     }
11608
11609   /* Check if operand is a register.  */
11610   if ((r = parse_register (op_string, &end_op)) != NULL)
11611     {
11612       i386_operand_type temp;
11613
11614       if (r == &bad_reg)
11615         return 0;
11616
11617       /* Check for a segment override by searching for ':' after a
11618          segment register.  */
11619       op_string = end_op;
11620       if (is_space_char (*op_string))
11621         ++op_string;
11622       if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11623         {
11624           i.seg[i.mem_operands] = r;
11625
11626           /* Skip the ':' and whitespace.  */
11627           ++op_string;
11628           if (is_space_char (*op_string))
11629             ++op_string;
11630
11631           /* Handle case of %es:*foo.  */
11632           if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
11633             {
11634               ++op_string;
11635               if (is_space_char (*op_string))
11636                 ++op_string;
11637               i.jumpabsolute = true;
11638             }
11639
11640           if (!starts_memory_operand (*op_string))
11641             {
11642               as_bad (_("bad memory operand `%s'"), op_string);
11643               return 0;
11644             }
                /* The rest of the operand is parsed as a memory reference,
                   with the segment override already recorded above.  */
11645           goto do_memory_reference;
11646         }
11647
11648       /* Handle vector operations.  */
11649       if (*op_string == '{')
11650         {
11651           op_string = check_VecOperations (op_string);
11652           if (op_string == NULL)
11653             return 0;
11654         }
11655
11656       if (*op_string)
11657         {
11658           as_bad (_("junk `%s' after register"), op_string);
11659           return 0;
11660         }
            /* Record the register's type with the BaseIndex bit cleared.  */
11661       temp = r->reg_type;
11662       temp.bitfield.baseindex = 0;
11663       i.types[this_operand] = operand_type_or (i.types[this_operand],
11664                                                temp);
11665       i.types[this_operand].bitfield.unspecified = 0;
11666       i.op[this_operand].regs = r;
11667       i.reg_operands++;
11668
11669       /* A GPR may follow an RC or SAE immediate only if a (vector) register
11670          operand was also present earlier on.  */
11671       if (i.rounding.type != rc_none && temp.bitfield.class == Reg
11672           && i.reg_operands == 1)
11673         {
11674           unsigned int j;
11675
                /* Look up the name of the rounding type for the message.  */
11676           for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
11677             if (i.rounding.type == RC_NamesTable[j].type)
11678               break;
11679           as_bad (_("`%s': misplaced `{%s}'"),
11680                   insn_name (current_templates->start), RC_NamesTable[j].name);
11681           return 0;
11682         }
11683     }
11684   else if (*op_string == REGISTER_PREFIX)
11685     {
11686       as_bad (_("bad register name `%s'"), op_string);
11687       return 0;
11688     }
11689   else if (*op_string == IMMEDIATE_PREFIX)
11690     {
11691       ++op_string;
11692       if (i.jumpabsolute)
11693         {
11694           as_bad (_("immediate operand illegal with absolute jump"));
11695           return 0;
11696         }
11697       if (!i386_immediate (op_string))
11698         return 0;
11699       if (i.rounding.type != rc_none)
11700         {
11701           as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
11702                   insn_name (current_templates->start));
11703           return 0;
11704         }
11705     }
11706   else if (RC_SAE_immediate (operand_string))
11707     {
11708       /* If it is a RC or SAE immediate, do the necessary placement check:
11709          Only another immediate or a GPR may precede it.  */
11710       if (i.mem_operands || i.reg_operands + i.imm_operands > 1
11711           || (i.reg_operands == 1
11712               && i.op[0].regs->reg_type.bitfield.class != Reg))
11713         {
11714           as_bad (_("`%s': misplaced `%s'"),
11715                   insn_name (current_templates->start), operand_string);
11716           return 0;
11717         }
11718     }
11719   else if (starts_memory_operand (*op_string))
11720     {
11721       /* This is a memory reference of some sort.  */
11722       char *base_string;
11723
11724       /* Start and end of displacement string expression (if found).  */
11725       char *displacement_string_start;
11726       char *displacement_string_end;
11727
11728     do_memory_reference:
11729       /* Check for base index form.  We detect the base index form by
11730          looking for an ')' at the end of the operand, searching
11731          for the '(' matching it, and finding a REGISTER_PREFIX or ','
11732          after the '('.  */
11733       base_string = op_string + strlen (op_string);
11734
11735       /* Handle vector operations.  */
11736       --base_string;
11737       if (is_space_char (*base_string))
11738         --base_string;
11739
11740       if (*base_string == '}')
11741         {
11742           char *vop_start = NULL;
11743
                /* Walk backwards over one or more trailing `{...}' groups.
                   VOP_START ends up at the `{' of the leftmost group, or
                   stays NULL when a double quote or the start of the
                   operand is reached first.  */
11744           while (base_string-- > op_string)
11745             {
11746               if (*base_string == '"')
11747                 break;
11748               if (*base_string != '{')
11749                 continue;
11750
11751               vop_start = base_string;
11752
11753               --base_string;
11754               if (is_space_char (*base_string))
11755                 --base_string;
11756
11757               if (*base_string != '}')
11758                 break;
11759
11760               vop_start = NULL;
11761             }
11762
11763           if (!vop_start)
11764             {
11765               as_bad (_("unbalanced figure braces"));
11766               return 0;
11767             }
11768
11769           if (check_VecOperations (vop_start) == NULL)
11770             return 0;
11771         }
11772
11773       /* If we only have a displacement, set-up for it to be parsed later.  */
11774       displacement_string_start = op_string;
11775       displacement_string_end = base_string + 1;
11776
11777       if (*base_string == ')')
11778         {
11779           char *temp_string;
11780           unsigned int parens_not_balanced = 0;
11781           bool in_quotes = false;
11782
11783           /* We've already checked that the number of left & right ()'s are
11784              equal, and that there's a matching set of double quotes.  */
                /* Scan forward, ignoring quoted text, and leave BASE_STRING
                   at the last `(' that is opened at nesting depth zero.  */
11785           end_op = base_string;
11786           for (temp_string = op_string; temp_string < end_op; temp_string++)
11787             {
11788               if (*temp_string == '\\' && temp_string[1] == '"')
11789                 ++temp_string;
11790               else if (*temp_string == '"')
11791                 in_quotes = !in_quotes;
11792               else if (!in_quotes)
11793                 {
11794                   if (*temp_string == '(' && !parens_not_balanced++)
11795                     base_string = temp_string;
11796                   if (*temp_string == ')')
11797                     --parens_not_balanced;
11798                 }
11799             }
11800
                /* Remember where the group starts: this becomes the end of
                   the displacement if the group is a base/index form.  */
11801           temp_string = base_string;
11802
11803           /* Skip past '(' and whitespace.  */
11804           gas_assert (*base_string == '(');
11805           ++base_string;
11806           if (is_space_char (*base_string))
11807             ++base_string;
11808
11809           if (*base_string == ','
11810               || ((i.base_reg = parse_register (base_string, &end_op))
11811                   != NULL))
11812             {
11813               displacement_string_end = temp_string;
11814
11815               i.types[this_operand].bitfield.baseindex = 1;
11816
11817               if (i.base_reg)
11818                 {
11819                   if (i.base_reg == &bad_reg)
11820                     return 0;
11821                   base_string = end_op;
11822                   if (is_space_char (*base_string))
11823                     ++base_string;
11824                 }
11825
11826               /* There may be an index reg or scale factor here.  */
11827               if (*base_string == ',')
11828                 {
11829                   ++base_string;
11830                   if (is_space_char (*base_string))
11831                     ++base_string;
11832
11833                   if ((i.index_reg = parse_register (base_string, &end_op))
11834                       != NULL)
11835                     {
11836                       if (i.index_reg == &bad_reg)
11837                         return 0;
11838                       base_string = end_op;
11839                       if (is_space_char (*base_string))
11840                         ++base_string;
11841                       if (*base_string == ',')
11842                         {
11843                           ++base_string;
11844                           if (is_space_char (*base_string))
11845                             ++base_string;
11846                         }
11847                       else if (*base_string != ')')
11848                         {
11849                           as_bad (_("expecting `,' or `)' "
11850                                     "after index register in `%s'"),
11851                                   operand_string);
11852                           return 0;
11853                         }
11854                     }
11855                   else if (*base_string == REGISTER_PREFIX)
11856                     {
                            /* Cut the string at the next `,' so that only
                               the offending name is quoted.  This writes
                               into the caller's buffer.  */
11857                       end_op = strchr (base_string, ',');
11858                       if (end_op)
11859                         *end_op = '\0';
11860                       as_bad (_("bad register name `%s'"), base_string);
11861                       return 0;
11862                     }
11863
11864                   /* Check for scale factor.  */
11865                   if (*base_string != ')')
11866                     {
11867                       char *end_scale = i386_scale (base_string);
11868
11869                       if (!end_scale)
11870                         return 0;
11871
11872                       base_string = end_scale;
11873                       if (is_space_char (*base_string))
11874                         ++base_string;
11875                       if (*base_string != ')')
11876                         {
11877                           as_bad (_("expecting `)' "
11878                                     "after scale factor in `%s'"),
11879                                   operand_string);
11880                           return 0;
11881                         }
11882                     }
11883                   else if (!i.index_reg)
11884                     {
11885                       as_bad (_("expecting index register or scale factor "
11886                                 "after `,'; got '%c'"),
11887                               *base_string);
11888                       return 0;
11889                     }
11890                 }
11891               else if (*base_string != ')')
11892                 {
11893                   as_bad (_("expecting `,' or `)' "
11894                             "after base register in `%s'"),
11895                           operand_string);
11896                   return 0;
11897                 }
11898             }
11899           else if (*base_string == REGISTER_PREFIX)
11900             {
                    /* As above: truncate at the next `,' for the message.  */
11901               end_op = strchr (base_string, ',');
11902               if (end_op)
11903                 *end_op = '\0';
11904               as_bad (_("bad register name `%s'"), base_string);
11905               return 0;
11906             }
                /* Otherwise the parenthesized text is not a base/index group
                   and stays part of the displacement expression.  */
11907         }
11908
11909       /* If there's an expression beginning the operand, parse it,
11910          assuming displacement_string_start and
11911          displacement_string_end are meaningful.  */
11912       if (displacement_string_start != displacement_string_end)
11913         {
11914           if (!i386_displacement (displacement_string_start,
11915                                   displacement_string_end))
11916             return 0;
11917         }
11918
11919       /* Special case for (%dx) while doing input/output op.  */
            /* Such an operand is recorded with the register's type and is
               neither index-checked nor counted in i.mem_operands.  */
11920       if (i.base_reg
11921           && i.base_reg->reg_type.bitfield.instance == RegD
11922           && i.base_reg->reg_type.bitfield.word
11923           && i.index_reg == 0
11924           && i.log2_scale_factor == 0
11925           && i.seg[i.mem_operands] == 0
11926           && !operand_type_check (i.types[this_operand], disp))
11927         {
11928           i.types[this_operand] = i.base_reg->reg_type;
11929           i.input_output_operand = true;
11930           return 1;
11931         }
11932
11933       if (i386_index_check (operand_string) == 0)
11934         return 0;
11935       i.flags[this_operand] |= Operand_Mem;
11936       i.mem_operands++;
11937     }
11938   else
11939     {
11940       /* It's not a memory operand; argh!  */
11941       as_bad (_("invalid char %s beginning operand %d `%s'"),
11942               output_invalid (*op_string),
11943               this_operand + 1,
11944               op_string);
11945       return 0;
11946     }
11947   return 1;                     /* Normal return.  */
11948 }
11949 \f
11950 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11951    that an rs_machine_dependent frag may reach.
      
         FRAG must have type rs_machine_dependent (asserted).  The result is
         4 when the relax state type of FRAG's subtype is UNCOND_JUMP and 5
         for every other type.  */
11952
11953 unsigned int
11954 i386_frag_max_var (fragS *frag)
11955 {
11956   /* The only relaxable frags are for jumps.
11957      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
11958   gas_assert (frag->fr_type == rs_machine_dependent);
11959   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11960 }
11961
11962 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      /* Return nonzero if FR_SYMBOL is considered resolved within its segment,
         zero if it has to be left alone because it is an indirect function
         or may be preempted.  FR_VAR is either NO_RELOC or a relocation
         code; for an external symbol with default visibility the only codes
         accepted are the two PLT32 relocations, anything else aborts.  */
11963 static int
11964 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
11965 {
11966   /* STT_GNU_IFUNC symbol must go through PLT.  */
11967   if ((symbol_get_bfdsym (fr_symbol)->flags
11968        & BSF_GNU_INDIRECT_FUNCTION) != 0)
11969     return 0;
11970
11971   if (!S_IS_EXTERNAL (fr_symbol))
11972     /* Symbol may be weak or local.  */
11973     return !S_IS_WEAK (fr_symbol);
11974
11975   /* Global symbols with non-default visibility can't be preempted. */
11976   if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11977     return 1;
11978
11979   if (fr_var != NO_RELOC)
11980     switch ((enum bfd_reloc_code_real) fr_var)
11981       {
11982       case BFD_RELOC_386_PLT32:
11983       case BFD_RELOC_X86_64_PLT32:
11984         /* Symbol with PLT relocation may be preempted. */
11985         return 0;
11986       default:
              /* No other relocation code is expected at this point.  */
11987         abort ();
11988       }
11989
11990   /* Global symbols with default visibility in a shared library may be
11991      preempted by another definition.  */
11992   return !shared;
11993 }
11994 #endif
11995
11996 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11997    Note: this also works for Skylake and Cascadelake.
11998 ---------------------------------------------------------------------
11999 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
12000 | ------  | ----------- | ------- | -------- |
12001 |   Jo    |      N      |    N    |     Y    |
12002 |   Jno   |      N      |    N    |     Y    |
12003 |  Jc/Jb  |      Y      |    N    |     Y    |
12004 | Jae/Jnb |      Y      |    N    |     Y    |
12005 |  Je/Jz  |      Y      |    Y    |     Y    |
12006 | Jne/Jnz |      Y      |    Y    |     Y    |
12007 | Jna/Jbe |      Y      |    N    |     Y    |
12008 | Ja/Jnbe |      Y      |    N    |     Y    |
12009 |   Js    |      N      |    N    |     Y    |
12010 |   Jns   |      N      |    N    |     Y    |
12011 |  Jp/Jpe |      N      |    N    |     Y    |
12012 | Jnp/Jpo |      N      |    N    |     Y    |
12013 | Jl/Jnge |      Y      |    Y    |     Y    |
12014 | Jge/Jnl |      Y      |    Y    |     Y    |
12015 | Jle/Jng |      Y      |    Y    |     Y    |
12016 | Jg/Jnle |      Y      |    Y    |     Y    |
12017 ---------------------------------------------------------------------  */
      /* Return nonzero if a compare-like insn of kind MF_CMP can fuse with a
         conditional jump of kind MF_JCC, zero otherwise (including for any
         MF_CMP kind not tested below).  The range test on MF_JCC depends on
         the enumerator order of enum mf_jcc_kind, which is declared
         elsewhere -- presumably one enumerator per pair of table rows.  */
12018 static int
12019 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
12020 {
12021   if (mf_cmp == mf_cmp_alu_cmp)
12022     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
12023             || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
12024   if (mf_cmp == mf_cmp_incdec)
12025     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
12026             || mf_jcc == mf_jcc_jle);
        /* The TEST/AND kind fuses with every jump kind.  */
12027   if (mf_cmp == mf_cmp_test_and)
12028     return 1;
12029   return 0;
12030 }
12031
12032 /* Return the next non-empty frag.  */
12033
12034 static fragS *
12035 i386_next_non_empty_frag (fragS *fragP)
12036 {
12037   /* There may be a frag with a ".fill 0" when there is no room in
12038      the current frag for frag_grow in output_insn.  */
12039   for (fragP = fragP->fr_next;
12040        (fragP != NULL
12041         && fragP->fr_type == rs_fill
12042         && fragP->fr_fix == 0);
12043        fragP = fragP->fr_next)
12044     ;
12045   return fragP;
12046 }
12047
12048 /* Return the next jcc frag after BRANCH_PADDING.  */
12049
12050 static fragS *
12051 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
12052 {
12053   fragS *branch_fragP;
12054   if (!pad_fragP)
12055     return NULL;
12056
12057   if (pad_fragP->fr_type == rs_machine_dependent
12058       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
12059           == BRANCH_PADDING))
12060     {
12061       branch_fragP = i386_next_non_empty_frag (pad_fragP);
12062       if (branch_fragP->fr_type != rs_machine_dependent)
12063         return NULL;
12064       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
12065           && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
12066                                    pad_fragP->tc_frag_data.mf_type))
12067         return branch_fragP;
12068     }
12069
12070   return NULL;
12071 }
12072
12073 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
12074
12075 static void
12076 i386_classify_machine_dependent_frag (fragS *fragP)
12077 {
12078   fragS *cmp_fragP;
12079   fragS *pad_fragP;
12080   fragS *branch_fragP;
12081   fragS *next_fragP;
12082   unsigned int max_prefix_length;
12083
12084   if (fragP->tc_frag_data.classified)
12085     return;
12086
12087   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
12088      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
12089   for (next_fragP = fragP;
12090        next_fragP != NULL;
12091        next_fragP = next_fragP->fr_next)
12092     {
12093       next_fragP->tc_frag_data.classified = 1;
12094       if (next_fragP->fr_type == rs_machine_dependent)
12095         switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
12096           {
12097           case BRANCH_PADDING:
12098             /* The BRANCH_PADDING frag must be followed by a branch
12099                frag.  */
12100             branch_fragP = i386_next_non_empty_frag (next_fragP);
12101             next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12102             break;
12103           case FUSED_JCC_PADDING:
12104             /* Check if this is a fused jcc:
12105                FUSED_JCC_PADDING
12106                CMP like instruction
12107                BRANCH_PADDING
12108                COND_JUMP
12109                */
12110             cmp_fragP = i386_next_non_empty_frag (next_fragP);
12111             pad_fragP = i386_next_non_empty_frag (cmp_fragP);
12112             branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
12113             if (branch_fragP)
12114               {
12115                 /* The BRANCH_PADDING frag is merged with the
12116                    FUSED_JCC_PADDING frag.  */
12117                 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12118                 /* CMP like instruction size.  */
12119                 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
12120                 frag_wane (pad_fragP);
12121                 /* Skip to branch_fragP.  */
12122                 next_fragP = branch_fragP;
12123               }
12124             else if (next_fragP->tc_frag_data.max_prefix_length)
12125               {
12126                 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
12127                    a fused jcc.  */
12128                 next_fragP->fr_subtype
12129                   = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
12130                 next_fragP->tc_frag_data.max_bytes
12131                   = next_fragP->tc_frag_data.max_prefix_length;
12132                 /* This will be updated in the BRANCH_PREFIX scan.  */
12133                 next_fragP->tc_frag_data.max_prefix_length = 0;
12134               }
12135             else
12136               frag_wane (next_fragP);
12137             break;
12138           }
12139     }
12140
12141   /* Stop if there is no BRANCH_PREFIX.  */
12142   if (!align_branch_prefix_size)
12143     return;
12144
12145   /* Scan for BRANCH_PREFIX.  */
12146   for (; fragP != NULL; fragP = fragP->fr_next)
12147     {
12148       if (fragP->fr_type != rs_machine_dependent
12149           || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12150               != BRANCH_PREFIX))
12151         continue;
12152
12153       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12154          COND_JUMP_PREFIX.  */
12155       max_prefix_length = 0;
12156       for (next_fragP = fragP;
12157            next_fragP != NULL;
12158            next_fragP = next_fragP->fr_next)
12159         {
12160           if (next_fragP->fr_type == rs_fill)
12161             /* Skip rs_fill frags.  */
12162             continue;
12163           else if (next_fragP->fr_type != rs_machine_dependent)
12164             /* Stop for all other frags.  */
12165             break;
12166
12167           /* rs_machine_dependent frags.  */
12168           if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12169               == BRANCH_PREFIX)
12170             {
12171               /* Count BRANCH_PREFIX frags.  */
12172               if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12173                 {
12174                   max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12175                   frag_wane (next_fragP);
12176                 }
12177               else
12178                 max_prefix_length
12179                   += next_fragP->tc_frag_data.max_bytes;
12180             }
12181           else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12182                     == BRANCH_PADDING)
12183                    || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12184                        == FUSED_JCC_PADDING))
12185             {
12186               /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
12187               fragP->tc_frag_data.u.padding_fragP = next_fragP;
12188               break;
12189             }
12190           else
12191             /* Stop for other rs_machine_dependent frags.  */
12192             break;
12193         }
12194
12195       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12196
12197       /* Skip to the next frag.  */
12198       fragP = next_fragP;
12199     }
12200 }
12201
12202 /* Compute padding size for
12203
12204         FUSED_JCC_PADDING
12205         CMP like instruction
12206         BRANCH_PADDING
12207         COND_JUMP/UNCOND_JUMP
12208
12209    or
12210
12211         BRANCH_PADDING
12212         COND_JUMP/UNCOND_JUMP
12213  */
12214
12215 static int
12216 i386_branch_padding_size (fragS *fragP, offsetT address)
12217 {
12218   unsigned int offset, size, padding_size;
12219   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12220
12221   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
12222   if (!address)
12223     address = fragP->fr_address;
12224   address += fragP->fr_fix;
12225
12226   /* CMP like instrunction size.  */
12227   size = fragP->tc_frag_data.cmp_size;
12228
12229   /* The base size of the branch frag.  */
12230   size += branch_fragP->fr_fix;
12231
12232   /* Add opcode and displacement bytes for the rs_machine_dependent
12233      branch frag.  */
12234   if (branch_fragP->fr_type == rs_machine_dependent)
12235     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12236
12237   /* Check if branch is within boundary and doesn't end at the last
12238      byte.  */
12239   offset = address & ((1U << align_branch_power) - 1);
12240   if ((offset + size) >= (1U << align_branch_power))
12241     /* Padding needed to avoid crossing boundary.  */
12242     padding_size = (1U << align_branch_power) - offset;
12243   else
12244     /* No padding needed.  */
12245     padding_size = 0;
12246
12247   /* The return value may be saved in tc_frag_data.length which is
12248      unsigned byte.  */
12249   if (!fits_in_unsigned_byte (padding_size))
12250     abort ();
12251
12252   return padding_size;
12253 }
12254
12255 /* i386_generic_table_relax_frag()
12256
12257    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12258    grow/shrink padding to align branch frags.  Hand others to
12259    relax_frag().  */
12260
12261 long
12262 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12263 {
12264   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12265       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12266     {
12267       long padding_size = i386_branch_padding_size (fragP, 0);
12268       long grow = padding_size - fragP->tc_frag_data.length;
12269
12270       /* When the BRANCH_PREFIX frag is used, the computed address
12271          must match the actual address and there should be no padding.  */
12272       if (fragP->tc_frag_data.padding_address
12273           && (fragP->tc_frag_data.padding_address != fragP->fr_address
12274               || padding_size))
12275         abort ();
12276
12277       /* Update the padding size.  */
12278       if (grow)
12279         fragP->tc_frag_data.length = padding_size;
12280
12281       return grow;
12282     }
12283   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12284     {
12285       fragS *padding_fragP, *next_fragP;
12286       long padding_size, left_size, last_size;
12287
12288       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12289       if (!padding_fragP)
12290         /* Use the padding set by the leading BRANCH_PREFIX frag.  */
12291         return (fragP->tc_frag_data.length
12292                 - fragP->tc_frag_data.last_length);
12293
12294       /* Compute the relative address of the padding frag in the very
12295         first time where the BRANCH_PREFIX frag sizes are zero.  */
12296       if (!fragP->tc_frag_data.padding_address)
12297         fragP->tc_frag_data.padding_address
12298           = padding_fragP->fr_address - (fragP->fr_address - stretch);
12299
12300       /* First update the last length from the previous interation.  */
12301       left_size = fragP->tc_frag_data.prefix_length;
12302       for (next_fragP = fragP;
12303            next_fragP != padding_fragP;
12304            next_fragP = next_fragP->fr_next)
12305         if (next_fragP->fr_type == rs_machine_dependent
12306             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12307                 == BRANCH_PREFIX))
12308           {
12309             if (left_size)
12310               {
12311                 int max = next_fragP->tc_frag_data.max_bytes;
12312                 if (max)
12313                   {
12314                     int size;
12315                     if (max > left_size)
12316                       size = left_size;
12317                     else
12318                       size = max;
12319                     left_size -= size;
12320                     next_fragP->tc_frag_data.last_length = size;
12321                   }
12322               }
12323             else
12324               next_fragP->tc_frag_data.last_length = 0;
12325           }
12326
12327       /* Check the padding size for the padding frag.  */
12328       padding_size = i386_branch_padding_size
12329         (padding_fragP, (fragP->fr_address
12330                          + fragP->tc_frag_data.padding_address));
12331
12332       last_size = fragP->tc_frag_data.prefix_length;
12333       /* Check if there is change from the last interation.  */
12334       if (padding_size == last_size)
12335         {
12336           /* Update the expected address of the padding frag.  */
12337           padding_fragP->tc_frag_data.padding_address
12338             = (fragP->fr_address + padding_size
12339                + fragP->tc_frag_data.padding_address);
12340           return 0;
12341         }
12342
12343       if (padding_size > fragP->tc_frag_data.max_prefix_length)
12344         {
12345           /* No padding if there is no sufficient room.  Clear the
12346              expected address of the padding frag.  */
12347           padding_fragP->tc_frag_data.padding_address = 0;
12348           padding_size = 0;
12349         }
12350       else
12351         /* Store the expected address of the padding frag.  */
12352         padding_fragP->tc_frag_data.padding_address
12353           = (fragP->fr_address + padding_size
12354              + fragP->tc_frag_data.padding_address);
12355
12356       fragP->tc_frag_data.prefix_length = padding_size;
12357
12358       /* Update the length for the current interation.  */
12359       left_size = padding_size;
12360       for (next_fragP = fragP;
12361            next_fragP != padding_fragP;
12362            next_fragP = next_fragP->fr_next)
12363         if (next_fragP->fr_type == rs_machine_dependent
12364             && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12365                 == BRANCH_PREFIX))
12366           {
12367             if (left_size)
12368               {
12369                 int max = next_fragP->tc_frag_data.max_bytes;
12370                 if (max)
12371                   {
12372                     int size;
12373                     if (max > left_size)
12374                       size = left_size;
12375                     else
12376                       size = max;
12377                     left_size -= size;
12378                     next_fragP->tc_frag_data.length = size;
12379                   }
12380               }
12381             else
12382               next_fragP->tc_frag_data.length = 0;
12383           }
12384
12385       return (fragP->tc_frag_data.length
12386               - fragP->tc_frag_data.last_length);
12387     }
12388   return relax_frag (segment, fragP, stretch);
12389 }
12390
12391 /* md_estimate_size_before_relax()
12392
12393    Called just before relax() for rs_machine_dependent frags.  The x86
12394    assembler uses these frags to handle variable size jump
12395    instructions.
12396
12397    Any symbol that is now undefined will not become defined.
12398    Return the correct fr_subtype in the frag.
12399    Return the initial "guess for variable size of frag" to caller.
12400    The guess is actually the growth beyond the fixed part.  Whatever
12401    we do to grow the fixed or variable part contributes to our
12402    returned value.  */
12403
12404 int
12405 md_estimate_size_before_relax (fragS *fragP, segT segment)
12406 {
12407   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12408       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12409       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12410     {
12411       i386_classify_machine_dependent_frag (fragP);
12412       return fragP->tc_frag_data.length;
12413     }
12414
12415   /* We've already got fragP->fr_subtype right;  all we have to do is
12416      check for un-relaxable symbols.  On an ELF system, we can't relax
12417      an externally visible symbol, because it may be overridden by a
12418      shared library.  */
12419   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12420 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12421       || (IS_ELF
12422           && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12423                                                 fragP->fr_var))
12424 #endif
12425 #if defined (OBJ_COFF) && defined (TE_PE)
12426       || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12427           && S_IS_WEAK (fragP->fr_symbol))
12428 #endif
12429       )
12430     {
12431       /* Symbol is undefined in this segment, or we need to keep a
12432          reloc so that weak symbols can be overridden.  */
12433       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12434       enum bfd_reloc_code_real reloc_type;
12435       unsigned char *opcode;
12436       int old_fr_fix;
12437       fixS *fixP = NULL;
12438
12439       if (fragP->fr_var != NO_RELOC)
12440         reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12441       else if (size == 2)
12442         reloc_type = BFD_RELOC_16_PCREL;
12443 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12444       else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
12445                && need_plt32_p (fragP->fr_symbol))
12446         reloc_type = BFD_RELOC_X86_64_PLT32;
12447 #endif
12448       else
12449         reloc_type = BFD_RELOC_32_PCREL;
12450
12451       old_fr_fix = fragP->fr_fix;
12452       opcode = (unsigned char *) fragP->fr_opcode;
12453
12454       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12455         {
12456         case UNCOND_JUMP:
12457           /* Make jmp (0xeb) a (d)word displacement jump.  */
12458           opcode[0] = 0xe9;
12459           fragP->fr_fix += size;
12460           fixP = fix_new (fragP, old_fr_fix, size,
12461                           fragP->fr_symbol,
12462                           fragP->fr_offset, 1,
12463                           reloc_type);
12464           break;
12465
12466         case COND_JUMP86:
12467           if (size == 2
12468               && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12469             {
12470               /* Negate the condition, and branch past an
12471                  unconditional jump.  */
12472               opcode[0] ^= 1;
12473               opcode[1] = 3;
12474               /* Insert an unconditional jump.  */
12475               opcode[2] = 0xe9;
12476               /* We added two extra opcode bytes, and have a two byte
12477                  offset.  */
12478               fragP->fr_fix += 2 + 2;
12479               fix_new (fragP, old_fr_fix + 2, 2,
12480                        fragP->fr_symbol,
12481                        fragP->fr_offset, 1,
12482                        reloc_type);
12483               break;
12484             }
12485           /* Fall through.  */
12486
12487         case COND_JUMP:
12488           if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12489             {
12490               fragP->fr_fix += 1;
12491               fixP = fix_new (fragP, old_fr_fix, 1,
12492                               fragP->fr_symbol,
12493                               fragP->fr_offset, 1,
12494                               BFD_RELOC_8_PCREL);
12495               fixP->fx_signed = 1;
12496               break;
12497             }
12498
12499           /* This changes the byte-displacement jump 0x7N
12500              to the (d)word-displacement jump 0x0f,0x8N.  */
12501           opcode[1] = opcode[0] + 0x10;
12502           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12503           /* We've added an opcode byte.  */
12504           fragP->fr_fix += 1 + size;
12505           fixP = fix_new (fragP, old_fr_fix + 1, size,
12506                           fragP->fr_symbol,
12507                           fragP->fr_offset, 1,
12508                           reloc_type);
12509           break;
12510
12511         default:
12512           BAD_CASE (fragP->fr_subtype);
12513           break;
12514         }
12515
12516       /* All jumps handled here are signed, but don't unconditionally use a
12517          signed limit check for 32 and 16 bit jumps as we want to allow wrap
12518          around at 4G (outside of 64-bit mode) and 64k.  */
12519       if (size == 4 && flag_code == CODE_64BIT)
12520         fixP->fx_signed = 1;
12521
12522       frag_wane (fragP);
12523       return fragP->fr_fix - old_fr_fix;
12524     }
12525
12526   /* Guess size depending on current relax state.  Initially the relax
12527      state will correspond to a short jump and we return 1, because
12528      the variable part of the frag (the branch offset) is one byte
12529      long.  However, we can relax a section more than once and in that
12530      case we must either set fr_subtype back to the unrelaxed state,
12531      or return the value for the appropriate branch.  */
12532   return md_relax_table[fragP->fr_subtype].rlx_length;
12533 }
12534
12535 /* Called after relax() is finished.
12536
12537    In:  Address of frag.
12538         fr_type == rs_machine_dependent.
12539         fr_subtype is what the address relaxed to.
12540
12541    Out: Any fixSs and constants are set up.
12542         Caller will turn frag into a ".space 0".  */
12543
12544 void
12545 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12546                  fragS *fragP)
12547 {
12548   unsigned char *opcode;
12549   unsigned char *where_to_put_displacement = NULL;
12550   offsetT target_address;
12551   offsetT opcode_address;
12552   unsigned int extension = 0;
12553   offsetT displacement_from_opcode_start;
12554
12555   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12556       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12557       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12558     {
12559       /* Generate nop padding.  */
12560       unsigned int size = fragP->tc_frag_data.length;
12561       if (size)
12562         {
12563           if (size > fragP->tc_frag_data.max_bytes)
12564             abort ();
12565
12566           if (flag_debug)
12567             {
12568               const char *msg;
12569               const char *branch = "branch";
12570               const char *prefix = "";
12571               fragS *padding_fragP;
12572               if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12573                   == BRANCH_PREFIX)
12574                 {
12575                   padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12576                   switch (fragP->tc_frag_data.default_prefix)
12577                     {
12578                     default:
12579                       abort ();
12580                       break;
12581                     case CS_PREFIX_OPCODE:
12582                       prefix = " cs";
12583                       break;
12584                     case DS_PREFIX_OPCODE:
12585                       prefix = " ds";
12586                       break;
12587                     case ES_PREFIX_OPCODE:
12588                       prefix = " es";
12589                       break;
12590                     case FS_PREFIX_OPCODE:
12591                       prefix = " fs";
12592                       break;
12593                     case GS_PREFIX_OPCODE:
12594                       prefix = " gs";
12595                       break;
12596                     case SS_PREFIX_OPCODE:
12597                       prefix = " ss";
12598                       break;
12599                     }
12600                   if (padding_fragP)
12601                     msg = _("%s:%u: add %d%s at 0x%llx to align "
12602                             "%s within %d-byte boundary\n");
12603                   else
12604                     msg = _("%s:%u: add additional %d%s at 0x%llx to "
12605                             "align %s within %d-byte boundary\n");
12606                 }
12607               else
12608                 {
12609                   padding_fragP = fragP;
12610                   msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12611                           "%s within %d-byte boundary\n");
12612                 }
12613
12614               if (padding_fragP)
12615                 switch (padding_fragP->tc_frag_data.branch_type)
12616                   {
12617                   case align_branch_jcc:
12618                     branch = "jcc";
12619                     break;
12620                   case align_branch_fused:
12621                     branch = "fused jcc";
12622                     break;
12623                   case align_branch_jmp:
12624                     branch = "jmp";
12625                     break;
12626                   case align_branch_call:
12627                     branch = "call";
12628                     break;
12629                   case align_branch_indirect:
12630                     branch = "indiret branch";
12631                     break;
12632                   case align_branch_ret:
12633                     branch = "ret";
12634                     break;
12635                   default:
12636                     break;
12637                   }
12638
12639               fprintf (stdout, msg,
12640                        fragP->fr_file, fragP->fr_line, size, prefix,
12641                        (long long) fragP->fr_address, branch,
12642                        1 << align_branch_power);
12643             }
12644           if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12645             memset (fragP->fr_opcode,
12646                     fragP->tc_frag_data.default_prefix, size);
12647           else
12648             i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12649                                 size, 0);
12650           fragP->fr_fix += size;
12651         }
12652       return;
12653     }
12654
12655   opcode = (unsigned char *) fragP->fr_opcode;
12656
12657   /* Address we want to reach in file space.  */
12658   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12659
12660   /* Address opcode resides at in file space.  */
12661   opcode_address = fragP->fr_address + fragP->fr_fix;
12662
12663   /* Displacement from opcode start to fill into instruction.  */
12664   displacement_from_opcode_start = target_address - opcode_address;
12665
12666   if ((fragP->fr_subtype & BIG) == 0)
12667     {
12668       /* Don't have to change opcode.  */
12669       extension = 1;            /* 1 opcode + 1 displacement  */
12670       where_to_put_displacement = &opcode[1];
12671     }
12672   else
12673     {
12674       if (no_cond_jump_promotion
12675           && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12676         as_warn_where (fragP->fr_file, fragP->fr_line,
12677                        _("long jump required"));
12678
12679       switch (fragP->fr_subtype)
12680         {
12681         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12682           extension = 4;                /* 1 opcode + 4 displacement  */
12683           opcode[0] = 0xe9;
12684           where_to_put_displacement = &opcode[1];
12685           break;
12686
12687         case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12688           extension = 2;                /* 1 opcode + 2 displacement  */
12689           opcode[0] = 0xe9;
12690           where_to_put_displacement = &opcode[1];
12691           break;
12692
12693         case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12694         case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12695           extension = 5;                /* 2 opcode + 4 displacement  */
12696           opcode[1] = opcode[0] + 0x10;
12697           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12698           where_to_put_displacement = &opcode[2];
12699           break;
12700
12701         case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12702           extension = 3;                /* 2 opcode + 2 displacement  */
12703           opcode[1] = opcode[0] + 0x10;
12704           opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12705           where_to_put_displacement = &opcode[2];
12706           break;
12707
12708         case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12709           extension = 4;
12710           opcode[0] ^= 1;
12711           opcode[1] = 3;
12712           opcode[2] = 0xe9;
12713           where_to_put_displacement = &opcode[3];
12714           break;
12715
12716         default:
12717           BAD_CASE (fragP->fr_subtype);
12718           break;
12719         }
12720     }
12721
12722   /* If size if less then four we are sure that the operand fits,
12723      but if it's 4, then it could be that the displacement is larger
12724      then -/+ 2GB.  */
12725   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12726       && object_64bit
12727       && ((addressT) (displacement_from_opcode_start - extension
12728                       + ((addressT) 1 << 31))
12729           > (((addressT) 2 << 31) - 1)))
12730     {
12731       as_bad_where (fragP->fr_file, fragP->fr_line,
12732                     _("jump target out of range"));
12733       /* Make us emit 0.  */
12734       displacement_from_opcode_start = extension;
12735     }
12736   /* Now put displacement after opcode.  */
12737   md_number_to_chars ((char *) where_to_put_displacement,
12738                       (valueT) (displacement_from_opcode_start - extension),
12739                       DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12740   fragP->fr_fix += extension;
12741 }
12742 \f
12743 /* Apply a fixup (fixP) to segment data, once it has been determined
12744    by our caller that we have all the info we need to fix it up.
12745
12746    Parameter valP is the pointer to the value of the bits.
12747
12748    On the 386, immediates, displacements, and data pointers are all in
12749    the same (little-endian) format, so we don't need to care about which
12750    we are handling.  */
12751
void
md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
{
  /* Apply fixup FIXP: adjust the value passed in *VALP as the target
     format requires, store the result back through VALP (except when
     TE_Mach is defined), and write the final bytes into the frag.  SEG
     is only consulted in the ELF and PE specific code below.

     P addresses the bytes being patched inside the frag's literal data;
     VALUE is the working copy of *VALP.  */
  char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
  valueT value = *valP;

#if !defined (TE_Mach)
  /* A pc-relative fixup that still carries an absolute relocation type
     is switched to the pc-relative relocation of the same width.  Note
     that BFD_RELOC_X86_64_32S maps to the plain 32-bit pc-relative
     type.  */
  if (fixP->fx_pcrel)
    {
      switch (fixP->fx_r_type)
        {
        default:
          break;

        case BFD_RELOC_64:
          fixP->fx_r_type = BFD_RELOC_64_PCREL;
          break;
        case BFD_RELOC_32:
        case BFD_RELOC_X86_64_32S:
          fixP->fx_r_type = BFD_RELOC_32_PCREL;
          break;
        case BFD_RELOC_16:
          fixP->fx_r_type = BFD_RELOC_16_PCREL;
          break;
        case BFD_RELOC_8:
          fixP->fx_r_type = BFD_RELOC_8_PCREL;
          break;
        }
    }

  /* The adjustments in this block only apply to pc-relative relocations
     against a symbol when REL (not RELA) relocations are in use.  */
  if (fixP->fx_addsy != NULL
      && (fixP->fx_r_type == BFD_RELOC_32_PCREL
          || fixP->fx_r_type == BFD_RELOC_64_PCREL
          || fixP->fx_r_type == BFD_RELOC_16_PCREL
          || fixP->fx_r_type == BFD_RELOC_8_PCREL)
      && !use_rela_relocations)
    {
      /* This is a hack.  There should be a better way to handle this.
         This covers for the fact that bfd_install_relocation will
         subtract the current location (for partial_inplace, PC relative
         relocations); see more below.  */
#ifndef OBJ_AOUT
      if (IS_ELF
#ifdef TE_PE
          || OUTPUT_FLAVOR == bfd_target_coff_flavour
#endif
          )
        value += fixP->fx_where + fixP->fx_frag->fr_address;
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      if (IS_ELF)
        {
          segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);

          /* Second addition of the fixup address: done when the symbol
             lives in SEG, or is a section symbol of a non-absolute
             section, and no relocation is being forced.  */
          if ((sym_seg == seg
               || (symbol_section_p (fixP->fx_addsy)
                   && sym_seg != absolute_section))
              && !generic_force_reloc (fixP))
            {
              /* Yes, we add the values in twice.  This is because
                 bfd_install_relocation subtracts them out again.  I think
                 bfd_install_relocation is broken, but I don't dare change
                 it.  FIXME.  */
              value += fixP->fx_where + fixP->fx_frag->fr_address;
            }
        }
#endif
#if defined (OBJ_COFF) && defined (TE_PE)
      /* For some reason, the PE format does not store a
         section address offset for a PC relative symbol.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != seg
          || S_IS_WEAK (fixP->fx_addsy))
        value += md_pcrel_from (fixP);
#endif
    }
#if defined (OBJ_COFF) && defined (TE_PE)
  /* Non-pc-relative reference to a weak symbol: take the symbol's own
     value back out of VALUE, except (when TE_PEP is not defined) for
     symbols whose section has SEC_CODE set.  */
  if (fixP->fx_addsy != NULL
      && S_IS_WEAK (fixP->fx_addsy)
      /* PR 16858: Do not modify weak function references.  */
      && ! fixP->fx_pcrel)
    {
#if !defined (TE_PEP)
      /* For x86 PE weak function symbols are neither PC-relative
         nor do they set S_IS_FUNCTION.  So the only reliable way
         to detect them is to check the flags of their containing
         section.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
          && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
        ;
      else
#endif
      value -= S_GET_VALUE (fixP->fx_addsy);
    }
#endif

  /* Fix a few things - the dynamic linker expects certain values here,
     and we must not disappoint it.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (IS_ELF && fixP->fx_addsy)
    switch (fixP->fx_r_type)
      {
      case BFD_RELOC_386_PLT32:
      case BFD_RELOC_X86_64_PLT32:
        /* Make the jump instruction point to the address of the operand.
           At runtime we merely add the offset to the actual PLT entry.
           NB: Subtract the offset size only for jump instructions.  */
        if (fixP->fx_pcrel)
          value = -4;
        break;

      case BFD_RELOC_386_TLS_GD:
      case BFD_RELOC_386_TLS_LDM:
      case BFD_RELOC_386_TLS_IE_32:
      case BFD_RELOC_386_TLS_IE:
      case BFD_RELOC_386_TLS_GOTIE:
      case BFD_RELOC_386_TLS_GOTDESC:
      case BFD_RELOC_X86_64_TLSGD:
      case BFD_RELOC_X86_64_TLSLD:
      case BFD_RELOC_X86_64_GOTTPOFF:
      case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
        value = 0; /* Fully resolved at runtime.  No addend.  */
        /* Fallthrough */
      case BFD_RELOC_386_TLS_LE:
      case BFD_RELOC_386_TLS_LDO_32:
      case BFD_RELOC_386_TLS_LE_32:
      case BFD_RELOC_X86_64_DTPOFF32:
      case BFD_RELOC_X86_64_DTPOFF64:
      case BFD_RELOC_X86_64_TPOFF32:
      case BFD_RELOC_X86_64_TPOFF64:
        /* Every TLS relocation marks its symbol as thread-local; the
           group above additionally drops the addend first.  */
        S_SET_THREAD_LOCAL (fixP->fx_addsy);
        break;

      case BFD_RELOC_386_TLS_DESC_CALL:
      case BFD_RELOC_X86_64_TLSDESC_CALL:
        /* Early return: neither *VALP nor the frag bytes are written
           for these relocations, so the assignment to VALUE has no
           effect outside this function.  */
        value = 0; /* Fully resolved at runtime.  No addend.  */
        S_SET_THREAD_LOCAL (fixP->fx_addsy);
        fixP->fx_done = 0;
        return;

      case BFD_RELOC_VTABLE_INHERIT:
      case BFD_RELOC_VTABLE_ENTRY:
        /* Left unresolved; likewise returns without touching *VALP or
           the frag.  */
        fixP->fx_done = 0;
        return;

      default:
        break;
      }
#endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)  */

  /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
  if (!object_64bit)
    value = extend_to_32bit_address (value);

  /* Hand the adjusted value back to the caller.  */
  *valP = value;
#endif /* !defined (TE_Mach)  */

  /* Are we finished with this relocation now?  */
  if (fixP->fx_addsy == NULL)
    {
      /* No symbol is attached, so the fixup is complete.  A 32S fixup
         is additionally flagged as signed.  */
      fixP->fx_done = 1;
      switch (fixP->fx_r_type)
        {
        case BFD_RELOC_X86_64_32S:
          fixP->fx_signed = 1;
          break;

        default:
          break;
        }
    }
#if defined (OBJ_COFF) && defined (TE_PE)
  else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
    {
      fixP->fx_done = 0;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      /* Clear out the frag for now.  */
      value = 0;
    }
#endif
  else if (use_rela_relocations)
    {
      /* Overflow checking is suppressed unless 64-bit relocations are
         disallowed and the fixup has a real relocation type.  */
      if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
        fixP->fx_no_overflow = 1;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      /* Zero is what gets written into the frag below.  */
      value = 0;
    }

  /* Store VALUE (possibly zeroed above) into the fx_size bytes at P.  */
  md_number_to_chars (p, value, fixP->fx_size);
}
12943 \f
const char *
md_atof (int type, char *litP, int *sizeP)
{
  /* Hand the conversion to the generic IEEE helper, passing false as
     its final argument.
     NOTE(review): the previous comment here spoke of emitting the
     LITTLENUMs in reverse order "in accord with the bigendian 386";
     the comment above md_apply_fix calls the format little-endian, so
     false presumably selects little-endian LITTLENUM order -- confirm
     against ieee_md_atof.  */
  return ieee_md_atof (type, litP, sizeP, false);
}
12951 \f
/* Scratch storage for output_invalid; large enough for either of the
   two formats it produces plus the terminating NUL.  */
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Format character C for use in a diagnostic: quoted when printable,
   otherwise as a parenthesised hex byte.  The result lives in a static
   buffer which is overwritten by the next call.  */

static char *
output_invalid (int c)
{
  if (!ISPRINT (c))
    snprintf (output_invalid_buf, sizeof (output_invalid_buf),
              "(0x%x)", (unsigned char) c);
  else
    snprintf (output_invalid_buf, sizeof (output_invalid_buf),
              "'%c'", c);

  return output_invalid_buf;
}
12965
12966 /* Verify that @r can be used in the current context.  */
12967
12968 static bool check_register (const reg_entry *r)
12969 {
12970   if (allow_pseudo_reg)
12971     return true;
12972
12973   if (operand_type_all_zero (&r->reg_type))
12974     return false;
12975
12976   if ((r->reg_type.bitfield.dword
12977        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12978        || r->reg_type.bitfield.class == RegCR
12979        || r->reg_type.bitfield.class == RegDR)
12980       && !cpu_arch_flags.bitfield.cpui386)
12981     return false;
12982
12983   if (r->reg_type.bitfield.class == RegTR
12984       && (flag_code == CODE_64BIT
12985           || !cpu_arch_flags.bitfield.cpui386
12986           || cpu_arch_isa_flags.bitfield.cpui586
12987           || cpu_arch_isa_flags.bitfield.cpui686))
12988     return false;
12989
12990   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12991     return false;
12992
12993   if (!cpu_arch_flags.bitfield.cpuavx512f)
12994     {
12995       if (r->reg_type.bitfield.zmmword
12996           || r->reg_type.bitfield.class == RegMask)
12997         return false;
12998
12999       if (!cpu_arch_flags.bitfield.cpuavx)
13000         {
13001           if (r->reg_type.bitfield.ymmword)
13002             return false;
13003
13004           if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
13005             return false;
13006         }
13007     }
13008
13009   if (r->reg_type.bitfield.tmmword
13010       && (!cpu_arch_flags.bitfield.cpuamx_tile
13011           || flag_code != CODE_64BIT))
13012     return false;
13013
13014   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
13015     return false;
13016
13017   /* Don't allow fake index register unless allow_index_reg isn't 0. */
13018   if (!allow_index_reg && r->reg_num == RegIZ)
13019     return false;
13020
13021   /* Upper 16 vector registers are only available with VREX in 64bit
13022      mode, and require EVEX encoding.  */
13023   if (r->reg_flags & RegVRex)
13024     {
13025       if (!cpu_arch_flags.bitfield.cpuavx512f
13026           || flag_code != CODE_64BIT)
13027         return false;
13028
13029       if (i.vec_encoding == vex_encoding_default)
13030         i.vec_encoding = vex_encoding_evex;
13031       else if (i.vec_encoding != vex_encoding_evex)
13032         i.vec_encoding = vex_encoding_error;
13033     }
13034
13035   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
13036       && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
13037       && flag_code != CODE_64BIT)
13038     return false;
13039
13040   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
13041       && !intel_syntax)
13042     return false;
13043
13044   return true;
13045 }
13046
13047 /* REG_STRING starts *before* REGISTER_PREFIX.  */
13048
13049 static const reg_entry *
13050 parse_real_register (char *reg_string, char **end_op)
13051 {
13052   char *s = reg_string;
13053   char *p;
13054   char reg_name_given[MAX_REG_NAME_SIZE + 1];
13055   const reg_entry *r;
13056
13057   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
13058   if (*s == REGISTER_PREFIX)
13059     ++s;
13060
13061   if (is_space_char (*s))
13062     ++s;
13063
13064   p = reg_name_given;
13065   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
13066     {
13067       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
13068         return (const reg_entry *) NULL;
13069       s++;
13070     }
13071
13072   /* For naked regs, make sure that we are not dealing with an identifier.
13073      This prevents confusing an identifier like `eax_var' with register
13074      `eax'.  */
13075   if (allow_naked_reg && identifier_chars[(unsigned char) *s])
13076     return (const reg_entry *) NULL;
13077
13078   *end_op = s;
13079
13080   r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
13081
13082   /* Handle floating point regs, allowing spaces in the (i) part.  */
13083   if (r == reg_st0)
13084     {
13085       if (!cpu_arch_flags.bitfield.cpu8087
13086           && !cpu_arch_flags.bitfield.cpu287
13087           && !cpu_arch_flags.bitfield.cpu387
13088           && !allow_pseudo_reg)
13089         return (const reg_entry *) NULL;
13090
13091       if (is_space_char (*s))
13092         ++s;
13093       if (*s == '(')
13094         {
13095           ++s;
13096           if (is_space_char (*s))
13097             ++s;
13098           if (*s >= '0' && *s <= '7')
13099             {
13100               int fpr = *s - '0';
13101               ++s;
13102               if (is_space_char (*s))
13103                 ++s;
13104               if (*s == ')')
13105                 {
13106                   *end_op = s + 1;
13107                   know (r[fpr].reg_num == fpr);
13108                   return r + fpr;
13109                 }
13110             }
13111           /* We have "%st(" then garbage.  */
13112           return (const reg_entry *) NULL;
13113         }
13114     }
13115
13116   return r && check_register (r) ? r : NULL;
13117 }
13118
13119 /* REG_STRING starts *before* REGISTER_PREFIX.  */
13120
13121 static const reg_entry *
13122 parse_register (char *reg_string, char **end_op)
13123 {
13124   const reg_entry *r;
13125
13126   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
13127     r = parse_real_register (reg_string, end_op);
13128   else
13129     r = NULL;
13130   if (!r)
13131     {
13132       char *save = input_line_pointer;
13133       char c;
13134       symbolS *symbolP;
13135
13136       input_line_pointer = reg_string;
13137       c = get_symbol_name (&reg_string);
13138       symbolP = symbol_find (reg_string);
13139       while (symbolP && S_GET_SEGMENT (symbolP) != reg_section)
13140         {
13141           const expressionS *e = symbol_get_value_expression(symbolP);
13142
13143           if (e->X_op != O_symbol || e->X_add_number)
13144             break;
13145           symbolP = e->X_add_symbol;
13146         }
13147       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
13148         {
13149           const expressionS *e = symbol_get_value_expression (symbolP);
13150
13151           know (e->X_op == O_register);
13152           know (e->X_add_number >= 0
13153                 && (valueT) e->X_add_number < i386_regtab_size);
13154           r = i386_regtab + e->X_add_number;
13155           if (!check_register (r))
13156             {
13157               as_bad (_("register '%s%s' cannot be used here"),
13158                       register_prefix, r->reg_name);
13159               r = &bad_reg;
13160             }
13161           *end_op = input_line_pointer;
13162         }
13163       *input_line_pointer = c;
13164       input_line_pointer = save;
13165     }
13166   return r;
13167 }
13168
13169 int
13170 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13171 {
13172   const reg_entry *r = NULL;
13173   char *end = input_line_pointer;
13174
13175   *end = *nextcharP;
13176   if (*name == REGISTER_PREFIX || allow_naked_reg)
13177     r = parse_real_register (name, &input_line_pointer);
13178   if (r && end <= input_line_pointer)
13179     {
13180       *nextcharP = *input_line_pointer;
13181       *input_line_pointer = 0;
13182       if (r != &bad_reg)
13183         {
13184           e->X_op = O_register;
13185           e->X_add_number = r - i386_regtab;
13186         }
13187       else
13188           e->X_op = O_illegal;
13189       return 1;
13190     }
13191   input_line_pointer = end;
13192   *end = 0;
13193   return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13194 }
13195
13196 void
13197 md_operand (expressionS *e)
13198 {
13199   char *end;
13200   const reg_entry *r;
13201
13202   switch (*input_line_pointer)
13203     {
13204     case REGISTER_PREFIX:
13205       r = parse_real_register (input_line_pointer, &end);
13206       if (r)
13207         {
13208           e->X_op = O_register;
13209           e->X_add_number = r - i386_regtab;
13210           input_line_pointer = end;
13211         }
13212       break;
13213
13214     case '[':
13215       gas_assert (intel_syntax);
13216       end = input_line_pointer++;
13217       expression (e);
13218       if (*input_line_pointer == ']')
13219         {
13220           ++input_line_pointer;
13221           e->X_op_symbol = make_expr_symbol (e);
13222           e->X_add_symbol = NULL;
13223           e->X_add_number = 0;
13224           e->X_op = O_index;
13225         }
13226       else
13227         {
13228           e->X_op = O_absent;
13229           input_line_pointer = end;
13230         }
13231       break;
13232     }
13233 }
13234
13235 \f
/* Single-letter command line options.  ELF-capable builds accept the
   additional letters k, V, Q (with an argument) and s, which
   md_parse_option handles inside its ELF-only section.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
const char *md_shortopts = "kVQ:sqnO::";
#else
const char *md_shortopts = "qnO::";
#endif

/* Codes for the long options listed in md_longopts; each is a distinct
   offset from OPTION_MD_BASE and is what md_parse_option switches on.  */
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
#define OPTION_MARCH (OPTION_MD_BASE + 3)
#define OPTION_MTUNE (OPTION_MD_BASE + 4)
#define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
#define OPTION_X32 (OPTION_MD_BASE + 14)
#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
#define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
#define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
#define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
#define OPTION_MSHARED (OPTION_MD_BASE + 21)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
#define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
13277
/* Long command line options.  Each entry gives the option name,
   whether it takes an argument, and the OPTION_* code that
   md_parse_option receives for it.  Some entries only exist for
   particular object formats or target environments.  */
struct option md_longopts[] =
{
  {"32", no_argument, NULL, OPTION_32},
  /* --64 is offered for ELF, PE/PE+ and Mach-O configurations.  */
#if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
  {"64", no_argument, NULL, OPTION_64},
#endif
  /* Options available only in ELF-capable builds.  */
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"x32", no_argument, NULL, OPTION_X32},
  {"mshared", no_argument, NULL, OPTION_MSHARED},
  {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
#endif
  {"divide", no_argument, NULL, OPTION_DIVIDE},
  {"march", required_argument, NULL, OPTION_MARCH},
  {"mtune", required_argument, NULL, OPTION_MTUNE},
  {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
  {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
  {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
  {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
  {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
  {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
  {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
  /* PE / PE+ only.  */
# if defined (TE_PE) || defined (TE_PEP)
  {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
#endif
  {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
  {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
  {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
  {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
  {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
  {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
  {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
  {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
  {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
  {"mlfence-before-indirect-branch", required_argument, NULL,
   OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
  {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
  {"mamd64", no_argument, NULL, OPTION_MAMD64},
  {"mintel64", no_argument, NULL, OPTION_MINTEL64},
  /* Terminating all-zero entry.  */
  {NULL, no_argument, NULL, 0}
};
/* Size of the table above in bytes (not a count of entries).  */
size_t md_longopts_size = sizeof (md_longopts);
13326
13327 int
13328 md_parse_option (int c, const char *arg)
13329 {
13330   unsigned int j;
13331   char *arch, *next, *saved, *type;
13332
13333   switch (c)
13334     {
13335     case 'n':
13336       optimize_align_code = 0;
13337       break;
13338
13339     case 'q':
13340       quiet_warnings = 1;
13341       break;
13342
13343 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13344       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13345          should be emitted or not.  FIXME: Not implemented.  */
13346     case 'Q':
13347       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13348         return 0;
13349       break;
13350
13351       /* -V: SVR4 argument to print version ID.  */
13352     case 'V':
13353       print_version_id ();
13354       break;
13355
13356       /* -k: Ignore for FreeBSD compatibility.  */
13357     case 'k':
13358       break;
13359
13360     case 's':
13361       /* -s: On i386 Solaris, this tells the native assembler to use
13362          .stab instead of .stab.excl.  We always use .stab anyhow.  */
13363       break;
13364
13365     case OPTION_MSHARED:
13366       shared = 1;
13367       break;
13368
13369     case OPTION_X86_USED_NOTE:
13370       if (strcasecmp (arg, "yes") == 0)
13371         x86_used_note = 1;
13372       else if (strcasecmp (arg, "no") == 0)
13373         x86_used_note = 0;
13374       else
13375         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13376       break;
13377
13378
13379 #endif
13380 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13381      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13382     case OPTION_64:
13383       {
13384         const char **list, **l;
13385
13386         list = bfd_target_list ();
13387         for (l = list; *l != NULL; l++)
13388           if (startswith (*l, "elf64-x86-64")
13389               || strcmp (*l, "coff-x86-64") == 0
13390               || strcmp (*l, "pe-x86-64") == 0
13391               || strcmp (*l, "pei-x86-64") == 0
13392               || strcmp (*l, "mach-o-x86-64") == 0)
13393             {
13394               default_arch = "x86_64";
13395               break;
13396             }
13397         if (*l == NULL)
13398           as_fatal (_("no compiled in support for x86_64"));
13399         free (list);
13400       }
13401       break;
13402 #endif
13403
13404 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13405     case OPTION_X32:
13406       if (IS_ELF)
13407         {
13408           const char **list, **l;
13409
13410           list = bfd_target_list ();
13411           for (l = list; *l != NULL; l++)
13412             if (startswith (*l, "elf32-x86-64"))
13413               {
13414                 default_arch = "x86_64:32";
13415                 break;
13416               }
13417           if (*l == NULL)
13418             as_fatal (_("no compiled in support for 32bit x86_64"));
13419           free (list);
13420         }
13421       else
13422         as_fatal (_("32bit x86_64 is only supported for ELF"));
13423       break;
13424 #endif
13425
13426     case OPTION_32:
13427       default_arch = "i386";
13428       break;
13429
13430     case OPTION_DIVIDE:
13431 #ifdef SVR4_COMMENT_CHARS
13432       {
13433         char *n, *t;
13434         const char *s;
13435
13436         n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13437         t = n;
13438         for (s = i386_comment_chars; *s != '\0'; s++)
13439           if (*s != '/')
13440             *t++ = *s;
13441         *t = '\0';
13442         i386_comment_chars = n;
13443       }
13444 #endif
13445       break;
13446
13447     case OPTION_MARCH:
13448       saved = xstrdup (arg);
13449       arch = saved;
13450       /* Allow -march=+nosse.  */
13451       if (*arch == '+')
13452         arch++;
13453       do
13454         {
13455           if (*arch == '.')
13456             as_fatal (_("invalid -march= option: `%s'"), arg);
13457           next = strchr (arch, '+');
13458           if (next)
13459             *next++ = '\0';
13460           for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13461             {
13462               if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
13463                   && strcmp (arch, cpu_arch[j].name) == 0)
13464                 {
13465                   /* Processor.  */
13466                   if (! cpu_arch[j].enable.bitfield.cpui386)
13467                     continue;
13468
13469                   cpu_arch_name = cpu_arch[j].name;
13470                   free (cpu_sub_arch_name);
13471                   cpu_sub_arch_name = NULL;
13472                   cpu_arch_flags = cpu_arch[j].enable;
13473                   cpu_arch_isa = cpu_arch[j].type;
13474                   cpu_arch_isa_flags = cpu_arch[j].enable;
13475                   if (!cpu_arch_tune_set)
13476                     {
13477                       cpu_arch_tune = cpu_arch_isa;
13478                       cpu_arch_tune_flags = cpu_arch_isa_flags;
13479                     }
13480                   break;
13481                 }
13482               else if (cpu_arch[j].type == PROCESSOR_NONE
13483                        && strcmp (arch, cpu_arch[j].name) == 0
13484                        && !cpu_flags_all_zero (&cpu_arch[j].enable))
13485                 {
13486                   /* ISA extension.  */
13487                   i386_cpu_flags flags;
13488
13489                   flags = cpu_flags_or (cpu_arch_flags,
13490                                         cpu_arch[j].enable);
13491
13492                   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13493                     {
13494                       extend_cpu_sub_arch_name (arch);
13495                       cpu_arch_flags = flags;
13496                       cpu_arch_isa_flags = flags;
13497                     }
13498                   else
13499                     cpu_arch_isa_flags
13500                       = cpu_flags_or (cpu_arch_isa_flags,
13501                                       cpu_arch[j].enable);
13502                   break;
13503                 }
13504             }
13505
13506           if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
13507             {
13508               /* Disable an ISA extension.  */
13509               for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13510                 if (cpu_arch[j].type == PROCESSOR_NONE
13511                     && strcmp (arch + 2, cpu_arch[j].name) == 0)
13512                   {
13513                     i386_cpu_flags flags;
13514
13515                     flags = cpu_flags_and_not (cpu_arch_flags,
13516                                                cpu_arch[j].disable);
13517                     if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13518                       {
13519                         extend_cpu_sub_arch_name (arch);
13520                         cpu_arch_flags = flags;
13521                         cpu_arch_isa_flags = flags;
13522                       }
13523                     break;
13524                   }
13525             }
13526
13527           if (j >= ARRAY_SIZE (cpu_arch))
13528             as_fatal (_("invalid -march= option: `%s'"), arg);
13529
13530           arch = next;
13531         }
13532       while (next != NULL);
13533       free (saved);
13534       break;
13535
13536     case OPTION_MTUNE:
13537       if (*arg == '.')
13538         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13539       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13540         {
13541           if (cpu_arch[j].type != PROCESSOR_NONE
13542               && strcmp (arg, cpu_arch[j].name) == 0)
13543             {
13544               cpu_arch_tune_set = 1;
13545               cpu_arch_tune = cpu_arch [j].type;
13546               cpu_arch_tune_flags = cpu_arch[j].enable;
13547               break;
13548             }
13549         }
13550       if (j >= ARRAY_SIZE (cpu_arch))
13551         as_fatal (_("invalid -mtune= option: `%s'"), arg);
13552       break;
13553
13554     case OPTION_MMNEMONIC:
13555       if (strcasecmp (arg, "att") == 0)
13556         intel_mnemonic = 0;
13557       else if (strcasecmp (arg, "intel") == 0)
13558         intel_mnemonic = 1;
13559       else
13560         as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13561       break;
13562
13563     case OPTION_MSYNTAX:
13564       if (strcasecmp (arg, "att") == 0)
13565         intel_syntax = 0;
13566       else if (strcasecmp (arg, "intel") == 0)
13567         intel_syntax = 1;
13568       else
13569         as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13570       break;
13571
13572     case OPTION_MINDEX_REG:
13573       allow_index_reg = 1;
13574       break;
13575
13576     case OPTION_MNAKED_REG:
13577       allow_naked_reg = 1;
13578       break;
13579
13580     case OPTION_MSSE2AVX:
13581       sse2avx = 1;
13582       break;
13583
13584     case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
13585       use_unaligned_vector_move = 1;
13586       break;
13587
13588     case OPTION_MSSE_CHECK:
13589       if (strcasecmp (arg, "error") == 0)
13590         sse_check = check_error;
13591       else if (strcasecmp (arg, "warning") == 0)
13592         sse_check = check_warning;
13593       else if (strcasecmp (arg, "none") == 0)
13594         sse_check = check_none;
13595       else
13596         as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13597       break;
13598
13599     case OPTION_MOPERAND_CHECK:
13600       if (strcasecmp (arg, "error") == 0)
13601         operand_check = check_error;
13602       else if (strcasecmp (arg, "warning") == 0)
13603         operand_check = check_warning;
13604       else if (strcasecmp (arg, "none") == 0)
13605         operand_check = check_none;
13606       else
13607         as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13608       break;
13609
13610     case OPTION_MAVXSCALAR:
13611       if (strcasecmp (arg, "128") == 0)
13612         avxscalar = vex128;
13613       else if (strcasecmp (arg, "256") == 0)
13614         avxscalar = vex256;
13615       else
13616         as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13617       break;
13618
13619     case OPTION_MVEXWIG:
13620       if (strcmp (arg, "0") == 0)
13621         vexwig = vexw0;
13622       else if (strcmp (arg, "1") == 0)
13623         vexwig = vexw1;
13624       else
13625         as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13626       break;
13627
13628     case OPTION_MADD_BND_PREFIX:
13629       add_bnd_prefix = 1;
13630       break;
13631
13632     case OPTION_MEVEXLIG:
13633       if (strcmp (arg, "128") == 0)
13634         evexlig = evexl128;
13635       else if (strcmp (arg, "256") == 0)
13636         evexlig = evexl256;
13637       else  if (strcmp (arg, "512") == 0)
13638         evexlig = evexl512;
13639       else
13640         as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13641       break;
13642
13643     case OPTION_MEVEXRCIG:
13644       if (strcmp (arg, "rne") == 0)
13645         evexrcig = rne;
13646       else if (strcmp (arg, "rd") == 0)
13647         evexrcig = rd;
13648       else if (strcmp (arg, "ru") == 0)
13649         evexrcig = ru;
13650       else if (strcmp (arg, "rz") == 0)
13651         evexrcig = rz;
13652       else
13653         as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13654       break;
13655
13656     case OPTION_MEVEXWIG:
13657       if (strcmp (arg, "0") == 0)
13658         evexwig = evexw0;
13659       else if (strcmp (arg, "1") == 0)
13660         evexwig = evexw1;
13661       else
13662         as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13663       break;
13664
13665 # if defined (TE_PE) || defined (TE_PEP)
13666     case OPTION_MBIG_OBJ:
13667       use_big_obj = 1;
13668       break;
13669 #endif
13670
13671     case OPTION_MOMIT_LOCK_PREFIX:
13672       if (strcasecmp (arg, "yes") == 0)
13673         omit_lock_prefix = 1;
13674       else if (strcasecmp (arg, "no") == 0)
13675         omit_lock_prefix = 0;
13676       else
13677         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13678       break;
13679
13680     case OPTION_MFENCE_AS_LOCK_ADD:
13681       if (strcasecmp (arg, "yes") == 0)
13682         avoid_fence = 1;
13683       else if (strcasecmp (arg, "no") == 0)
13684         avoid_fence = 0;
13685       else
13686         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13687       break;
13688
13689     case OPTION_MLFENCE_AFTER_LOAD:
13690       if (strcasecmp (arg, "yes") == 0)
13691         lfence_after_load = 1;
13692       else if (strcasecmp (arg, "no") == 0)
13693         lfence_after_load = 0;
13694       else
13695         as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13696       break;
13697
13698     case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13699       if (strcasecmp (arg, "all") == 0)
13700         {
13701           lfence_before_indirect_branch = lfence_branch_all;
13702           if (lfence_before_ret == lfence_before_ret_none)
13703             lfence_before_ret = lfence_before_ret_shl;
13704         }
13705       else if (strcasecmp (arg, "memory") == 0)
13706         lfence_before_indirect_branch = lfence_branch_memory;
13707       else if (strcasecmp (arg, "register") == 0)
13708         lfence_before_indirect_branch = lfence_branch_register;
13709       else if (strcasecmp (arg, "none") == 0)
13710         lfence_before_indirect_branch = lfence_branch_none;
13711       else
13712         as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13713                   arg);
13714       break;
13715
13716     case OPTION_MLFENCE_BEFORE_RET:
13717       if (strcasecmp (arg, "or") == 0)
13718         lfence_before_ret = lfence_before_ret_or;
13719       else if (strcasecmp (arg, "not") == 0)
13720         lfence_before_ret = lfence_before_ret_not;
13721       else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13722         lfence_before_ret = lfence_before_ret_shl;
13723       else if (strcasecmp (arg, "none") == 0)
13724         lfence_before_ret = lfence_before_ret_none;
13725       else
13726         as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13727                   arg);
13728       break;
13729
13730     case OPTION_MRELAX_RELOCATIONS:
13731       if (strcasecmp (arg, "yes") == 0)
13732         generate_relax_relocations = 1;
13733       else if (strcasecmp (arg, "no") == 0)
13734         generate_relax_relocations = 0;
13735       else
13736         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13737       break;
13738
13739     case OPTION_MALIGN_BRANCH_BOUNDARY:
13740       {
13741         char *end;
13742         long int align = strtoul (arg, &end, 0);
13743         if (*end == '\0')
13744           {
13745             if (align == 0)
13746               {
13747                 align_branch_power = 0;
13748                 break;
13749               }
13750             else if (align >= 16)
13751               {
13752                 int align_power;
13753                 for (align_power = 0;
13754                      (align & 1) == 0;
13755                      align >>= 1, align_power++)
13756                   continue;
13757                 /* Limit alignment power to 31.  */
13758                 if (align == 1 && align_power < 32)
13759                   {
13760                     align_branch_power = align_power;
13761                     break;
13762                   }
13763               }
13764           }
13765         as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13766       }
13767       break;
13768
13769     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13770       {
13771         char *end;
13772         int align = strtoul (arg, &end, 0);
13773         /* Some processors only support 5 prefixes.  */
13774         if (*end == '\0' && align >= 0 && align < 6)
13775           {
13776             align_branch_prefix_size = align;
13777             break;
13778           }
13779         as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13780                   arg);
13781       }
13782       break;
13783
13784     case OPTION_MALIGN_BRANCH:
13785       align_branch = 0;
13786       saved = xstrdup (arg);
13787       type = saved;
13788       do
13789         {
13790           next = strchr (type, '+');
13791           if (next)
13792             *next++ = '\0';
13793           if (strcasecmp (type, "jcc") == 0)
13794             align_branch |= align_branch_jcc_bit;
13795           else if (strcasecmp (type, "fused") == 0)
13796             align_branch |= align_branch_fused_bit;
13797           else if (strcasecmp (type, "jmp") == 0)
13798             align_branch |= align_branch_jmp_bit;
13799           else if (strcasecmp (type, "call") == 0)
13800             align_branch |= align_branch_call_bit;
13801           else if (strcasecmp (type, "ret") == 0)
13802             align_branch |= align_branch_ret_bit;
13803           else if (strcasecmp (type, "indirect") == 0)
13804             align_branch |= align_branch_indirect_bit;
13805           else
13806             as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13807           type = next;
13808         }
13809       while (next != NULL);
13810       free (saved);
13811       break;
13812
13813     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
13814       align_branch_power = 5;
13815       align_branch_prefix_size = 5;
13816       align_branch = (align_branch_jcc_bit
13817                       | align_branch_fused_bit
13818                       | align_branch_jmp_bit);
13819       break;
13820
13821     case OPTION_MAMD64:
13822       isa64 = amd64;
13823       break;
13824
13825     case OPTION_MINTEL64:
13826       isa64 = intel64;
13827       break;
13828
13829     case 'O':
13830       if (arg == NULL)
13831         {
13832           optimize = 1;
13833           /* Turn off -Os.  */
13834           optimize_for_space = 0;
13835         }
13836       else if (*arg == 's')
13837         {
13838           optimize_for_space = 1;
13839           /* Turn on all encoding optimizations.  */
13840           optimize = INT_MAX;
13841         }
13842       else
13843         {
13844           optimize = atoi (arg);
13845           /* Turn off -Os.  */
13846           optimize_for_space = 0;
13847         }
13848       break;
13849
13850     default:
13851       return 0;
13852     }
13853   return 1;
13854 }
13855
/* One output line's worth of blanks.  show_arch initializes its static
   line buffer from this literal and, like output_message, uses sizeof
   on it (terminating NUL included) as the line capacity.  The literal
   is spelled as eight adjacent 10-blank pieces so its length is easy to
   audit.  */
#define MESSAGE_TEMPLATE \
  "          " "          " "          " "          " \
  "          " "          " "          " "          "
13858
13859 static char *
13860 output_message (FILE *stream, char *p, char *message, char *start,
13861                 int *left_p, const char *name, int len)
13862 {
13863   int size = sizeof (MESSAGE_TEMPLATE);
13864   int left = *left_p;
13865
13866   /* Reserve 2 spaces for ", " or ",\0" */
13867   left -= len + 2;
13868
13869   /* Check if there is any room.  */
13870   if (left >= 0)
13871     {
13872       if (p != start)
13873         {
13874           *p++ = ',';
13875           *p++ = ' ';
13876         }
13877       p = mempcpy (p, name, len);
13878     }
13879   else
13880     {
13881       /* Output the current message now and start a new one.  */
13882       *p++ = ',';
13883       *p = '\0';
13884       fprintf (stream, "%s\n", message);
13885       p = start;
13886       left = size - (start - message) - len - 2;
13887
13888       gas_assert (left >= 0);
13889
13890       p = mempcpy (p, name, len);
13891     }
13892
13893   *left_p = left;
13894   return p;
13895 }
13896
13897 static void
13898 show_arch (FILE *stream, int ext, int check)
13899 {
13900   static char message[] = MESSAGE_TEMPLATE;
13901   char *start = message + 27;
13902   char *p;
13903   int size = sizeof (MESSAGE_TEMPLATE);
13904   int left;
13905   const char *name;
13906   int len;
13907   unsigned int j;
13908
13909   p = start;
13910   left = size - (start - message);
13911
13912   if (!ext && check)
13913     {
13914       p = output_message (stream, p, message, start, &left,
13915                           STRING_COMMA_LEN ("default"));
13916       p = output_message (stream, p, message, start, &left,
13917                           STRING_COMMA_LEN ("push"));
13918       p = output_message (stream, p, message, start, &left,
13919                           STRING_COMMA_LEN ("pop"));
13920     }
13921
13922   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13923     {
13924       /* Should it be skipped?  */
13925       if (cpu_arch [j].skip)
13926         continue;
13927
13928       name = cpu_arch [j].name;
13929       len = cpu_arch [j].len;
13930       if (cpu_arch[j].type == PROCESSOR_NONE)
13931         {
13932           /* It is an extension.  Skip if we aren't asked to show it.  */
13933           if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
13934             continue;
13935         }
13936       else if (ext)
13937         {
13938           /* It is an processor.  Skip if we show only extension.  */
13939           continue;
13940         }
13941       else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
13942         {
13943           /* It is an impossible processor - skip.  */
13944           continue;
13945         }
13946
13947       p = output_message (stream, p, message, start, &left, name, len);
13948     }
13949
13950   /* Display disabled extensions.  */
13951   if (ext)
13952     for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13953       {
13954         char *str;
13955
13956         if (cpu_arch[j].type != PROCESSOR_NONE
13957             || !cpu_flags_all_zero (&cpu_arch[j].enable))
13958           continue;
13959         str = xasprintf ("no%s", cpu_arch[j].name);
13960         p = output_message (stream, p, message, start, &left, str,
13961                             strlen (str));
13962         free (str);
13963       }
13964
13965   *p = '\0';
13966   fprintf (stream, "%s\n", message);
13967 }
13968
13969 void
13970 md_show_usage (FILE *stream)
13971 {
13972 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13973   fprintf (stream, _("\
13974   -Qy, -Qn                ignored\n\
13975   -V                      print assembler version number\n\
13976   -k                      ignored\n"));
13977 #endif
13978   fprintf (stream, _("\
13979   -n                      do not optimize code alignment\n\
13980   -O{012s}                attempt some code optimizations\n\
13981   -q                      quieten some warnings\n"));
13982 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13983   fprintf (stream, _("\
13984   -s                      ignored\n"));
13985 #endif
13986 #ifdef BFD64
13987 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13988   fprintf (stream, _("\
13989   --32/--64/--x32         generate 32bit/64bit/x32 object\n"));
13990 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
13991   fprintf (stream, _("\
13992   --32/--64               generate 32bit/64bit object\n"));
13993 # endif
13994 #endif
13995 #ifdef SVR4_COMMENT_CHARS
13996   fprintf (stream, _("\
13997   --divide                do not treat `/' as a comment character\n"));
13998 #else
13999   fprintf (stream, _("\
14000   --divide                ignored\n"));
14001 #endif
14002   fprintf (stream, _("\
14003   -march=CPU[,+EXTENSION...]\n\
14004                           generate code for CPU and EXTENSION, CPU is one of:\n"));
14005   show_arch (stream, 0, 1);
14006   fprintf (stream, _("\
14007                           EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
14008   show_arch (stream, 1, 0);
14009   fprintf (stream, _("\
14010   -mtune=CPU              optimize for CPU, CPU is one of:\n"));
14011   show_arch (stream, 0, 0);
14012   fprintf (stream, _("\
14013   -msse2avx               encode SSE instructions with VEX prefix\n"));
14014   fprintf (stream, _("\
14015   -muse-unaligned-vector-move\n\
14016                           encode aligned vector move as unaligned vector move\n"));
14017   fprintf (stream, _("\
14018   -msse-check=[none|error|warning] (default: warning)\n\
14019                           check SSE instructions\n"));
14020   fprintf (stream, _("\
14021   -moperand-check=[none|error|warning] (default: warning)\n\
14022                           check operand combinations for validity\n"));
14023   fprintf (stream, _("\
14024   -mavxscalar=[128|256] (default: 128)\n\
14025                           encode scalar AVX instructions with specific vector\n\
14026                            length\n"));
14027   fprintf (stream, _("\
14028   -mvexwig=[0|1] (default: 0)\n\
14029                           encode VEX instructions with specific VEX.W value\n\
14030                            for VEX.W bit ignored instructions\n"));
14031   fprintf (stream, _("\
14032   -mevexlig=[128|256|512] (default: 128)\n\
14033                           encode scalar EVEX instructions with specific vector\n\
14034                            length\n"));
14035   fprintf (stream, _("\
14036   -mevexwig=[0|1] (default: 0)\n\
14037                           encode EVEX instructions with specific EVEX.W value\n\
14038                            for EVEX.W bit ignored instructions\n"));
14039   fprintf (stream, _("\
14040   -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
14041                           encode EVEX instructions with specific EVEX.RC value\n\
14042                            for SAE-only ignored instructions\n"));
14043   fprintf (stream, _("\
14044   -mmnemonic=[att|intel] "));
14045   if (SYSV386_COMPAT)
14046     fprintf (stream, _("(default: att)\n"));
14047   else
14048     fprintf (stream, _("(default: intel)\n"));
14049   fprintf (stream, _("\
14050                           use AT&T/Intel mnemonic\n"));
14051   fprintf (stream, _("\
14052   -msyntax=[att|intel] (default: att)\n\
14053                           use AT&T/Intel syntax\n"));
14054   fprintf (stream, _("\
14055   -mindex-reg             support pseudo index registers\n"));
14056   fprintf (stream, _("\
14057   -mnaked-reg             don't require `%%' prefix for registers\n"));
14058   fprintf (stream, _("\
14059   -madd-bnd-prefix        add BND prefix for all valid branches\n"));
14060 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14061   fprintf (stream, _("\
14062   -mshared                disable branch optimization for shared code\n"));
14063   fprintf (stream, _("\
14064   -mx86-used-note=[no|yes] "));
14065   if (DEFAULT_X86_USED_NOTE)
14066     fprintf (stream, _("(default: yes)\n"));
14067   else
14068     fprintf (stream, _("(default: no)\n"));
14069   fprintf (stream, _("\
14070                           generate x86 used ISA and feature properties\n"));
14071 #endif
14072 #if defined (TE_PE) || defined (TE_PEP)
14073   fprintf (stream, _("\
14074   -mbig-obj               generate big object files\n"));
14075 #endif
14076   fprintf (stream, _("\
14077   -momit-lock-prefix=[no|yes] (default: no)\n\
14078                           strip all lock prefixes\n"));
14079   fprintf (stream, _("\
14080   -mfence-as-lock-add=[no|yes] (default: no)\n\
14081                           encode lfence, mfence and sfence as\n\
14082                            lock addl $0x0, (%%{re}sp)\n"));
14083   fprintf (stream, _("\
14084   -mrelax-relocations=[no|yes] "));
14085   if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
14086     fprintf (stream, _("(default: yes)\n"));
14087   else
14088     fprintf (stream, _("(default: no)\n"));
14089   fprintf (stream, _("\
14090                           generate relax relocations\n"));
14091   fprintf (stream, _("\
14092   -malign-branch-boundary=NUM (default: 0)\n\
14093                           align branches within NUM byte boundary\n"));
14094   fprintf (stream, _("\
14095   -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
14096                           TYPE is combination of jcc, fused, jmp, call, ret,\n\
14097                            indirect\n\
14098                           specify types of branches to align\n"));
14099   fprintf (stream, _("\
14100   -malign-branch-prefix-size=NUM (default: 5)\n\
14101                           align branches with NUM prefixes per instruction\n"));
14102   fprintf (stream, _("\
14103   -mbranches-within-32B-boundaries\n\
14104                           align branches within 32 byte boundary\n"));
14105   fprintf (stream, _("\
14106   -mlfence-after-load=[no|yes] (default: no)\n\
14107                           generate lfence after load\n"));
14108   fprintf (stream, _("\
14109   -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
14110                           generate lfence before indirect near branch\n"));
14111   fprintf (stream, _("\
14112   -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
14113                           generate lfence before ret\n"));
14114   fprintf (stream, _("\
14115   -mamd64                 accept only AMD64 ISA [default]\n"));
14116   fprintf (stream, _("\
14117   -mintel64               accept only Intel64 ISA\n"));
14118 }
14119
14120 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
14121      || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14122      || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14123
14124 /* Pick the target format to use.  */
14125
14126 const char *
14127 i386_target_format (void)
14128 {
14129   if (startswith (default_arch, "x86_64"))
14130     {
14131       update_code_flag (CODE_64BIT, 1);
14132       if (default_arch[6] == '\0')
14133         x86_elf_abi = X86_64_ABI;
14134       else
14135         x86_elf_abi = X86_64_X32_ABI;
14136     }
14137   else if (!strcmp (default_arch, "i386"))
14138     update_code_flag (CODE_32BIT, 1);
14139   else if (!strcmp (default_arch, "iamcu"))
14140     {
14141       update_code_flag (CODE_32BIT, 1);
14142       if (cpu_arch_isa == PROCESSOR_UNKNOWN)
14143         {
14144           static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
14145           cpu_arch_name = "iamcu";
14146           free (cpu_sub_arch_name);
14147           cpu_sub_arch_name = NULL;
14148           cpu_arch_flags = iamcu_flags;
14149           cpu_arch_isa = PROCESSOR_IAMCU;
14150           cpu_arch_isa_flags = iamcu_flags;
14151           if (!cpu_arch_tune_set)
14152             {
14153               cpu_arch_tune = cpu_arch_isa;
14154               cpu_arch_tune_flags = cpu_arch_isa_flags;
14155             }
14156         }
14157       else if (cpu_arch_isa != PROCESSOR_IAMCU)
14158         as_fatal (_("Intel MCU doesn't support `%s' architecture"),
14159                   cpu_arch_name);
14160     }
14161   else
14162     as_fatal (_("unknown architecture"));
14163
14164   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
14165     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14166   if (cpu_flags_all_zero (&cpu_arch_tune_flags))
14167     cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14168
14169   switch (OUTPUT_FLAVOR)
14170     {
14171 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
14172     case bfd_target_aout_flavour:
14173       return AOUT_TARGET_FORMAT;
14174 #endif
14175 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
14176 # if defined (TE_PE) || defined (TE_PEP)
14177     case bfd_target_coff_flavour:
14178       if (flag_code == CODE_64BIT)
14179         {
14180           object_64bit = 1;
14181           return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
14182         }
14183       return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
14184 # elif defined (TE_GO32)
14185     case bfd_target_coff_flavour:
14186       return "coff-go32";
14187 # else
14188     case bfd_target_coff_flavour:
14189       return "coff-i386";
14190 # endif
14191 #endif
14192 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14193     case bfd_target_elf_flavour:
14194       {
14195         const char *format;
14196
14197         switch (x86_elf_abi)
14198           {
14199           default:
14200             format = ELF_TARGET_FORMAT;
14201 #ifndef TE_SOLARIS
14202             tls_get_addr = "___tls_get_addr";
14203 #endif
14204             break;
14205           case X86_64_ABI:
14206             use_rela_relocations = 1;
14207             object_64bit = 1;
14208 #ifndef TE_SOLARIS
14209             tls_get_addr = "__tls_get_addr";
14210 #endif
14211             format = ELF_TARGET_FORMAT64;
14212             break;
14213           case X86_64_X32_ABI:
14214             use_rela_relocations = 1;
14215             object_64bit = 1;
14216 #ifndef TE_SOLARIS
14217             tls_get_addr = "__tls_get_addr";
14218 #endif
14219             disallow_64bit_reloc = 1;
14220             format = ELF_TARGET_FORMAT32;
14221             break;
14222           }
14223         if (cpu_arch_isa == PROCESSOR_IAMCU)
14224           {
14225             if (x86_elf_abi != I386_ABI)
14226               as_fatal (_("Intel MCU is 32bit only"));
14227             return ELF_TARGET_IAMCU_FORMAT;
14228           }
14229         else
14230           return format;
14231       }
14232 #endif
14233 #if defined (OBJ_MACH_O)
14234     case bfd_target_mach_o_flavour:
14235       if (flag_code == CODE_64BIT)
14236         {
14237           use_rela_relocations = 1;
14238           object_64bit = 1;
14239           return "mach-o-x86-64";
14240         }
14241       else
14242         return "mach-o-i386";
14243 #endif
14244     default:
14245       abort ();
14246       return NULL;
14247     }
14248 }
14249
14250 #endif /* OBJ_MAYBE_ more than one  */
14251 \f
14252 symbolS *
14253 md_undefined_symbol (char *name)
14254 {
14255   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
14256       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
14257       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
14258       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
14259     {
14260       if (!GOT_symbol)
14261         {
14262           if (symbol_find (name))
14263             as_bad (_("GOT already in symbol table"));
14264           GOT_symbol = symbol_new (name, undefined_section,
14265                                    &zero_address_frag, 0);
14266         };
14267       return GOT_symbol;
14268     }
14269   return 0;
14270 }
14271
14272 /* Round up a section size to the appropriate boundary.  */
14273
14274 valueT
14275 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
14276 {
14277 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14278   if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
14279     {
14280       /* For a.out, force the section size to be aligned.  If we don't do
14281          this, BFD will align it for us, but it will not write out the
14282          final bytes of the section.  This may be a bug in BFD, but it is
14283          easier to fix it here since that is how the other a.out targets
14284          work.  */
14285       int align;
14286
14287       align = bfd_section_alignment (segment);
14288       size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
14289     }
14290 #endif
14291
14292   return size;
14293 }
14294
14295 /* On the i386, PC-relative offsets are relative to the start of the
14296    next instruction.  That is, the address of the offset, plus its
14297    size, since the offset is always the last part of the insn.  */
14298
14299 long
14300 md_pcrel_from (fixS *fixP)
14301 {
14302   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
14303 }
14304
14305 #ifndef I386COFF
14306
14307 static void
14308 s_bss (int ignore ATTRIBUTE_UNUSED)
14309 {
14310   int temp;
14311
14312 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14313   if (IS_ELF)
14314     obj_elf_section_change_hook ();
14315 #endif
14316   temp = get_absolute_expression ();
14317   subseg_set (bss_section, (subsegT) temp);
14318   demand_empty_rest_of_line ();
14319 }
14320
14321 #endif
14322
14323 /* Remember constant directive.  */
14324
14325 void
14326 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
14327 {
14328   if (last_insn.kind != last_insn_directive
14329       && (bfd_section_flags (now_seg) & SEC_CODE))
14330     {
14331       last_insn.seg = now_seg;
14332       last_insn.kind = last_insn_directive;
14333       last_insn.name = "constant directive";
14334       last_insn.file = as_where (&last_insn.line);
14335       if (lfence_before_ret != lfence_before_ret_none)
14336         {
14337           if (lfence_before_indirect_branch != lfence_branch_none)
14338             as_warn (_("constant directive skips -mlfence-before-ret "
14339                        "and -mlfence-before-indirect-branch"));
14340           else
14341             as_warn (_("constant directive skips -mlfence-before-ret"));
14342         }
14343       else if (lfence_before_indirect_branch != lfence_branch_none)
14344         as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
14345     }
14346 }
14347
14348 int
14349 i386_validate_fix (fixS *fixp)
14350 {
14351   if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
14352     {
14353       reloc_howto_type *howto;
14354
14355       howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
14356       as_bad_where (fixp->fx_file, fixp->fx_line,
14357                     _("invalid %s relocation against register"),
14358                     howto ? howto->name : "<unknown>");
14359       return 0;
14360     }
14361
14362 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14363   if (fixp->fx_r_type == BFD_RELOC_SIZE32
14364       || fixp->fx_r_type == BFD_RELOC_SIZE64)
14365     return IS_ELF && fixp->fx_addsy
14366            && (!S_IS_DEFINED (fixp->fx_addsy)
14367                || S_IS_EXTERNAL (fixp->fx_addsy));
14368 #endif
14369
14370   if (fixp->fx_subsy)
14371     {
14372       if (fixp->fx_subsy == GOT_symbol)
14373         {
14374           if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
14375             {
14376               if (!object_64bit)
14377                 abort ();
14378 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14379               if (fixp->fx_tcbit2)
14380                 fixp->fx_r_type = (fixp->fx_tcbit
14381                                    ? BFD_RELOC_X86_64_REX_GOTPCRELX
14382                                    : BFD_RELOC_X86_64_GOTPCRELX);
14383               else
14384 #endif
14385                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
14386             }
14387           else
14388             {
14389               if (!object_64bit)
14390                 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
14391               else
14392                 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
14393             }
14394           fixp->fx_subsy = 0;
14395         }
14396     }
14397 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14398   else
14399     {
14400       /* NB: Commit 292676c1 resolved PLT32 reloc aganst local symbol
14401          to section.  Since PLT32 relocation must be against symbols,
14402          turn such PLT32 relocation into PC32 relocation.  */
14403       if (fixp->fx_addsy
14404           && (fixp->fx_r_type == BFD_RELOC_386_PLT32
14405               || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
14406           && symbol_section_p (fixp->fx_addsy))
14407         fixp->fx_r_type = BFD_RELOC_32_PCREL;
14408       if (!object_64bit)
14409         {
14410           if (fixp->fx_r_type == BFD_RELOC_386_GOT32
14411               && fixp->fx_tcbit2)
14412             fixp->fx_r_type = BFD_RELOC_386_GOT32X;
14413         }
14414     }
14415 #endif
14416
14417   return 1;
14418 }
14419
14420 arelent *
14421 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
14422 {
14423   arelent *rel;
14424   bfd_reloc_code_real_type code;
14425
14426   switch (fixp->fx_r_type)
14427     {
14428 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14429       symbolS *sym;
14430
14431     case BFD_RELOC_SIZE32:
14432     case BFD_RELOC_SIZE64:
14433       if (fixp->fx_addsy
14434           && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
14435           && (!fixp->fx_subsy
14436               || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
14437         sym = fixp->fx_addsy;
14438       else if (fixp->fx_subsy
14439                && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
14440                && (!fixp->fx_addsy
14441                    || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
14442         sym = fixp->fx_subsy;
14443       else
14444         sym = NULL;
14445       if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
14446         {
14447           /* Resolve size relocation against local symbol to size of
14448              the symbol plus addend.  */
14449           valueT value = S_GET_SIZE (sym);
14450
14451           if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
14452             value = bfd_section_size (S_GET_SEGMENT (sym));
14453           if (sym == fixp->fx_subsy)
14454             {
14455               value = -value;
14456               if (fixp->fx_addsy)
14457                 value += S_GET_VALUE (fixp->fx_addsy);
14458             }
14459           else if (fixp->fx_subsy)
14460             value -= S_GET_VALUE (fixp->fx_subsy);
14461           value += fixp->fx_offset;
14462           if (fixp->fx_r_type == BFD_RELOC_SIZE32
14463               && object_64bit
14464               && !fits_in_unsigned_long (value))
14465             as_bad_where (fixp->fx_file, fixp->fx_line,
14466                           _("symbol size computation overflow"));
14467           fixp->fx_addsy = NULL;
14468           fixp->fx_subsy = NULL;
14469           md_apply_fix (fixp, (valueT *) &value, NULL);
14470           return NULL;
14471         }
14472       if (!fixp->fx_addsy || fixp->fx_subsy)
14473         {
14474           as_bad_where (fixp->fx_file, fixp->fx_line,
14475                         "unsupported expression involving @size");
14476           return NULL;
14477         }
14478 #endif
14479       /* Fall through.  */
14480
14481     case BFD_RELOC_X86_64_PLT32:
14482     case BFD_RELOC_X86_64_GOT32:
14483     case BFD_RELOC_X86_64_GOTPCREL:
14484     case BFD_RELOC_X86_64_GOTPCRELX:
14485     case BFD_RELOC_X86_64_REX_GOTPCRELX:
14486     case BFD_RELOC_386_PLT32:
14487     case BFD_RELOC_386_GOT32:
14488     case BFD_RELOC_386_GOT32X:
14489     case BFD_RELOC_386_GOTOFF:
14490     case BFD_RELOC_386_GOTPC:
14491     case BFD_RELOC_386_TLS_GD:
14492     case BFD_RELOC_386_TLS_LDM:
14493     case BFD_RELOC_386_TLS_LDO_32:
14494     case BFD_RELOC_386_TLS_IE_32:
14495     case BFD_RELOC_386_TLS_IE:
14496     case BFD_RELOC_386_TLS_GOTIE:
14497     case BFD_RELOC_386_TLS_LE_32:
14498     case BFD_RELOC_386_TLS_LE:
14499     case BFD_RELOC_386_TLS_GOTDESC:
14500     case BFD_RELOC_386_TLS_DESC_CALL:
14501     case BFD_RELOC_X86_64_TLSGD:
14502     case BFD_RELOC_X86_64_TLSLD:
14503     case BFD_RELOC_X86_64_DTPOFF32:
14504     case BFD_RELOC_X86_64_DTPOFF64:
14505     case BFD_RELOC_X86_64_GOTTPOFF:
14506     case BFD_RELOC_X86_64_TPOFF32:
14507     case BFD_RELOC_X86_64_TPOFF64:
14508     case BFD_RELOC_X86_64_GOTOFF64:
14509     case BFD_RELOC_X86_64_GOTPC32:
14510     case BFD_RELOC_X86_64_GOT64:
14511     case BFD_RELOC_X86_64_GOTPCREL64:
14512     case BFD_RELOC_X86_64_GOTPC64:
14513     case BFD_RELOC_X86_64_GOTPLT64:
14514     case BFD_RELOC_X86_64_PLTOFF64:
14515     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14516     case BFD_RELOC_X86_64_TLSDESC_CALL:
14517     case BFD_RELOC_RVA:
14518     case BFD_RELOC_VTABLE_ENTRY:
14519     case BFD_RELOC_VTABLE_INHERIT:
14520 #ifdef TE_PE
14521     case BFD_RELOC_32_SECREL:
14522     case BFD_RELOC_16_SECIDX:
14523 #endif
14524       code = fixp->fx_r_type;
14525       break;
14526     case BFD_RELOC_X86_64_32S:
14527       if (!fixp->fx_pcrel)
14528         {
14529           /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
14530           code = fixp->fx_r_type;
14531           break;
14532         }
14533       /* Fall through.  */
14534     default:
14535       if (fixp->fx_pcrel)
14536         {
14537           switch (fixp->fx_size)
14538             {
14539             default:
14540               as_bad_where (fixp->fx_file, fixp->fx_line,
14541                             _("can not do %d byte pc-relative relocation"),
14542                             fixp->fx_size);
14543               code = BFD_RELOC_32_PCREL;
14544               break;
14545             case 1: code = BFD_RELOC_8_PCREL;  break;
14546             case 2: code = BFD_RELOC_16_PCREL; break;
14547             case 4: code = BFD_RELOC_32_PCREL; break;
14548 #ifdef BFD64
14549             case 8: code = BFD_RELOC_64_PCREL; break;
14550 #endif
14551             }
14552         }
14553       else
14554         {
14555           switch (fixp->fx_size)
14556             {
14557             default:
14558               as_bad_where (fixp->fx_file, fixp->fx_line,
14559                             _("can not do %d byte relocation"),
14560                             fixp->fx_size);
14561               code = BFD_RELOC_32;
14562               break;
14563             case 1: code = BFD_RELOC_8;  break;
14564             case 2: code = BFD_RELOC_16; break;
14565             case 4: code = BFD_RELOC_32; break;
14566 #ifdef BFD64
14567             case 8: code = BFD_RELOC_64; break;
14568 #endif
14569             }
14570         }
14571       break;
14572     }
14573
14574   if ((code == BFD_RELOC_32
14575        || code == BFD_RELOC_32_PCREL
14576        || code == BFD_RELOC_X86_64_32S)
14577       && GOT_symbol
14578       && fixp->fx_addsy == GOT_symbol)
14579     {
14580       if (!object_64bit)
14581         code = BFD_RELOC_386_GOTPC;
14582       else
14583         code = BFD_RELOC_X86_64_GOTPC32;
14584     }
14585   if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14586       && GOT_symbol
14587       && fixp->fx_addsy == GOT_symbol)
14588     {
14589       code = BFD_RELOC_X86_64_GOTPC64;
14590     }
14591
14592   rel = XNEW (arelent);
14593   rel->sym_ptr_ptr = XNEW (asymbol *);
14594   *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14595
14596   rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14597
14598   if (!use_rela_relocations)
14599     {
14600       /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14601          vtable entry to be used in the relocation's section offset.  */
14602       if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14603         rel->address = fixp->fx_offset;
14604 #if defined (OBJ_COFF) && defined (TE_PE)
14605       else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14606         rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14607       else
14608 #endif
14609       rel->addend = 0;
14610     }
14611   /* Use the rela in 64bit mode.  */
14612   else
14613     {
14614       if (disallow_64bit_reloc)
14615         switch (code)
14616           {
14617           case BFD_RELOC_X86_64_DTPOFF64:
14618           case BFD_RELOC_X86_64_TPOFF64:
14619           case BFD_RELOC_64_PCREL:
14620           case BFD_RELOC_X86_64_GOTOFF64:
14621           case BFD_RELOC_X86_64_GOT64:
14622           case BFD_RELOC_X86_64_GOTPCREL64:
14623           case BFD_RELOC_X86_64_GOTPC64:
14624           case BFD_RELOC_X86_64_GOTPLT64:
14625           case BFD_RELOC_X86_64_PLTOFF64:
14626             as_bad_where (fixp->fx_file, fixp->fx_line,
14627                           _("cannot represent relocation type %s in x32 mode"),
14628                           bfd_get_reloc_code_name (code));
14629             break;
14630           default:
14631             break;
14632           }
14633
14634       if (!fixp->fx_pcrel)
14635         rel->addend = fixp->fx_offset;
14636       else
14637         switch (code)
14638           {
14639           case BFD_RELOC_X86_64_PLT32:
14640           case BFD_RELOC_X86_64_GOT32:
14641           case BFD_RELOC_X86_64_GOTPCREL:
14642           case BFD_RELOC_X86_64_GOTPCRELX:
14643           case BFD_RELOC_X86_64_REX_GOTPCRELX:
14644           case BFD_RELOC_X86_64_TLSGD:
14645           case BFD_RELOC_X86_64_TLSLD:
14646           case BFD_RELOC_X86_64_GOTTPOFF:
14647           case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14648           case BFD_RELOC_X86_64_TLSDESC_CALL:
14649             rel->addend = fixp->fx_offset - fixp->fx_size;
14650             break;
14651           default:
14652             rel->addend = (section->vma
14653                            - fixp->fx_size
14654                            + fixp->fx_addnumber
14655                            + md_pcrel_from (fixp));
14656             break;
14657           }
14658     }
14659
14660   rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14661   if (rel->howto == NULL)
14662     {
14663       as_bad_where (fixp->fx_file, fixp->fx_line,
14664                     _("cannot represent relocation type %s"),
14665                     bfd_get_reloc_code_name (code));
14666       /* Set howto to a garbage value so that we can keep going.  */
14667       rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14668       gas_assert (rel->howto != NULL);
14669     }
14670
14671   return rel;
14672 }
14673
14674 #include "tc-i386-intel.c"
14675
14676 void
14677 tc_x86_parse_to_dw2regnum (expressionS *exp)
14678 {
14679   int saved_naked_reg;
14680   char saved_register_dot;
14681
14682   saved_naked_reg = allow_naked_reg;
14683   allow_naked_reg = 1;
14684   saved_register_dot = register_chars['.'];
14685   register_chars['.'] = '.';
14686   allow_pseudo_reg = 1;
14687   expression_and_evaluate (exp);
14688   allow_pseudo_reg = 0;
14689   register_chars['.'] = saved_register_dot;
14690   allow_naked_reg = saved_naked_reg;
14691
14692   if (exp->X_op == O_register && exp->X_add_number >= 0)
14693     {
14694       if ((addressT) exp->X_add_number < i386_regtab_size)
14695         {
14696           exp->X_op = O_constant;
14697           exp->X_add_number = i386_regtab[exp->X_add_number]
14698                               .dw2_regnum[flag_code >> 1];
14699         }
14700       else
14701         exp->X_op = O_illegal;
14702     }
14703 }
14704
14705 void
14706 tc_x86_frame_initial_instructions (void)
14707 {
14708   static unsigned int sp_regno[2];
14709
14710   if (!sp_regno[flag_code >> 1])
14711     {
14712       char *saved_input = input_line_pointer;
14713       char sp[][4] = {"esp", "rsp"};
14714       expressionS exp;
14715
14716       input_line_pointer = sp[flag_code >> 1];
14717       tc_x86_parse_to_dw2regnum (&exp);
14718       gas_assert (exp.X_op == O_constant);
14719       sp_regno[flag_code >> 1] = exp.X_add_number;
14720       input_line_pointer = saved_input;
14721     }
14722
14723   cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14724   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14725 }
14726
14727 int
14728 x86_dwarf2_addr_size (void)
14729 {
14730 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14731   if (x86_elf_abi == X86_64_X32_ABI)
14732     return 4;
14733 #endif
14734   return bfd_arch_bits_per_address (stdoutput) / 8;
14735 }
14736
14737 int
14738 i386_elf_section_type (const char *str, size_t len)
14739 {
14740   if (flag_code == CODE_64BIT
14741       && len == sizeof ("unwind") - 1
14742       && startswith (str, "unwind"))
14743     return SHT_X86_64_UNWIND;
14744
14745   return -1;
14746 }
14747
14748 #ifdef TE_SOLARIS
14749 void
14750 i386_solaris_fix_up_eh_frame (segT sec)
14751 {
14752   if (flag_code == CODE_64BIT)
14753     elf_section_type (sec) = SHT_X86_64_UNWIND;
14754 }
14755 #endif
14756
14757 #ifdef TE_PE
14758 void
14759 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14760 {
14761   expressionS exp;
14762
14763   exp.X_op = O_secrel;
14764   exp.X_add_symbol = symbol;
14765   exp.X_add_number = 0;
14766   emit_expr (&exp, size);
14767 }
14768 #endif
14769
14770 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14771 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
14772
14773 bfd_vma
14774 x86_64_section_letter (int letter, const char **ptr_msg)
14775 {
14776   if (flag_code == CODE_64BIT)
14777     {
14778       if (letter == 'l')
14779         return SHF_X86_64_LARGE;
14780
14781       *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14782     }
14783   else
14784     *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14785   return -1;
14786 }
14787
14788 bfd_vma
14789 x86_64_section_word (char *str, size_t len)
14790 {
14791   if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
14792     return SHF_X86_64_LARGE;
14793
14794   return -1;
14795 }
14796
14797 static void
14798 handle_large_common (int small ATTRIBUTE_UNUSED)
14799 {
14800   if (flag_code != CODE_64BIT)
14801     {
14802       s_comm_internal (0, elf_common_parse);
14803       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14804     }
14805   else
14806     {
14807       static segT lbss_section;
14808       asection *saved_com_section_ptr = elf_com_section_ptr;
14809       asection *saved_bss_section = bss_section;
14810
14811       if (lbss_section == NULL)
14812         {
14813           flagword applicable;
14814           segT seg = now_seg;
14815           subsegT subseg = now_subseg;
14816
14817           /* The .lbss section is for local .largecomm symbols.  */
14818           lbss_section = subseg_new (".lbss", 0);
14819           applicable = bfd_applicable_section_flags (stdoutput);
14820           bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14821           seg_info (lbss_section)->bss = 1;
14822
14823           subseg_set (seg, subseg);
14824         }
14825
14826       elf_com_section_ptr = &_bfd_elf_large_com_section;
14827       bss_section = lbss_section;
14828
14829       s_comm_internal (0, elf_common_parse);
14830
14831       elf_com_section_ptr = saved_com_section_ptr;
14832       bss_section = saved_bss_section;
14833     }
14834 }
14835 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */