/* tc-i386.c -- Assemble code for the Intel 80386
   Copyright (C) 1989-2023 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Intel 80386 machine specific gas.
   Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
   x86_64 support by Jan Hubicka (jh@suse.cz)
   VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
   Bugs & suggestions are completely welcome.  This is free software.
   Please help us make it better.  */

#include "as.h"
#include "safe-ctype.h"
#include "subsegs.h"
#include "dwarf2dbg.h"
#include "dw2gencfi.h"
#include "gen-sframe.h"
#include "sframe.h"
#include "elf/x86-64.h"
#include "opcodes/i386-init.h"
#include "opcodes/i386-mnem.h"
#include <limits.h>

#ifndef INFER_ADDR_PREFIX
#define INFER_ADDR_PREFIX 1
#endif

#ifndef DEFAULT_ARCH
#define DEFAULT_ARCH "i386"
#endif

#ifndef INLINE
#if __GNUC__ >= 2
#define INLINE __inline__
#else
#define INLINE
#endif
#endif
/* Prefixes will be emitted in the order defined below.
   WAIT_PREFIX must be the first prefix since FWAIT really is an
   instruction, and so must come before any prefixes.
   The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
   REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
#define WAIT_PREFIX 0
#define SEG_PREFIX 1
#define ADDR_PREFIX 2
#define DATA_PREFIX 3
#define REP_PREFIX 4
#define HLE_PREFIX REP_PREFIX
#define BND_PREFIX REP_PREFIX
#define LOCK_PREFIX 5
#define REX_PREFIX 6       /* must come last.  */
#define MAX_PREFIXES 7     /* max prefixes per opcode */
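
/* For instance (an illustrative encoding, not from the original source):
   for "lock addl $1, %fs:(%edx)" assembled in 64-bit mode, the prefix
   bytes would be emitted in slot order as 0x64 (SEG_PREFIX slot, %fs
   override), 0x67 (ADDR_PREFIX slot, 32-bit addressing) and 0xf0
   (LOCK_PREFIX slot), ahead of the opcode itself.  */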

/* we define the syntax here (modulo base,index,scale syntax) */
#define REGISTER_PREFIX '%'
#define IMMEDIATE_PREFIX '$'
#define ABSOLUTE_PREFIX '*'

/* these are the instruction mnemonic suffixes in AT&T syntax or
   memory operand size in Intel syntax.  */
#define WORD_MNEM_SUFFIX  'w'
#define BYTE_MNEM_SUFFIX  'b'
#define SHORT_MNEM_SUFFIX 's'
#define LONG_MNEM_SUFFIX  'l'
#define QWORD_MNEM_SUFFIX 'q'

#define END_OF_INSN '\0'

#define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }

/* This matches the C -> StaticRounding alias in the opcode table.  */
#define commutative staticrounding

/*
  'templates' is for grouping together 'template' structures for opcodes
  of the same name.  This is only used for storing the insns in the grand
  ole hash table of insns.
  The templates themselves start at START and range up to (but not including)
  END.
  */
typedef struct
{
  const insn_template *start;
  const insn_template *end;
}
templates;

/* 386 operand encoding bytes:  see 386 book for details of this.  */
typedef struct
{
  unsigned int regmem;  /* codes register or memory operand */
  unsigned int reg;     /* codes register operand (or extended opcode) */
  unsigned int mode;    /* how to interpret regmem & reg */
}
modrm_byte;

/* x86-64 extension prefix.  */
typedef int rex_byte;

/* 386 opcode byte to code indirect addressing.  */
typedef struct
{
  unsigned base;
  unsigned index;
  unsigned scale;
}
sib_byte;

/* x86 arch names, types and features */
typedef struct
{
  const char *name;           /* arch name */
  unsigned int len:8;         /* arch string length */
  bool skip:1;                /* show_arch should skip this.  */
  enum processor_type type;   /* arch type */
  i386_cpu_flags enable;      /* cpu feature enable flags */
  i386_cpu_flags disable;     /* cpu feature disable flags */
}
arch_entry;

static void update_code_flag (int, int);
static void set_code_flag (int);
static void set_16bit_gcc_code_flag (int);
static void set_intel_syntax (int);
static void set_intel_mnemonic (int);
static void set_allow_index_reg (int);
static void set_check (int);
static void set_cpu_arch (int);
#ifdef TE_PE
static void pe_directive_secrel (int);
static void pe_directive_secidx (int);
#endif
static void signed_cons (int);
static char *output_invalid (int c);
static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
				    const char *);
static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
				       const char *);
static int i386_att_operand (char *);
static int i386_intel_operand (char *, int);
static int i386_intel_simplify (expressionS *);
static int i386_intel_parse_name (const char *, expressionS *);
static const reg_entry *parse_register (char *, char **);
static const char *parse_insn (const char *, char *);
static char *parse_operands (char *, const char *);
static void swap_operands (void);
static void swap_2_operands (unsigned int, unsigned int);
static enum flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static bool optimize_disp (const insn_template *t);
static const insn_template *match_template (char);
static int check_string (void);
static int process_suffix (void);
static int check_byte_reg (void);
static int check_long_reg (void);
static int check_qword_reg (void);
static int check_word_reg (void);
static int finalize_imm (void);
static int process_operands (void);
static const reg_entry *build_modrm_byte (void);
static void output_insn (void);
static void output_imm (fragS *, offsetT);
static void output_disp (fragS *, offsetT);
#ifndef I386COFF
static void s_bss (int);
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
static void handle_large_common (int small ATTRIBUTE_UNUSED);

/* GNU_PROPERTY_X86_ISA_1_USED.  */
static unsigned int x86_isa_1_used;
/* GNU_PROPERTY_X86_FEATURE_2_USED.  */
static unsigned int x86_feature_2_used;
/* Generate x86 used ISA and feature properties.  */
static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
#endif

static const char *default_arch = DEFAULT_ARCH;

/* parse_register() returns this when a register alias cannot be used.  */
static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
				   { Dw2Inval, Dw2Inval } };

static const reg_entry *reg_eax;
static const reg_entry *reg_ds;
static const reg_entry *reg_es;
static const reg_entry *reg_ss;
static const reg_entry *reg_st0;
static const reg_entry *reg_k0;

/* VEX prefix.  */
typedef struct
{
  /* The VEX prefix is either 2 or 3 bytes; an EVEX prefix is 4 bytes.  */
  unsigned char bytes[4];
  unsigned int length;
  /* Destination or source register specifier.  */
  const reg_entry *register_specifier;
} vex_prefix;

/* 'md_assemble ()' gathers together information and puts it into an
   i386_insn.  */

union i386_op
  {
    expressionS *disps;
    expressionS *imms;
    const reg_entry *regs;
  };

enum i386_error
  {
    no_error, /* Must be first.  */
    operand_size_mismatch,
    operand_type_mismatch,
    register_type_mismatch,
    number_of_operands_mismatch,
    invalid_instruction_suffix,
    bad_imm4,
    unsupported_with_intel_mnemonic,
    unsupported_syntax,
    unsupported,
    unsupported_on_arch,
    unsupported_64bit,
    invalid_sib_address,
    invalid_vsib_address,
    invalid_vector_register_set,
    invalid_tmm_register_set,
    invalid_dest_and_src_register_set,
    unsupported_vector_index_register,
    unsupported_broadcast,
    broadcast_needed,
    unsupported_masking,
    mask_not_on_destination,
    no_default_mask,
    unsupported_rc_sae,
    invalid_register_operand,
  };

struct _i386_insn
  {
    /* TM holds the template for the insn we're currently assembling.  */
    insn_template tm;

    /* SUFFIX holds the instruction size suffix for byte, word, dword
       or qword, if given.  */
    char suffix;

    /* OPCODE_LENGTH holds the number of base opcode bytes.  */
    unsigned char opcode_length;

    /* OPERANDS gives the number of given operands.  */
    unsigned int operands;

    /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
       of given register, displacement, memory operands and immediate
       operands.  */
    unsigned int reg_operands, disp_operands, mem_operands, imm_operands;

    /* TYPES [i] is the type (see above #defines) which tells us how to
       use OP[i] for the corresponding operand.  */
    i386_operand_type types[MAX_OPERANDS];

    /* Displacement expression, immediate expression, or register for each
       operand.  */
    union i386_op op[MAX_OPERANDS];

    /* Flags for operands.  */
    unsigned int flags[MAX_OPERANDS];
#define Operand_PCrel 1
#define Operand_Mem   2

    /* Relocation type for operand */
    enum bfd_reloc_code_real reloc[MAX_OPERANDS];

    /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
       the base index byte below.  */
    const reg_entry *base_reg;
    const reg_entry *index_reg;
    unsigned int log2_scale_factor;

    /* SEG gives the seg_entries of this insn.  They are zero unless
       explicit segment overrides are given.  */
    const reg_entry *seg[2];

    /* PREFIX holds all the given prefix opcodes (usually null).
       PREFIXES is the number of prefix opcodes.  */
    unsigned int prefixes;
    unsigned char prefix[MAX_PREFIXES];

    /* Register is in low 3 bits of opcode.  */
    bool short_form;

    /* The operand to a branch insn indicates an absolute branch.  */
    bool jumpabsolute;

    /* The operand to a branch insn indicates a far branch.  */
    bool far_branch;

    /* There is a memory operand of (%dx) which should only be used
       with input/output instructions.  */
    bool input_output_operand;

    /* Extended states.  */
    enum
      {
        /* Use MMX state.  */
        xstate_mmx = 1 << 0,
        /* Use XMM state.  */
        xstate_xmm = 1 << 1,
        /* Use YMM state.  */
        xstate_ymm = 1 << 2 | xstate_xmm,
        /* Use ZMM state.  */
        xstate_zmm = 1 << 3 | xstate_ymm,
        /* Use TMM state.  */
        xstate_tmm = 1 << 4,
        /* Use MASK state.  */
        xstate_mask = 1 << 5
      } xstate;

    /* Has GOTPC or TLS relocation.  */
    bool has_gotpc_tls_reloc;

    /* RM and SIB are the modrm byte and the sib byte where the
       addressing modes of this insn are encoded.  */
    modrm_byte rm;
    rex_byte rex;
    rex_byte vrex;
    sib_byte sib;
    vex_prefix vex;

    /* Masking attributes.

       The struct describes masking, applied to OPERAND in the instruction.
       REG is a pointer to the corresponding mask register.  ZEROING tells
       whether merging or zeroing mask is used.  */
    struct Mask_Operation
    {
      const reg_entry *reg;
      unsigned int zeroing;
      /* The operand where this operation is associated.  */
      unsigned int operand;
    } mask;

    /* Rounding control and SAE attributes.  */
    struct RC_Operation
    {
      enum rc_type
        {
          rc_none = -1,
          rne,
          rd,
          ru,
          rz,
          saeonly
        } type;
      /* In Intel syntax the operand modifier form is supposed to be used, but
         we continue to accept the immediate forms as well.  */
      bool modifier;
    } rounding;

    /* Broadcasting attributes.

       The struct describes broadcasting, applied to OPERAND.  TYPE
       expresses the broadcast factor.  */
    struct Broadcast_Operation
    {
      /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
      unsigned int type;

      /* Index of broadcasted operand.  */
      unsigned int operand;

      /* Number of bytes to broadcast.  */
      unsigned int bytes;
    } broadcast;
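
    /* Illustrative example: for AT&T "vaddps (%rax){1to16}, %zmm1, %zmm2"
       the broadcast of 4-byte (dword) elements would be recorded as
       TYPE = 16 and BYTES = 4, with OPERAND indexing the memory source;
       the two fields are deliberately independent of one another.  */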

    /* Compressed disp8*N attribute.  */
    unsigned int memshift;

    /* Prefer load or store in encoding.  */
    enum
      {
        dir_encoding_default = 0,
        dir_encoding_load,
        dir_encoding_store,
        dir_encoding_swap
      } dir_encoding;

    /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
    enum
      {
        disp_encoding_default = 0,
        disp_encoding_8bit,
        disp_encoding_16bit,
        disp_encoding_32bit
      } disp_encoding;

    /* Prefer the REX byte in encoding.  */
    bool rex_encoding;

    /* Disable instruction size optimization.  */
    bool no_optimize;

    /* How to encode vector instructions.  */
    enum
      {
        vex_encoding_default = 0,
        vex_encoding_vex,
        vex_encoding_vex3,
        vex_encoding_evex,
        vex_encoding_error
      } vec_encoding;

    /* REP prefix.  */
    const char *rep_prefix;

    /* HLE prefix.  */
    const char *hle_prefix;

    /* Have BND prefix.  */
    const char *bnd_prefix;

    /* Have NOTRACK prefix.  */
    const char *notrack_prefix;

    /* Error message.  */
    enum i386_error error;
  };

typedef struct _i386_insn i386_insn;

/* Link each RC type with its corresponding string, which will be looked
   for in the assembly input.  */
struct RC_name
{
  enum rc_type type;
  const char *name;
  unsigned int len;
};

static const struct RC_name RC_NamesTable[] =
{
  { rne, STRING_COMMA_LEN ("rn-sae") },
  { rd,  STRING_COMMA_LEN ("rd-sae") },
  { ru,  STRING_COMMA_LEN ("ru-sae") },
  { rz,  STRING_COMMA_LEN ("rz-sae") },
  { saeonly, STRING_COMMA_LEN ("sae") },
};

/* To be indexed by segment register number.  */
static const unsigned char i386_seg_prefixes[] = {
  ES_PREFIX_OPCODE,
  CS_PREFIX_OPCODE,
  SS_PREFIX_OPCODE,
  DS_PREFIX_OPCODE,
  FS_PREFIX_OPCODE,
  GS_PREFIX_OPCODE
};
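
/* For example, assuming the usual segment register numbering from
   opcodes/i386 (%es = 0 ... %gs = 5), i386_seg_prefixes[4] yields
   FS_PREFIX_OPCODE (0x64) for an %fs override.  */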

/* List of chars besides those in app.c:symbol_chars that can start an
   operand.  Used to prevent the scrubber eating vital white-space.  */
const char extra_symbol_chars[] = "*%-([{}"
#ifdef LEX_AT
        "@"
#endif
#ifdef LEX_QM
        "?"
#endif
        ;

#if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
     && !defined (TE_GNU) \
     && !defined (TE_LINUX) \
     && !defined (TE_Haiku) \
     && !defined (TE_FreeBSD) \
     && !defined (TE_DragonFly) \
     && !defined (TE_NetBSD))
/* This array holds the chars that always start a comment.  If the
   pre-processor is disabled, these aren't very useful.  The option
   --divide will remove '/' from this list.  */
const char *i386_comment_chars = "#/";
#define SVR4_COMMENT_CHARS 1
#define PREFIX_SEPARATOR '\\'

#else
const char *i386_comment_chars = "#";
#define PREFIX_SEPARATOR '/'
#endif

/* This array holds the chars that only start a comment at the beginning of
   a line.  If the line seems to have the form '# 123 filename'
   .line and .file directives will appear in the pre-processed output.
   Note that input_file.c hand checks for '#' at the beginning of the
   first line of the input file.  This is because the compiler outputs
   #NO_APP at the beginning of its output.
   Also note that comments started like this one will always work if
   '/' isn't otherwise defined.  */
const char line_comment_chars[] = "#/";

const char line_separator_chars[] = ";";

/* Chars that can be used to separate mant from exp in floating point
   nums.  */
const char EXP_CHARS[] = "eE";

/* Chars that mean this number is a floating point constant
   As in 0f12.456
   or    0d1.2345e12.  */
const char FLT_CHARS[] = "fFdDxXhHbB";

/* Tables for lexical analysis.  */
static char mnemonic_chars[256];
static char register_chars[256];
static char operand_chars[256];
static char identifier_chars[256];

/* Lexical macros.  */
#define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
#define is_operand_char(x) (operand_chars[(unsigned char) x])
#define is_register_char(x) (register_chars[(unsigned char) x])
#define is_space_char(x) ((x) == ' ')
#define is_identifier_char(x) (identifier_chars[(unsigned char) x])

/* All non-digit non-letter characters that may occur in an operand.  */
static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";

/* md_assemble() always leaves the strings it's passed unaltered.  To
   effect this we maintain a stack of saved characters that we've smashed
   with '\0's (indicating end of strings for various sub-fields of the
   assembler instruction).  */
static char save_stack[32];
static char *save_stack_p;
#define END_STRING_AND_SAVE(s) \
        do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
#define RESTORE_END_STRING(s) \
        do { *(s) = *--save_stack_p; } while (0)
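
/* A minimal usage sketch (illustrative only; find_operand_end() is a
   hypothetical helper):

     char *end = find_operand_end (str);
     END_STRING_AND_SAVE (end);      smash *end to '\0', remembering it
     ... parse the now NUL-terminated sub-field at str ...
     RESTORE_END_STRING (end);       put the saved character back

   Saves and restores must nest, since the characters live on a stack.  */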

/* The instruction we're assembling.  */
static i386_insn i;

/* Possible templates for current insn.  */
static const templates *current_templates;

/* Per instruction expressionS buffers: max displacements & immediates.  */
static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];

/* Current operand we are working on.  */
static int this_operand = -1;

/* We support three different modes.  The FLAG_CODE variable is used to
   distinguish these.  */

enum flag_code {
  CODE_32BIT,
  CODE_16BIT,
  CODE_64BIT };

static enum flag_code flag_code;
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
/* __tls_get_addr/___tls_get_addr symbol for TLS.  */
static const char *tls_get_addr;

#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
     || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))

/* The ELF ABI to use.  */
enum x86_elf_abi
{
  I386_ABI,
  X86_64_ABI,
  X86_64_X32_ABI
};

static enum x86_elf_abi x86_elf_abi = I386_ABI;
#endif

#if defined (TE_PE) || defined (TE_PEP)
/* Use big object file format.  */
static int use_big_obj = 0;
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* 1 if generating code for a shared library.  */
static int shared = 0;

unsigned int x86_sframe_cfa_sp_reg;
/* The other CFA base register for SFrame stack trace info.  */
unsigned int x86_sframe_cfa_fp_reg;
unsigned int x86_sframe_cfa_ra_reg;

#endif

/* 1 for intel syntax,
   0 if att syntax.  */
static int intel_syntax = 0;

static enum x86_64_isa
{
  amd64 = 1,    /* AMD64 ISA.  */
  intel64       /* Intel64 ISA.  */
} isa64;

/* 1 for intel mnemonic,
   0 if att mnemonic.  */
static int intel_mnemonic = !SYSV386_COMPAT;

/* 1 if pseudo registers are permitted.  */
static int allow_pseudo_reg = 0;

/* 1 if register prefix % not required.  */
static int allow_naked_reg = 0;

/* 1 if the assembler should add BND prefix for all control-transferring
   instructions supporting it, even if this prefix wasn't specified
   explicitly.  */
static int add_bnd_prefix = 0;

/* 1 if the pseudo index registers eiz/riz are allowed.  */
static int allow_index_reg = 0;

/* 1 if the assembler should ignore LOCK prefix, even if it was
   specified explicitly.  */
static int omit_lock_prefix = 0;

/* 1 if the assembler should encode lfence, mfence, and sfence as
   "lock addl $0, (%{re}sp)".  */
static int avoid_fence = 0;

/* 1 if lfence should be inserted after every load.  */
static int lfence_after_load = 0;

/* Non-zero if lfence should be inserted before indirect branch.  */
static enum lfence_before_indirect_branch_kind
  {
    lfence_branch_none = 0,
    lfence_branch_register,
    lfence_branch_memory,
    lfence_branch_all
  }
lfence_before_indirect_branch;

/* Non-zero if lfence should be inserted before ret.  */
static enum lfence_before_ret_kind
  {
    lfence_before_ret_none = 0,
    lfence_before_ret_not,
    lfence_before_ret_or,
    lfence_before_ret_shl
  }
lfence_before_ret;

/* Details of the previous instruction, used to check whether it came
   from a directive such as .byte or was a prefix.  */
static struct
  {
    segT seg;
    const char *file;
    const char *name;
    unsigned int line;
    enum last_insn_kind
      {
        last_insn_other = 0,
        last_insn_directive,
        last_insn_prefix
      } kind;
  } last_insn;

/* 1 if the assembler should generate relax relocations.  */

static int generate_relax_relocations
  = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;

static enum check_kind
  {
    check_none = 0,
    check_warning,
    check_error
  }
sse_check, operand_check = check_warning;

/* Non-zero if branches should be aligned within power of 2 boundary.  */
static int align_branch_power = 0;

/* Types of branches to align.  */
enum align_branch_kind
  {
    align_branch_none = 0,
    align_branch_jcc = 1,
    align_branch_fused = 2,
    align_branch_jmp = 3,
    align_branch_call = 4,
    align_branch_indirect = 5,
    align_branch_ret = 6
  };

/* Type bits of branches to align.  */
enum align_branch_bit
  {
    align_branch_jcc_bit = 1 << align_branch_jcc,
    align_branch_fused_bit = 1 << align_branch_fused,
    align_branch_jmp_bit = 1 << align_branch_jmp,
    align_branch_call_bit = 1 << align_branch_call,
    align_branch_indirect_bit = 1 << align_branch_indirect,
    align_branch_ret_bit = 1 << align_branch_ret
  };

static unsigned int align_branch = (align_branch_jcc_bit
				    | align_branch_fused_bit
				    | align_branch_jmp_bit);

/* Types of condition jump used by macro-fusion.  */
enum mf_jcc_kind
  {
    mf_jcc_jo = 0,  /* base opcode 0x70  */
    mf_jcc_jc,      /* base opcode 0x72  */
    mf_jcc_je,      /* base opcode 0x74  */
    mf_jcc_jna,     /* base opcode 0x76  */
    mf_jcc_js,      /* base opcode 0x78  */
    mf_jcc_jp,      /* base opcode 0x7a  */
    mf_jcc_jl,      /* base opcode 0x7c  */
    mf_jcc_jle,     /* base opcode 0x7e  */
  };


/* Types of compare flag-modifying instructions used by macro-fusion.  */
enum mf_cmp_kind
  {
    mf_cmp_test_and, /* test/cmp */
    mf_cmp_alu_cmp,  /* add/sub/cmp */
    mf_cmp_incdec    /* inc/dec */
  };

/* The maximum padding size for fused jcc.  A CMP-like instruction can
   be 9 bytes and a jcc can be 6 bytes.  Leave room just in case for
   prefixes.  */
#define MAX_FUSED_JCC_PADDING_SIZE 20

/* The maximum number of prefixes added for an instruction.  */
static unsigned int align_branch_prefix_size = 5;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
 */
static int optimize = 0;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
   3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
      "testb $imm7,%r8".
 */
static int optimize_for_space = 0;

/* Register prefix used for error message.  */
static const char *register_prefix = "%";

/* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
   leave, push, and pop instructions so that gcc has the same stack
   frame as in 32 bit mode.  */
static char stackop_size = '\0';

/* Non-zero to optimize code alignment.  */
int optimize_align_code = 1;

/* Non-zero to quieten some warnings.  */
static int quiet_warnings = 0;

/* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
static bool pre_386_16bit_warned;

/* CPU name.  */
static const char *cpu_arch_name = NULL;
static char *cpu_sub_arch_name = NULL;

/* CPU feature flags.  */
static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;

/* If we have selected a cpu we are generating instructions for.  */
static int cpu_arch_tune_set = 0;

/* Cpu we are generating instructions for.  */
enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;

/* CPU feature flags of cpu we are generating instructions for.  */
static i386_cpu_flags cpu_arch_tune_flags;

/* CPU instruction set architecture used.  */
enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;

/* CPU feature flags of instruction set architecture used.  */
i386_cpu_flags cpu_arch_isa_flags;

/* If set, conditional jumps are not automatically promoted to handle
   offsets larger than a byte.  */
static bool no_cond_jump_promotion = false;

/* Encode SSE instructions with VEX prefix.  */
static unsigned int sse2avx;

/* Encode aligned vector move as unaligned vector move.  */
static unsigned int use_unaligned_vector_move;

/* Encode scalar AVX instructions with specific vector length.  */
static enum
  {
    vex128 = 0,
    vex256
  } avxscalar;

/* Encode VEX WIG instructions with specific vex.w.  */
static enum
  {
    vexw0 = 0,
    vexw1
  } vexwig;

/* Encode scalar EVEX LIG instructions with specific vector length.  */
static enum
  {
    evexl128 = 0,
    evexl256,
    evexl512
  } evexlig;

/* Encode EVEX WIG instructions with specific evex.w.  */
static enum
  {
    evexw0 = 0,
    evexw1
  } evexwig;

/* Value to encode in EVEX RC bits, for SAE-only instructions.  */
static enum rc_type evexrcig = rne;

/* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
static symbolS *GOT_symbol;

/* The dwarf2 return column, adjusted for 32 or 64 bit.  */
unsigned int x86_dwarf2_return_column;

/* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
int x86_cie_data_alignment;

/* Interface to relax_segment.
   There are 3 major relax states for 386 jump insns because the
   different types of jumps add different sizes to frags when we're
   figuring out what sort of jump to choose to reach a given label.

   BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
   branches which are handled by md_estimate_size_before_relax() and
   i386_generic_table_relax_frag().  */

/* Types.  */
#define UNCOND_JUMP 0
#define COND_JUMP 1
#define COND_JUMP86 2
#define BRANCH_PADDING 3
#define BRANCH_PREFIX 4
#define FUSED_JCC_PADDING 5

/* Sizes.  */
#define CODE16  1
#define SMALL   0
#define SMALL16 (SMALL | CODE16)
#define BIG     2
#define BIG16   (BIG | CODE16)

#ifndef INLINE
#ifdef __GNUC__
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

#define ENCODE_RELAX_STATE(type, size) \
  ((relax_substateT) (((type) << 2) | (size)))
#define TYPE_FROM_RELAX_STATE(s) \
  ((s) >> 2)
#define DISP_SIZE_FROM_RELAX_STATE(s) \
    ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
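
/* Worked example (illustrative): ENCODE_RELAX_STATE (COND_JUMP, SMALL16)
   is (1 << 2) | 1 = 5; TYPE_FROM_RELAX_STATE (5) recovers COND_JUMP, and
   DISP_SIZE_FROM_RELAX_STATE (5) yields 1, the single displacement byte
   used before the jump is promoted.  */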

/* This table is used by relax_frag to promote short jumps to long
   ones where necessary.  SMALL (short) jumps may be promoted to BIG
   (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
   don't allow a short jump in a 32 bit code segment to be promoted to
   a 16 bit offset jump because it's slower (requires data size
   prefix), and doesn't work, unless the destination is in the bottom
   64k of the code segment (the top 16 bits of eip are zeroed).  */

const relax_typeS md_relax_table[] =
{
  /* The fields are:
     1) most positive reach of this state,
     2) most negative reach of this state,
     3) how many bytes this mode will have in the variable part of the frag
     4) which index into the table to try if we can't fit into this one.  */

  /* UNCOND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
  /* dword jmp adds 4 bytes to frag:
     0 extra opcode bytes, 4 displacement bytes.  */
  {0, 0, 4, 0},
  /* word jmp adds 2 bytes to frag:
     0 extra opcode bytes, 2 displacement bytes.  */
  {0, 0, 2, 0},

  /* COND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 3 bytes to frag:
     1 extra opcode byte, 2 displacement bytes.  */
  {0, 0, 3, 0},

  /* COND_JUMP86 states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 4 bytes to frag:
     1 displacement byte and a 3 byte long branch insn.  */
  {0, 0, 4, 0}
};

#define ARCH(n, t, f, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
    CPU_NONE_FLAGS }
#define SUBARCH(n, e, d, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
    CPU_ ## d ## _FLAGS }
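
/* For example, ARCH (i386, I386, 386, false) expands to
   { "i386", 4, false, PROCESSOR_I386, CPU_386_FLAGS, CPU_NONE_FLAGS },
   with STRING_COMMA_LEN supplying both the name string and its length.  */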

static const arch_entry cpu_arch[] =
{
  /* Do not replace the first two entries - i386_target_format() and
     set_cpu_arch() rely on them being there in this order.  */
  ARCH (generic32, GENERIC32, GENERIC32, false),
  ARCH (generic64, GENERIC64, GENERIC64, false),
  ARCH (i8086, UNKNOWN, NONE, false),
  ARCH (i186, UNKNOWN, 186, false),
  ARCH (i286, UNKNOWN, 286, false),
  ARCH (i386, I386, 386, false),
  ARCH (i486, I486, 486, false),
  ARCH (i586, PENTIUM, 586, false),
  ARCH (i686, PENTIUMPRO, 686, false),
  ARCH (pentium, PENTIUM, 586, false),
  ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
  ARCH (pentiumii, PENTIUMPRO, P2, false),
  ARCH (pentiumiii, PENTIUMPRO, P3, false),
  ARCH (pentium4, PENTIUM4, P4, false),
  ARCH (prescott, NOCONA, CORE, false),
  ARCH (nocona, NOCONA, NOCONA, false),
  ARCH (yonah, CORE, CORE, true),
  ARCH (core, CORE, CORE, false),
  ARCH (merom, CORE2, CORE2, true),
  ARCH (core2, CORE2, CORE2, false),
  ARCH (corei7, COREI7, COREI7, false),
  ARCH (iamcu, IAMCU, IAMCU, false),
  ARCH (k6, K6, K6, false),
  ARCH (k6_2, K6, K6_2, false),
  ARCH (athlon, ATHLON, ATHLON, false),
  ARCH (sledgehammer, K8, K8, true),
  ARCH (opteron, K8, K8, false),
  ARCH (k8, K8, K8, false),
  ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
  ARCH (bdver1, BD, BDVER1, false),
  ARCH (bdver2, BD, BDVER2, false),
  ARCH (bdver3, BD, BDVER3, false),
  ARCH (bdver4, BD, BDVER4, false),
  ARCH (znver1, ZNVER, ZNVER1, false),
  ARCH (znver2, ZNVER, ZNVER2, false),
  ARCH (znver3, ZNVER, ZNVER3, false),
  ARCH (znver4, ZNVER, ZNVER4, false),
  ARCH (btver1, BT, BTVER1, false),
  ARCH (btver2, BT, BTVER2, false),

  SUBARCH (8087, 8087, ANY_8087, false),
  SUBARCH (87, NONE, ANY_8087, false), /* Disable only!  */
  SUBARCH (287, 287, ANY_287, false),
  SUBARCH (387, 387, ANY_387, false),
  SUBARCH (687, 687, ANY_687, false),
  SUBARCH (cmov, CMOV, CMOV, false),
  SUBARCH (fxsr, FXSR, ANY_FXSR, false),
  SUBARCH (mmx, MMX, ANY_MMX, false),
  SUBARCH (sse, SSE, ANY_SSE, false),
  SUBARCH (sse2, SSE2, ANY_SSE2, false),
  SUBARCH (sse3, SSE3, ANY_SSE3, false),
  SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
  SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
  SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
  SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
  SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
  SUBARCH (avx, AVX, ANY_AVX, false),
  SUBARCH (avx2, AVX2, ANY_AVX2, false),
  SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
  SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
  SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
  SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
  SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
  SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
  SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
  SUBARCH (monitor, MONITOR, MONITOR, false),
  SUBARCH (vmx, VMX, ANY_VMX, false),
  SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
  SUBARCH (smx, SMX, SMX, false),
  SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
  SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
  SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
  SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
  SUBARCH (aes, AES, ANY_AES, false),
  SUBARCH (pclmul, PCLMUL, ANY_PCLMUL, false),
  SUBARCH (clmul, PCLMUL, ANY_PCLMUL, true),
  SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
  SUBARCH (rdrnd, RDRND, RDRND, false),
  SUBARCH (f16c, F16C, ANY_F16C, false),
  SUBARCH (bmi2, BMI2, BMI2, false),
  SUBARCH (fma, FMA, ANY_FMA, false),
  SUBARCH (fma4, FMA4, ANY_FMA4, false),
  SUBARCH (xop, XOP, ANY_XOP, false),
  SUBARCH (lwp, LWP, ANY_LWP, false),
  SUBARCH (movbe, MOVBE, MOVBE, false),
  SUBARCH (cx16, CX16, CX16, false),
  SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
  SUBARCH (ept, EPT, ANY_EPT, false),
  SUBARCH (lzcnt, LZCNT, LZCNT, false),
  SUBARCH (popcnt, POPCNT, POPCNT, false),
  SUBARCH (hle, HLE, HLE, false),
  SUBARCH (rtm, RTM, ANY_RTM, false),
  SUBARCH (tsx, TSX, TSX, false),
  SUBARCH (invpcid, INVPCID, INVPCID, false),
  SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
  SUBARCH (nop, NOP, NOP, false),
  SUBARCH (syscall, SYSCALL, SYSCALL, false),
  SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
  SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
  SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
  SUBARCH (padlock, PADLOCK, PADLOCK, false),
  SUBARCH (pacifica, SVME, ANY_SVME, true),
  SUBARCH (svme, SVME, ANY_SVME, false),
  SUBARCH (abm, ABM, ABM, false),
  SUBARCH (bmi, BMI, BMI, false),
  SUBARCH (tbm, TBM, TBM, false),
  SUBARCH (adx, ADX, ADX, false),
  SUBARCH (rdseed, RDSEED, RDSEED, false),
  SUBARCH (prfchw, PRFCHW, PRFCHW, false),
  SUBARCH (smap, SMAP, SMAP, false),
  SUBARCH (mpx, MPX, ANY_MPX, false),
  SUBARCH (sha, SHA, ANY_SHA, false),
  SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
  SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
  SUBARCH (se1, SE1, SE1, false),
  SUBARCH (clwb, CLWB, CLWB, false),
  SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
  SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
  SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
  SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
  SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
  SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
  SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
  SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
  SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
  SUBARCH (clzero, CLZERO, CLZERO, false),
  SUBARCH (mwaitx, MWAITX, MWAITX, false),
  SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
  SUBARCH (rdpid, RDPID, RDPID, false),
  SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
  SUBARCH (ibt, IBT, IBT, false),
  SUBARCH (shstk, SHSTK, SHSTK, false),
  SUBARCH (gfni, GFNI, ANY_GFNI, false),
  SUBARCH (vaes, VAES, ANY_VAES, false),
  SUBARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, false),
  SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
  SUBARCH (pconfig, PCONFIG, PCONFIG, false),
  SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
  SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
  SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
  SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
  SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
  SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
  SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
  SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
  SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
  SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
	   ANY_AVX512_VP2INTERSECT, false),
  SUBARCH (tdx, TDX, TDX, false),
  SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
  SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
  SUBARCH (rdpru, RDPRU, RDPRU, false),
  SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
  SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
  SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
  SUBARCH (kl, KL, ANY_KL, false),
  SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
  SUBARCH (uintr, UINTR, UINTR, false),
  SUBARCH (hreset, HRESET, HRESET, false),
  SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
  SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
  SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
  SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
  SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
  SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
  SUBARCH (msrlist, MSRLIST, MSRLIST, false),
  SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
  SUBARCH (rao_int, RAO_INT, RAO_INT, false),
  SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
};

#undef SUBARCH
#undef ARCH

#ifdef I386COFF
/* Like s_lcomm_internal in gas/read.c but the alignment string
   is allowed to be optional.  */

static symbolS *
pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
{
  addressT align = 0;

  SKIP_WHITESPACE ();

  if (needs_align
      && *input_line_pointer == ',')
    {
      align = parse_align (needs_align - 1);

      if (align == (addressT) -1)
	return NULL;
    }
  else
    {
      if (size >= 8)
	align = 3;
      else if (size >= 4)
	align = 2;
      else if (size >= 2)
	align = 1;
      else
	align = 0;
    }

  bss_alloc (symbolP, size, align);
  return symbolP;
}
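
/* E.g. a 6-byte .lcomm with no alignment argument falls through the
   ladder above to ALIGN = 2, i.e. 4-byte (1 << 2) alignment.  */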

static void
pe_lcomm (int needs_align)
{
  s_comm_internal (needs_align * 2, pe_lcomm_internal);
}
#endif

const pseudo_typeS md_pseudo_table[] =
{
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
#ifndef I386COFF
  {"bss", s_bss, 0},
#else
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"hfloat", float_cons, 'h'},
  {"bfloat16", float_cons, 'b'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  {"noopt", s_ignore, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
  {"secidx", pe_directive_secidx, 0},
#endif
  {0, 0, 0}
};

/* For interface with expression ().  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static htab_t op_hash;

/* Hash table for register lookup.  */
static htab_t reg_hash;

/* Various efficient no-op patterns for aligning code labels.
   Note: Don't try to assemble the instructions in the comments.
   0L and 0w are not legal.  */
static const unsigned char f32_1[] =
  {0x90};                               /* nop */
static const unsigned char f32_2[] =
  {0x66,0x90};                          /* xchg %ax,%ax */
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};                     /* leal 0(%esi),%esi */
static const unsigned char f32_4[] =
  {0x8d,0x74,0x26,0x00};                /* leal 0(%esi,1),%esi */
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};      /* leal 0L(%esi),%esi */
static const unsigned char f32_7[] =
  {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};                     /* lea 0(%si),%si */
static const unsigned char f16_4[] =
  {0x8d,0xb4,0x00,0x00};                /* lea 0W(%si),%si */
static const unsigned char jump_disp8[] =
  {0xeb};                               /* jmp disp8 */
static const unsigned char jump32_disp32[] =
  {0xe9};                               /* jmp disp32 */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};                          /* jmp disp32 */
/* 32-bit NOPs patterns.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};
/* 16-bit NOPs patterns.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f32_2, f16_3, f16_4
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] =
  {0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};

/* Generate COUNT bytes of NOPs at WHERE from PATT, with
   MAX_SINGLE_NOP_SIZE being the maximum size of a single NOP
   instruction.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
		  int count, int max_single_nop_size)

{
  /* Place the longer NOP first.  */
  int last;
  int offset;
  const unsigned char *nops;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
		max_single_nop_size);
      return;
    }

  nops = patt[max_single_nop_size - 1];

  /* Use the smaller one if the requested one isn't available.  */
  if (nops == NULL)
    {
      max_single_nop_size--;
      nops = patt[max_single_nop_size - 1];
    }

  last = count % max_single_nop_size;

  count -= last;
  for (offset = 0; offset < count; offset += max_single_nop_size)
    memcpy (where + offset, nops, max_single_nop_size);

  if (last)
    {
      nops = patt[last - 1];
      if (nops == NULL)
	{
	  /* Use the smaller one plus one-byte NOP if the needed one
	     isn't available.  */
	  last--;
	  nops = patt[last - 1];
	  memcpy (where + offset, nops, last);
	  where[offset + last] = *patt[0];
	}
      else
	memcpy (where + offset, nops, last);
    }
}
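
/* Illustrative example: with alt_patt available and MAX_SINGLE_NOP_SIZE
   of 7, a COUNT of 10 is emitted as one 7-byte NOP (alt_7) followed by
   the 3-byte remainder pattern (alt_3).  */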

static INLINE int
fits_in_imm7 (offsetT num)
{
  return (num & 0x7f) == num;
}

static INLINE int
fits_in_imm31 (offsetT num)
{
  return (num & 0x7fffffff) == num;
}
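
/* Note these are exact-fit checks on the masked value, so negative
   numbers never fit: fits_in_imm7 (127) is true, while fits_in_imm7 (-1)
   is false because (-1 & 0x7f) does not reproduce NUM.  */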

/* Generate COUNT bytes of NOPs at WHERE, with LIMIT being the maximum
   size of a single NOP instruction.  */

void
i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
{
  const unsigned char *const *patt = NULL;
  int max_single_nop_size;
  /* Maximum number of NOPs before switching to jump over NOPs.  */
  int max_number_of_nops;

  switch (fragP->fr_type)
    {
    case rs_fill_nop:
    case rs_align_code:
      break;
    case rs_machine_dependent:
      /* Allow NOP padding for jumps and calls.  */
      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
	  || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
	break;
      /* Fall through.  */
    default:
      return;
    }

  /* We need to decide which NOP sequence to use for 32bit and
     64bit.  When -mtune= is used:

     1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
	PROCESSOR_GENERIC32, f32_patt will be used.
     2. For the rest, alt_patt will be used.

     When -mtune= isn't used, alt_patt will be used if
     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.  */

  if (flag_code == CODE_16BIT)
    {
      patt = f16_patt;
      max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
      /* Limit number of NOPs to 2 in 16-bit mode.  */
      max_number_of_nops = 2;
    }
  else
    {
      if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
	{
	  /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
	  switch (cpu_arch_tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* We use cpu_arch_isa_flags to check if we SHOULD
		 optimize with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	    case PROCESSOR_GENERIC64:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	      patt = alt_patt;
	      break;
	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_GENERIC32:
	      patt = f32_patt;
	      break;
	    case PROCESSOR_NONE:
	      abort ();
	    }
	}
      else
	{
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
		 PROCESSOR_UNKNOWN.  */
	      abort ();
	      break;

	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	    case PROCESSOR_GENERIC32:
	      /* We use cpu_arch_isa_flags to check if we CAN optimize
		 with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_GENERIC64:
	      patt = alt_patt;
	      break;
	    case PROCESSOR_NONE:
	      abort ();
	    }
	}

      if (patt == f32_patt)
	{
	  max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
	  /* Limit number of NOPs to 2 for older processors.  */
	  max_number_of_nops = 2;
	}
      else
	{
	  max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
	  /* Limit number of NOPs to 7 for newer processors.  */
	  max_number_of_nops = 7;
	}
    }

  if (limit == 0)
    limit = max_single_nop_size;

  if (fragP->fr_type == rs_fill_nop)
    {
      /* Output NOPs for .nop directive.  */
      if (limit > max_single_nop_size)
	{
	  as_bad_where (fragP->fr_file, fragP->fr_line,
			_("invalid single nop size: %d "
			  "(expected within [0, %d])"),
			limit, max_single_nop_size);
	  return;
	}
    }
  else if (fragP->fr_type != rs_machine_dependent)
    fragP->fr_var = count;

  if ((count / max_single_nop_size) > max_number_of_nops)
    {
      /* Generate jump over NOPs.  */
      offsetT disp = count - 2;
      if (fits_in_imm7 (disp))
	{
	  /* Use "jmp disp8" if possible.  */
	  count = disp;
	  where[0] = jump_disp8[0];
	  where[1] = count;
	  where += 2;
	}
      else
	{
	  unsigned int size_of_jump;

	  if (flag_code == CODE_16BIT)
	    {
	      where[0] = jump16_disp32[0];
	      where[1] = jump16_disp32[1];
	      size_of_jump = 2;
	    }
	  else
	    {
	      where[0] = jump32_disp32[0];
	      size_of_jump = 1;
	    }

	  count -= size_of_jump + 4;
	  if (!fits_in_imm31 (count))
	    {
	      as_bad_where (fragP->fr_file, fragP->fr_line,
			    _("jump over nop padding out of range"));
	      return;
	    }

	  md_number_to_chars (where + size_of_jump, count, 4);
	  where += size_of_jump + 4;
	}
    }

  /* Generate multiple NOPs.  */
  i386_output_nops (where, patt, count, limit);
}

static INLINE int
operand_type_all_zero (const union i386_operand_type *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE void
operand_type_set (union i386_operand_type *x, unsigned int v)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      x->array[2] = v;
      /* Fall through.  */
    case 2:
      x->array[1] = v;
      /* Fall through.  */
    case 1:
      x->array[0] = v;
      /* Fall through.  */
      break;
    default:
      abort ();
    }

  x->bitfield.class = ClassNone;
  x->bitfield.instance = InstanceNone;
}

static INLINE int
operand_type_equal (const union i386_operand_type *x,
		    const union i386_operand_type *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_all_zero (const union i386_cpu_flags *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_equal (const union i386_cpu_flags *x,
		 const union i386_cpu_flags *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4] != y->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3] != y->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_check_cpu64 (i386_cpu_flags f)
{
  return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
	   || (flag_code != CODE_64BIT && f.bitfield.cpu64));
}

static INLINE i386_cpu_flags
cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] &= y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] &= y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] &= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_cpu_flags
cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] |= y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] |= y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] |= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] |= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] |= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_cpu_flags
cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] &= ~y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] &= ~y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] &= ~y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= ~y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= ~y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}
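
/* These helpers implement simple set algebra over the flag words.  An
   illustrative sketch of how an arch update might combine them (the
   local variable names here are hypothetical; avx2 and anysse would be
   initialized from CPU_AVX2_FLAGS and CPU_ANY_SSE_FLAGS respectively):

     i386_cpu_flags flags = cpu_flags_or (cpu_arch_flags, avx2);
     flags = cpu_flags_and_not (flags, anysse);

   leaving all unrelated feature bits untouched.  */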

static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

#define CPU_FLAGS_ARCH_MATCH   0x1
#define CPU_FLAGS_64BIT_MATCH  0x2

#define CPU_FLAGS_PERFECT_MATCH \
  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1799
1800 /* Return CPU flags match bits. */
1801
1802 static int
1803 cpu_flags_match (const insn_template *t)
1804 {
1805 i386_cpu_flags x = t->cpu_flags;
1806 int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1807
1808 x.bitfield.cpu64 = 0;
1809 x.bitfield.cpuno64 = 0;
1810
1811 if (cpu_flags_all_zero (&x))
1812 {
1813 /* This instruction is available on all archs. */
1814 match |= CPU_FLAGS_ARCH_MATCH;
1815 }
1816 else
1817 {
1818 /* This instruction is available only on some archs. */
1819 i386_cpu_flags cpu = cpu_arch_flags;
1820
1821 /* AVX512VL is no standalone feature - match it and then strip it. */
1822 if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1823 return match;
1824 x.bitfield.cpuavx512vl = 0;
1825
1826 /* AVX and AVX2 present at the same time express an operand size
1827 dependency - strip AVX2 for the purposes here. The operand size
1828 dependent check occurs in check_vecOperands(). */
1829 if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1830 x.bitfield.cpuavx2 = 0;
1831
1832 cpu = cpu_flags_and (x, cpu);
1833 if (!cpu_flags_all_zero (&cpu))
1834 {
1835 if (x.bitfield.cpuavx)
1836 {
1837 /* We need to check a few extra flags with AVX. */
1838 if (cpu.bitfield.cpuavx
1839 && (!t->opcode_modifier.sse2avx
1840 || (sse2avx && !i.prefix[DATA_PREFIX]))
1841 && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1842 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1843 && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1844 match |= CPU_FLAGS_ARCH_MATCH;
1845 }
1846 else if (x.bitfield.cpuavx512f)
1847 {
1848 /* We need to check a few extra flags with AVX512F. */
1849 if (cpu.bitfield.cpuavx512f
1850 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1851 && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1852 && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1853 match |= CPU_FLAGS_ARCH_MATCH;
1854 }
1855 else
1856 match |= CPU_FLAGS_ARCH_MATCH;
1857 }
1858 }
1859 return match;
1860 }
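/* A sketch of the outcomes (not tied to a particular template): a template
   carrying CpuAVX2 assembled under ".code64" with ".arch .avx2" active
   returns CPU_FLAGS_PERFECT_MATCH (0x3); after ".arch .noavx2" the AND
   above comes out all-zero and only CPU_FLAGS_64BIT_MATCH (0x2) remains,
   so the template is rejected.  */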
1861
1862 static INLINE i386_operand_type
1863 operand_type_and (i386_operand_type x, i386_operand_type y)
1864 {
1865 if (x.bitfield.class != y.bitfield.class)
1866 x.bitfield.class = ClassNone;
1867 if (x.bitfield.instance != y.bitfield.instance)
1868 x.bitfield.instance = InstanceNone;
1869
1870 switch (ARRAY_SIZE (x.array))
1871 {
1872 case 3:
1873 x.array [2] &= y.array [2];
1874 /* Fall through. */
1875 case 2:
1876 x.array [1] &= y.array [1];
1877 /* Fall through. */
1878 case 1:
1879 x.array [0] &= y.array [0];
1880 break;
1881 default:
1882 abort ();
1883 }
1884 return x;
1885 }
1886
1887 static INLINE i386_operand_type
1888 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1889 {
1890 gas_assert (y.bitfield.class == ClassNone);
1891 gas_assert (y.bitfield.instance == InstanceNone);
1892
1893 switch (ARRAY_SIZE (x.array))
1894 {
1895 case 3:
1896 x.array [2] &= ~y.array [2];
1897 /* Fall through. */
1898 case 2:
1899 x.array [1] &= ~y.array [1];
1900 /* Fall through. */
1901 case 1:
1902 x.array [0] &= ~y.array [0];
1903 break;
1904 default:
1905 abort ();
1906 }
1907 return x;
1908 }
1909
1910 static INLINE i386_operand_type
1911 operand_type_or (i386_operand_type x, i386_operand_type y)
1912 {
1913 gas_assert (x.bitfield.class == ClassNone ||
1914 y.bitfield.class == ClassNone ||
1915 x.bitfield.class == y.bitfield.class);
1916 gas_assert (x.bitfield.instance == InstanceNone ||
1917 y.bitfield.instance == InstanceNone ||
1918 x.bitfield.instance == y.bitfield.instance);
1919
1920 switch (ARRAY_SIZE (x.array))
1921 {
1922 case 3:
1923 x.array [2] |= y.array [2];
1924 /* Fall through. */
1925 case 2:
1926 x.array [1] |= y.array [1];
1927 /* Fall through. */
1928 case 1:
1929 x.array [0] |= y.array [0];
1930 break;
1931 default:
1932 abort ();
1933 }
1934 return x;
1935 }
1936
1937 static INLINE i386_operand_type
1938 operand_type_xor (i386_operand_type x, i386_operand_type y)
1939 {
1940 gas_assert (y.bitfield.class == ClassNone);
1941 gas_assert (y.bitfield.instance == InstanceNone);
1942
1943 switch (ARRAY_SIZE (x.array))
1944 {
1945 case 3:
1946 x.array [2] ^= y.array [2];
1947 /* Fall through. */
1948 case 2:
1949 x.array [1] ^= y.array [1];
1950 /* Fall through. */
1951 case 1:
1952 x.array [0] ^= y.array [0];
1953 break;
1954 default:
1955 abort ();
1956 }
1957 return x;
1958 }
1959
1960 static const i386_operand_type anydisp = {
1961 .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
1962 };
1963
1964 enum operand_type
1965 {
1966 reg,
1967 imm,
1968 disp,
1969 anymem
1970 };
1971
1972 static INLINE int
1973 operand_type_check (i386_operand_type t, enum operand_type c)
1974 {
1975 switch (c)
1976 {
1977 case reg:
1978 return t.bitfield.class == Reg;
1979
1980 case imm:
1981 return (t.bitfield.imm8
1982 || t.bitfield.imm8s
1983 || t.bitfield.imm16
1984 || t.bitfield.imm32
1985 || t.bitfield.imm32s
1986 || t.bitfield.imm64);
1987
1988 case disp:
1989 return (t.bitfield.disp8
1990 || t.bitfield.disp16
1991 || t.bitfield.disp32
1992 || t.bitfield.disp64);
1993
1994 case anymem:
1995 return (t.bitfield.disp8
1996 || t.bitfield.disp16
1997 || t.bitfield.disp32
1998 || t.bitfield.disp64
1999 || t.bitfield.baseindex);
2000
2001 default:
2002 abort ();
2003 }
2004
2005 return 0;
2006 }
2007
2008 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2009 between operand GIVEN and operand WANTED for instruction template T. */
2010
2011 static INLINE int
2012 match_operand_size (const insn_template *t, unsigned int wanted,
2013 unsigned int given)
2014 {
2015 return !((i.types[given].bitfield.byte
2016 && !t->operand_types[wanted].bitfield.byte)
2017 || (i.types[given].bitfield.word
2018 && !t->operand_types[wanted].bitfield.word)
2019 || (i.types[given].bitfield.dword
2020 && !t->operand_types[wanted].bitfield.dword)
2021 || (i.types[given].bitfield.qword
2022 && (!t->operand_types[wanted].bitfield.qword
2023 /* Don't allow 64-bit (memory) operands outside of 64-bit
2024 mode, when they're used where a 64-bit GPR could also
2025 be used. Checking is needed for Intel Syntax only. */
2026 || (intel_syntax
2027 && flag_code != CODE_64BIT
2028 && (t->operand_types[wanted].bitfield.class == Reg
2029 || t->operand_types[wanted].bitfield.class == Accum
2030 || t->opcode_modifier.isstring))))
2031 || (i.types[given].bitfield.tbyte
2032 && !t->operand_types[wanted].bitfield.tbyte));
2033 }
2034
2035 /* Return 1 if there is no conflict in SIMD register between operand
2036 GIVEN and operand WANTED for instruction template T. */
2037
2038 static INLINE int
2039 match_simd_size (const insn_template *t, unsigned int wanted,
2040 unsigned int given)
2041 {
2042 return !((i.types[given].bitfield.xmmword
2043 && !t->operand_types[wanted].bitfield.xmmword)
2044 || (i.types[given].bitfield.ymmword
2045 && !t->operand_types[wanted].bitfield.ymmword)
2046 || (i.types[given].bitfield.zmmword
2047 && !t->operand_types[wanted].bitfield.zmmword)
2048 || (i.types[given].bitfield.tmmword
2049 && !t->operand_types[wanted].bitfield.tmmword));
2050 }
2051
2052 /* Return 1 if there is no conflict in any size between operand GIVEN
2053 and operand WANTED for instruction template T. */
2054
2055 static INLINE int
2056 match_mem_size (const insn_template *t, unsigned int wanted,
2057 unsigned int given)
2058 {
2059 return (match_operand_size (t, wanted, given)
2060 && !((i.types[given].bitfield.unspecified
2061 && !i.broadcast.type
2062 && !i.broadcast.bytes
2063 && !t->operand_types[wanted].bitfield.unspecified)
2064 || (i.types[given].bitfield.fword
2065 && !t->operand_types[wanted].bitfield.fword)
2066 /* For scalar opcode templates to allow register and memory
2067 operands at the same time, some special casing is needed
2068 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2069 down-conversion vpmov*. */
2070 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2071 && t->operand_types[wanted].bitfield.byte
2072 + t->operand_types[wanted].bitfield.word
2073 + t->operand_types[wanted].bitfield.dword
2074 + t->operand_types[wanted].bitfield.qword
2075 > !!t->opcode_modifier.broadcast)
2076 ? (i.types[given].bitfield.xmmword
2077 || i.types[given].bitfield.ymmword
2078 || i.types[given].bitfield.zmmword)
2079 : !match_simd_size(t, wanted, given))));
2080 }
2081
2082 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2083 operands for instruction template T, and it has MATCH_REVERSE set if there
2084 is no size conflict on any operands for the template with operands reversed
2085 (and the template allows for reversing in the first place). */
2086
2087 #define MATCH_STRAIGHT 1
2088 #define MATCH_REVERSE 2
2089
2090 static INLINE unsigned int
2091 operand_size_match (const insn_template *t)
2092 {
2093 unsigned int j, match = MATCH_STRAIGHT;
2094
2095 /* Don't check non-absolute jump instructions. */
2096 if (t->opcode_modifier.jump
2097 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2098 return match;
2099
2100 /* Check memory and accumulator operand size. */
2101 for (j = 0; j < i.operands; j++)
2102 {
2103 if (i.types[j].bitfield.class != Reg
2104 && i.types[j].bitfield.class != RegSIMD
2105 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2106 continue;
2107
2108 if (t->operand_types[j].bitfield.class == Reg
2109 && !match_operand_size (t, j, j))
2110 {
2111 match = 0;
2112 break;
2113 }
2114
2115 if (t->operand_types[j].bitfield.class == RegSIMD
2116 && !match_simd_size (t, j, j))
2117 {
2118 match = 0;
2119 break;
2120 }
2121
2122 if (t->operand_types[j].bitfield.instance == Accum
2123 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2124 {
2125 match = 0;
2126 break;
2127 }
2128
2129 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2130 {
2131 match = 0;
2132 break;
2133 }
2134 }
2135
2136 if (!t->opcode_modifier.d)
2137 return match;
2138
2139 /* Check reverse. */
2140 gas_assert (i.operands >= 2);
2141
2142 for (j = 0; j < i.operands; j++)
2143 {
2144 unsigned int given = i.operands - j - 1;
2145
2146 /* For FMA4 and XOP insns VEX.W controls just the first two
2147 register operands. */
2148 if (t->cpu_flags.bitfield.cpufma4 || t->cpu_flags.bitfield.cpuxop)
2149 given = j < 2 ? 1 - j : j;
2150
2151 if (t->operand_types[j].bitfield.class == Reg
2152 && !match_operand_size (t, j, given))
2153 return match;
2154
2155 if (t->operand_types[j].bitfield.class == RegSIMD
2156 && !match_simd_size (t, j, given))
2157 return match;
2158
2159 if (t->operand_types[j].bitfield.instance == Accum
2160 && (!match_operand_size (t, j, given)
2161 || !match_simd_size (t, j, given)))
2162 return match;
2163
2164 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2165 return match;
2166 }
2167
2168 return match | MATCH_REVERSE;
2169 }
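/* For illustration: matching "add %eax, %ecx" against a two-operand
   template whose D (direction) bit is set passes the size checks in both
   operand orders, so MATCH_STRAIGHT | MATCH_REVERSE is returned; for
   templates without D the reverse check is skipped entirely.  */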
2170
2171 static INLINE int
2172 operand_type_match (i386_operand_type overlap,
2173 i386_operand_type given)
2174 {
2175 i386_operand_type temp = overlap;
2176
2177 temp.bitfield.unspecified = 0;
2178 temp.bitfield.byte = 0;
2179 temp.bitfield.word = 0;
2180 temp.bitfield.dword = 0;
2181 temp.bitfield.fword = 0;
2182 temp.bitfield.qword = 0;
2183 temp.bitfield.tbyte = 0;
2184 temp.bitfield.xmmword = 0;
2185 temp.bitfield.ymmword = 0;
2186 temp.bitfield.zmmword = 0;
2187 temp.bitfield.tmmword = 0;
2188 if (operand_type_all_zero (&temp))
2189 goto mismatch;
2190
2191 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2192 return 1;
2193
2194 mismatch:
2195 i.error = operand_type_mismatch;
2196 return 0;
2197 }
2198
2199 /* If given types g0 and g1 are registers they must be of the same type
2200 unless the expected operand type register overlap is null.
2201 Intel syntax sized memory operands are also checked here. */
2202
2203 static INLINE int
2204 operand_type_register_match (i386_operand_type g0,
2205 i386_operand_type t0,
2206 i386_operand_type g1,
2207 i386_operand_type t1)
2208 {
2209 if (g0.bitfield.class != Reg
2210 && g0.bitfield.class != RegSIMD
2211 && (g0.bitfield.unspecified
2212 || !operand_type_check (g0, anymem)))
2213 return 1;
2214
2215 if (g1.bitfield.class != Reg
2216 && g1.bitfield.class != RegSIMD
2217 && (g1.bitfield.unspecified
2218 || !operand_type_check (g1, anymem)))
2219 return 1;
2220
2221 if (g0.bitfield.byte == g1.bitfield.byte
2222 && g0.bitfield.word == g1.bitfield.word
2223 && g0.bitfield.dword == g1.bitfield.dword
2224 && g0.bitfield.qword == g1.bitfield.qword
2225 && g0.bitfield.xmmword == g1.bitfield.xmmword
2226 && g0.bitfield.ymmword == g1.bitfield.ymmword
2227 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2228 return 1;
2229
2230 /* If expectations overlap in no more than a single size, all is fine. */
2231 g0 = operand_type_and (t0, t1);
2232 if (g0.bitfield.byte
2233 + g0.bitfield.word
2234 + g0.bitfield.dword
2235 + g0.bitfield.qword
2236 + g0.bitfield.xmmword
2237 + g0.bitfield.ymmword
2238 + g0.bitfield.zmmword <= 1)
2239 return 1;
2240
2241 i.error = register_type_mismatch;
2242
2243 return 0;
2244 }
2245
2246 static INLINE unsigned int
2247 register_number (const reg_entry *r)
2248 {
2249 unsigned int nr = r->reg_num;
2250
2251 if (r->reg_flags & RegRex)
2252 nr += 8;
2253
2254 if (r->reg_flags & RegVRex)
2255 nr += 16;
2256
2257 return nr;
2258 }
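/* E.g. %r8 is stored with reg_num 0 plus the RegRex flag, so
   register_number () yields 8; RegVRex likewise contributes the extra bit
   used for the upper 16 SIMD registers (%xmm16 and above).  */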
2259
2260 static INLINE unsigned int
2261 mode_from_disp_size (i386_operand_type t)
2262 {
2263 if (t.bitfield.disp8)
2264 return 1;
2265 else if (t.bitfield.disp16
2266 || t.bitfield.disp32)
2267 return 2;
2268 else
2269 return 0;
2270 }
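/* The returned value is the ModR/M "mod" field implied by the displacement
   width: 4(%ebp) is a disp8 and maps to mod 1, 0x1234(%ebp) needs disp32
   (disp16 with 16-bit addressing) and maps to mod 2, anything else to 0.  */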
2271
2272 static INLINE int
2273 fits_in_signed_byte (addressT num)
2274 {
2275 return num + 0x80 <= 0xff;
2276 }
2277
2278 static INLINE int
2279 fits_in_unsigned_byte (addressT num)
2280 {
2281 return num <= 0xff;
2282 }
2283
2284 static INLINE int
2285 fits_in_unsigned_word (addressT num)
2286 {
2287 return num <= 0xffff;
2288 }
2289
2290 static INLINE int
2291 fits_in_signed_word (addressT num)
2292 {
2293 return num + 0x8000 <= 0xffff;
2294 }
2295
2296 static INLINE int
2297 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2298 {
2299 #ifndef BFD64
2300 return 1;
2301 #else
2302 return num + 0x80000000 <= 0xffffffff;
2303 #endif
2304 } /* fits_in_signed_long() */
2305
2306 static INLINE int
2307 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2308 {
2309 #ifndef BFD64
2310 return 1;
2311 #else
2312 return num <= 0xffffffff;
2313 #endif
2314 } /* fits_in_unsigned_long() */
2315
2316 static INLINE valueT extend_to_32bit_address (addressT num)
2317 {
2318 #ifdef BFD64
2319 if (fits_in_unsigned_long(num))
2320 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2321
2322 if (!fits_in_signed_long (num))
2323 return num & 0xffffffff;
2324 #endif
2325
2326 return num;
2327 }
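/* Two illustrative cases: extend_to_32bit_address (0x80000000) sign-extends
   to 0xffffffff80000000 so 32-bit wraparound arithmetic stays consistent,
   while a value like 0x123456789, fitting neither range, is truncated to
   0x23456789.  */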
2328
2329 static INLINE int
2330 fits_in_disp8 (offsetT num)
2331 {
2332 int shift = i.memshift;
2333 unsigned int mask;
2334
2335 if (shift == -1)
2336 abort ();
2337
2338 mask = (1 << shift) - 1;
2339
2340 /* Return 0 if NUM isn't properly aligned. */
2341 if ((num & mask))
2342 return 0;
2343
2344 /* Check if NUM will fit in 8bit after shift. */
2345 return fits_in_signed_byte (num >> shift);
2346 }
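/* This implements AVX512's compressed disp8*N: with i.memshift == 6 (a
   64-byte memory operand), a displacement of 0x40 is representable
   (0x40 >> 6 == 1 fits a signed byte), 0x44 is rejected as misaligned, and
   0x2000 is rejected because 0x2000 >> 6 == 0x80 no longer fits.  */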
2347
2348 static INLINE int
2349 fits_in_imm4 (offsetT num)
2350 {
2351 return (num & 0xf) == num;
2352 }
2353
2354 static i386_operand_type
2355 smallest_imm_type (offsetT num)
2356 {
2357 i386_operand_type t;
2358
2359 operand_type_set (&t, 0);
2360 t.bitfield.imm64 = 1;
2361
2362 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2363 {
2364 /* This code is disabled on the 486 because all the Imm1 forms
2365 in the opcode table are slower on the i486. They're the
2366 versions with the implicitly specified single-position
2367 displacement, which has another syntax if you really want to
2368 use that form. */
2369 t.bitfield.imm1 = 1;
2370 t.bitfield.imm8 = 1;
2371 t.bitfield.imm8s = 1;
2372 t.bitfield.imm16 = 1;
2373 t.bitfield.imm32 = 1;
2374 t.bitfield.imm32s = 1;
2375 }
2376 else if (fits_in_signed_byte (num))
2377 {
2378 if (fits_in_unsigned_byte (num))
2379 t.bitfield.imm8 = 1;
2380 t.bitfield.imm8s = 1;
2381 t.bitfield.imm16 = 1;
2382 t.bitfield.imm32 = 1;
2383 t.bitfield.imm32s = 1;
2384 }
2385 else if (fits_in_unsigned_byte (num))
2386 {
2387 t.bitfield.imm8 = 1;
2388 t.bitfield.imm16 = 1;
2389 t.bitfield.imm32 = 1;
2390 t.bitfield.imm32s = 1;
2391 }
2392 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2393 {
2394 t.bitfield.imm16 = 1;
2395 t.bitfield.imm32 = 1;
2396 t.bitfield.imm32s = 1;
2397 }
2398 else if (fits_in_signed_long (num))
2399 {
2400 t.bitfield.imm32 = 1;
2401 t.bitfield.imm32s = 1;
2402 }
2403 else if (fits_in_unsigned_long (num))
2404 t.bitfield.imm32 = 1;
2405
2406 return t;
2407 }
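/* Sample classifications (Imm64 is always set as the fallback):
   1       -> Imm1|Imm8|Imm8S|Imm16|Imm32|Imm32S (unless tuning for i486)
   0x80    -> Imm8|Imm16|Imm32|Imm32S (unsigned byte only)
   -128    -> Imm8S|Imm16|Imm32|Imm32S (signed byte only)
   0x10000 -> Imm32|Imm32S (needs 32 bits)  */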
2408
2409 static offsetT
2410 offset_in_range (offsetT val, int size)
2411 {
2412 addressT mask;
2413
2414 switch (size)
2415 {
2416 case 1: mask = ((addressT) 1 << 8) - 1; break;
2417 case 2: mask = ((addressT) 1 << 16) - 1; break;
2418 #ifdef BFD64
2419 case 4: mask = ((addressT) 1 << 32) - 1; break;
2420 #endif
2421 case sizeof (val): return val;
2422 default: abort ();
2423 }
2424
2425 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2426 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2427 (uint64_t) val, (uint64_t) (val & mask));
2428
2429 return val & mask;
2430 }
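/* E.g. offset_in_range (0x12345, 2) warns "0x12345 shortened to 0x2345"
   and returns 0x2345, while offset_in_range (-1, 1) silently returns 0xff,
   as values that survive truncation when sign-extended back don't warn.  */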
2431
2432 static INLINE const char *insn_name (const insn_template *t)
2433 {
2434 return &i386_mnemonics[t->mnem_off];
2435 }
2436
2437 enum PREFIX_GROUP
2438 {
2439 PREFIX_EXIST = 0,
2440 PREFIX_LOCK,
2441 PREFIX_REP,
2442 PREFIX_DS,
2443 PREFIX_OTHER
2444 };
2445
2446 /* Returns
2447 a. PREFIX_EXIST if attempting to add a prefix where one from the
2448 same class already exists.
2449 b. PREFIX_LOCK if lock prefix is added.
2450 c. PREFIX_REP if rep/repne prefix is added.
2451 d. PREFIX_DS if ds prefix is added.
2452 e. PREFIX_OTHER if other prefix is added.
2453 */
2454
2455 static enum PREFIX_GROUP
2456 add_prefix (unsigned int prefix)
2457 {
2458 enum PREFIX_GROUP ret = PREFIX_OTHER;
2459 unsigned int q;
2460
2461 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2462 && flag_code == CODE_64BIT)
2463 {
2464 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2465 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2466 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2467 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2468 ret = PREFIX_EXIST;
2469 q = REX_PREFIX;
2470 }
2471 else
2472 {
2473 switch (prefix)
2474 {
2475 default:
2476 abort ();
2477
2478 case DS_PREFIX_OPCODE:
2479 ret = PREFIX_DS;
2480 /* Fall through. */
2481 case CS_PREFIX_OPCODE:
2482 case ES_PREFIX_OPCODE:
2483 case FS_PREFIX_OPCODE:
2484 case GS_PREFIX_OPCODE:
2485 case SS_PREFIX_OPCODE:
2486 q = SEG_PREFIX;
2487 break;
2488
2489 case REPNE_PREFIX_OPCODE:
2490 case REPE_PREFIX_OPCODE:
2491 q = REP_PREFIX;
2492 ret = PREFIX_REP;
2493 break;
2494
2495 case LOCK_PREFIX_OPCODE:
2496 q = LOCK_PREFIX;
2497 ret = PREFIX_LOCK;
2498 break;
2499
2500 case FWAIT_OPCODE:
2501 q = WAIT_PREFIX;
2502 break;
2503
2504 case ADDR_PREFIX_OPCODE:
2505 q = ADDR_PREFIX;
2506 break;
2507
2508 case DATA_PREFIX_OPCODE:
2509 q = DATA_PREFIX;
2510 break;
2511 }
2512 if (i.prefix[q] != 0)
2513 ret = PREFIX_EXIST;
2514 }
2515
2516 if (ret)
2517 {
2518 if (!i.prefix[q])
2519 ++i.prefixes;
2520 i.prefix[q] |= prefix;
2521 }
2522 else
2523 as_bad (_("same type of prefix used twice"));
2524
2525 return ret;
2526 }
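/* E.g. add_prefix (LOCK_PREFIX_OPCODE) returns PREFIX_LOCK the first time;
   issued again on the same insn it finds i.prefix[LOCK_PREFIX] already set,
   returns PREFIX_EXIST (0) and diagnoses "same type of prefix used
   twice".  */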
2527
2528 static void
2529 update_code_flag (int value, int check)
2530 {
2531 PRINTF_LIKE ((*as_error));
2532
2533 flag_code = (enum flag_code) value;
2534 if (flag_code == CODE_64BIT)
2535 {
2536 cpu_arch_flags.bitfield.cpu64 = 1;
2537 cpu_arch_flags.bitfield.cpuno64 = 0;
2538 }
2539 else
2540 {
2541 cpu_arch_flags.bitfield.cpu64 = 0;
2542 cpu_arch_flags.bitfield.cpuno64 = 1;
2543 }
2544 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm)
2545 {
2546 if (check)
2547 as_error = as_fatal;
2548 else
2549 as_error = as_bad;
2550 (*as_error) (_("64bit mode not supported on `%s'."),
2551 cpu_arch_name ? cpu_arch_name : default_arch);
2552 }
2553 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2554 {
2555 if (check)
2556 as_error = as_fatal;
2557 else
2558 as_error = as_bad;
2559 (*as_error) (_("32bit mode not supported on `%s'."),
2560 cpu_arch_name ? cpu_arch_name : default_arch);
2561 }
2562 stackop_size = '\0';
2563 }
2564
2565 static void
2566 set_code_flag (int value)
2567 {
2568 update_code_flag (value, 0);
2569 }
2570
2571 static void
2572 set_16bit_gcc_code_flag (int new_code_flag)
2573 {
2574 flag_code = (enum flag_code) new_code_flag;
2575 if (flag_code != CODE_16BIT)
2576 abort ();
2577 cpu_arch_flags.bitfield.cpu64 = 0;
2578 cpu_arch_flags.bitfield.cpuno64 = 1;
2579 stackop_size = LONG_MNEM_SUFFIX;
2580 }
2581
2582 static void
2583 set_intel_syntax (int syntax_flag)
2584 {
2585 /* Find out if register prefixing is specified. */
2586 int ask_naked_reg = 0;
2587
2588 SKIP_WHITESPACE ();
2589 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2590 {
2591 char *string;
2592 int e = get_symbol_name (&string);
2593
2594 if (strcmp (string, "prefix") == 0)
2595 ask_naked_reg = 1;
2596 else if (strcmp (string, "noprefix") == 0)
2597 ask_naked_reg = -1;
2598 else
2599 as_bad (_("bad argument to syntax directive."));
2600 (void) restore_line_pointer (e);
2601 }
2602 demand_empty_rest_of_line ();
2603
2604 intel_syntax = syntax_flag;
2605
2606 if (ask_naked_reg == 0)
2607 allow_naked_reg = (intel_syntax
2608 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2609 else
2610 allow_naked_reg = (ask_naked_reg < 0);
2611
2612 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2613
2614 identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2615 identifier_chars['$'] = intel_syntax ? '$' : 0;
2616 register_prefix = allow_naked_reg ? "" : "%";
2617 }
2618
2619 static void
2620 set_intel_mnemonic (int mnemonic_flag)
2621 {
2622 intel_mnemonic = mnemonic_flag;
2623 }
2624
2625 static void
2626 set_allow_index_reg (int flag)
2627 {
2628 allow_index_reg = flag;
2629 }
2630
2631 static void
2632 set_check (int what)
2633 {
2634 enum check_kind *kind;
2635 const char *str;
2636
2637 if (what)
2638 {
2639 kind = &operand_check;
2640 str = "operand";
2641 }
2642 else
2643 {
2644 kind = &sse_check;
2645 str = "sse";
2646 }
2647
2648 SKIP_WHITESPACE ();
2649
2650 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2651 {
2652 char *string;
2653 int e = get_symbol_name (&string);
2654
2655 if (strcmp (string, "none") == 0)
2656 *kind = check_none;
2657 else if (strcmp (string, "warning") == 0)
2658 *kind = check_warning;
2659 else if (strcmp (string, "error") == 0)
2660 *kind = check_error;
2661 else
2662 as_bad (_("bad argument to %s_check directive."), str);
2663 (void) restore_line_pointer (e);
2664 }
2665 else
2666 as_bad (_("missing argument for %s_check directive"), str);
2667
2668 demand_empty_rest_of_line ();
2669 }
2670
2671 static void
2672 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2673 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2674 {
2675 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2676 static const char *arch;
2677
2678 /* Intel MCU is only supported on ELF. */
2679 if (!IS_ELF)
2680 return;
2681
2682 if (!arch)
2683 {
2684 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2685 use default_arch. */
2686 arch = cpu_arch_name;
2687 if (!arch)
2688 arch = default_arch;
2689 }
2690
2691 /* If we are targeting Intel MCU, we must enable it. */
2692 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2693 == new_flag.bitfield.cpuiamcu)
2694 return;
2695
2696 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2697 #endif
2698 }
2699
2700 static void
2701 extend_cpu_sub_arch_name (const char *name)
2702 {
2703 if (cpu_sub_arch_name)
2704 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2705 ".", name, (const char *) NULL);
2706 else
2707 cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2708 }
2709
2710 static void
2711 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2712 {
2713 typedef struct arch_stack_entry
2714 {
2715 const struct arch_stack_entry *prev;
2716 const char *name;
2717 char *sub_name;
2718 i386_cpu_flags flags;
2719 i386_cpu_flags isa_flags;
2720 enum processor_type isa;
2721 enum flag_code flag_code;
2722 char stackop_size;
2723 bool no_cond_jump_promotion;
2724 } arch_stack_entry;
2725 static const arch_stack_entry *arch_stack_top;
2726
2727 SKIP_WHITESPACE ();
2728
2729 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2730 {
2731 char *s;
2732 int e = get_symbol_name (&s);
2733 const char *string = s;
2734 unsigned int j = 0;
2735 i386_cpu_flags flags;
2736
2737 if (strcmp (string, "default") == 0)
2738 {
2739 if (strcmp (default_arch, "iamcu") == 0)
2740 string = default_arch;
2741 else
2742 {
2743 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2744
2745 cpu_arch_name = NULL;
2746 free (cpu_sub_arch_name);
2747 cpu_sub_arch_name = NULL;
2748 cpu_arch_flags = cpu_unknown_flags;
2749 if (flag_code == CODE_64BIT)
2750 {
2751 cpu_arch_flags.bitfield.cpu64 = 1;
2752 cpu_arch_flags.bitfield.cpuno64 = 0;
2753 }
2754 else
2755 {
2756 cpu_arch_flags.bitfield.cpu64 = 0;
2757 cpu_arch_flags.bitfield.cpuno64 = 1;
2758 }
2759 cpu_arch_isa = PROCESSOR_UNKNOWN;
2760 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2761 if (!cpu_arch_tune_set)
2762 {
2763 cpu_arch_tune = cpu_arch_isa;
2764 cpu_arch_tune_flags = cpu_arch_isa_flags;
2765 }
2766
2767 j = ARRAY_SIZE (cpu_arch) + 1;
2768 }
2769 }
2770 else if (strcmp (string, "push") == 0)
2771 {
2772 arch_stack_entry *top = XNEW (arch_stack_entry);
2773
2774 top->name = cpu_arch_name;
2775 if (cpu_sub_arch_name)
2776 top->sub_name = xstrdup (cpu_sub_arch_name);
2777 else
2778 top->sub_name = NULL;
2779 top->flags = cpu_arch_flags;
2780 top->isa = cpu_arch_isa;
2781 top->isa_flags = cpu_arch_isa_flags;
2782 top->flag_code = flag_code;
2783 top->stackop_size = stackop_size;
2784 top->no_cond_jump_promotion = no_cond_jump_promotion;
2785
2786 top->prev = arch_stack_top;
2787 arch_stack_top = top;
2788
2789 (void) restore_line_pointer (e);
2790 demand_empty_rest_of_line ();
2791 return;
2792 }
2793 else if (strcmp (string, "pop") == 0)
2794 {
2795 const arch_stack_entry *top = arch_stack_top;
2796
2797 if (!top)
2798 as_bad (_(".arch stack is empty"));
2799 else if (top->flag_code != flag_code
2800 || top->stackop_size != stackop_size)
2801 {
2802 static const unsigned int bits[] = {
2803 [CODE_16BIT] = 16,
2804 [CODE_32BIT] = 32,
2805 [CODE_64BIT] = 64,
2806 };
2807
2808 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2809 bits[top->flag_code],
2810 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2811 }
2812 else
2813 {
2814 arch_stack_top = top->prev;
2815
2816 cpu_arch_name = top->name;
2817 free (cpu_sub_arch_name);
2818 cpu_sub_arch_name = top->sub_name;
2819 cpu_arch_flags = top->flags;
2820 cpu_arch_isa = top->isa;
2821 cpu_arch_isa_flags = top->isa_flags;
2822 no_cond_jump_promotion = top->no_cond_jump_promotion;
2823
2824 XDELETE (top);
2825 }
2826
2827 (void) restore_line_pointer (e);
2828 demand_empty_rest_of_line ();
2829 return;
2830 }
2831
2832 for (; j < ARRAY_SIZE (cpu_arch); j++)
2833 {
2834 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2835 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2836 {
2837 if (*string != '.')
2838 {
2839 check_cpu_arch_compatible (string, cpu_arch[j].enable);
2840
2841 cpu_arch_name = cpu_arch[j].name;
2842 free (cpu_sub_arch_name);
2843 cpu_sub_arch_name = NULL;
2844 cpu_arch_flags = cpu_arch[j].enable;
2845 if (flag_code == CODE_64BIT)
2846 {
2847 cpu_arch_flags.bitfield.cpu64 = 1;
2848 cpu_arch_flags.bitfield.cpuno64 = 0;
2849 }
2850 else
2851 {
2852 cpu_arch_flags.bitfield.cpu64 = 0;
2853 cpu_arch_flags.bitfield.cpuno64 = 1;
2854 }
2855 cpu_arch_isa = cpu_arch[j].type;
2856 cpu_arch_isa_flags = cpu_arch[j].enable;
2857 if (!cpu_arch_tune_set)
2858 {
2859 cpu_arch_tune = cpu_arch_isa;
2860 cpu_arch_tune_flags = cpu_arch_isa_flags;
2861 }
2862 pre_386_16bit_warned = false;
2863 break;
2864 }
2865
2866 if (cpu_flags_all_zero (&cpu_arch[j].enable))
2867 continue;
2868
2869 flags = cpu_flags_or (cpu_arch_flags,
2870 cpu_arch[j].enable);
2871
2872 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2873 {
2874 extend_cpu_sub_arch_name (string + 1);
2875 cpu_arch_flags = flags;
2876 cpu_arch_isa_flags = flags;
2877 }
2878 else
2879 cpu_arch_isa_flags
2880 = cpu_flags_or (cpu_arch_isa_flags,
2881 cpu_arch[j].enable);
2882 (void) restore_line_pointer (e);
2883 demand_empty_rest_of_line ();
2884 return;
2885 }
2886 }
2887
2888 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
2889 {
2890 /* Disable an ISA extension. */
2891 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2892 if (cpu_arch[j].type == PROCESSOR_NONE
2893 && strcmp (string + 3, cpu_arch[j].name) == 0)
2894 {
2895 flags = cpu_flags_and_not (cpu_arch_flags,
2896 cpu_arch[j].disable);
2897 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2898 {
2899 extend_cpu_sub_arch_name (string + 1);
2900 cpu_arch_flags = flags;
2901 cpu_arch_isa_flags = flags;
2902 }
2903 (void) restore_line_pointer (e);
2904 demand_empty_rest_of_line ();
2905 return;
2906 }
2907 }
2908
2909 if (j == ARRAY_SIZE (cpu_arch))
2910 as_bad (_("no such architecture: `%s'"), string);
2911
2912 *input_line_pointer = e;
2913 }
2914 else
2915 as_bad (_("missing cpu architecture"));
2916
2917 no_cond_jump_promotion = 0;
2918 if (*input_line_pointer == ','
2919 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2920 {
2921 char *string;
2922 char e;
2923
2924 ++input_line_pointer;
2925 e = get_symbol_name (&string);
2926
2927 if (strcmp (string, "nojumps") == 0)
2928 no_cond_jump_promotion = 1;
2929 else if (strcmp (string, "jumps") == 0)
2930 ;
2931 else
2932 as_bad (_("no such architecture modifier: `%s'"), string);
2933
2934 (void) restore_line_pointer (e);
2935 }
2936
2937 demand_empty_rest_of_line ();
2938 }
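/* A few directive forms the handler above accepts, for reference:
     .arch i686                 # select a base architecture
     .arch .avx2                # enable a single ISA extension
     .arch .noavx512vl          # disable a single ISA extension
     .arch push                 # save the current arch state
     .arch pop                  # restore it (same .code mode required)
     .arch generic32, nojumps   # suppress conditional jump promotion  */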
2939
2940 enum bfd_architecture
2941 i386_arch (void)
2942 {
2943 if (cpu_arch_isa == PROCESSOR_IAMCU)
2944 {
2945 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2946 || flag_code == CODE_64BIT)
2947 as_fatal (_("Intel MCU is 32bit ELF only"));
2948 return bfd_arch_iamcu;
2949 }
2950 else
2951 return bfd_arch_i386;
2952 }
2953
2954 unsigned long
2955 i386_mach (void)
2956 {
2957 if (startswith (default_arch, "x86_64"))
2958 {
2959 if (default_arch[6] == '\0')
2960 return bfd_mach_x86_64;
2961 else
2962 return bfd_mach_x64_32;
2963 }
2964 else if (!strcmp (default_arch, "i386")
2965 || !strcmp (default_arch, "iamcu"))
2966 {
2967 if (cpu_arch_isa == PROCESSOR_IAMCU)
2968 {
2969 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2970 as_fatal (_("Intel MCU is 32bit ELF only"));
2971 return bfd_mach_i386_iamcu;
2972 }
2973 else
2974 return bfd_mach_i386_i386;
2975 }
2976 else
2977 as_fatal (_("unknown architecture"));
2978 }
2979 \f
2980 #include "opcodes/i386-tbl.h"
2981
2982 void
2983 md_begin (void)
2984 {
2985 /* Support pseudo prefixes like {disp32}. */
2986 lex_type ['{'] = LEX_BEGIN_NAME;
2987
2988 /* Initialize op_hash hash table. */
2989 op_hash = str_htab_create ();
2990
2991 {
2992 const insn_template *const *sets = i386_op_sets;
2993 const insn_template *const *end = sets + ARRAY_SIZE (i386_op_sets) - 1;
2994
2995 /* Type checks to compensate for the conversion through void * which
2996 occurs during hash table insertion / lookup. */
2997 (void) sizeof (sets == &current_templates->start);
2998 (void) sizeof (end == &current_templates->end);
2999 for (; sets < end; ++sets)
3000 if (str_hash_insert (op_hash, insn_name (*sets), sets, 0))
3001 as_fatal (_("duplicate %s"), insn_name (*sets));
3002 }
3003
3004 /* Initialize reg_hash hash table. */
3005 reg_hash = str_htab_create ();
3006 {
3007 const reg_entry *regtab;
3008 unsigned int regtab_size = i386_regtab_size;
3009
3010 for (regtab = i386_regtab; regtab_size--; regtab++)
3011 {
3012 switch (regtab->reg_type.bitfield.class)
3013 {
3014 case Reg:
3015 if (regtab->reg_type.bitfield.dword)
3016 {
3017 if (regtab->reg_type.bitfield.instance == Accum)
3018 reg_eax = regtab;
3019 }
3020 else if (regtab->reg_type.bitfield.tbyte)
3021 {
3022 /* There's no point inserting st(<N>) in the hash table, as
3023 parentheses aren't included in register_chars[] anyway. */
3024 if (regtab->reg_type.bitfield.instance != Accum)
3025 continue;
3026 reg_st0 = regtab;
3027 }
3028 break;
3029
3030 case SReg:
3031 switch (regtab->reg_num)
3032 {
3033 case 0: reg_es = regtab; break;
3034 case 2: reg_ss = regtab; break;
3035 case 3: reg_ds = regtab; break;
3036 }
3037 break;
3038
3039 case RegMask:
3040 if (!regtab->reg_num)
3041 reg_k0 = regtab;
3042 break;
3043 }
3044
3045 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3046 as_fatal (_("duplicate %s"), regtab->reg_name);
3047 }
3048 }
3049
3050 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3051 {
3052 int c;
3053 char *p;
3054
3055 for (c = 0; c < 256; c++)
3056 {
3057 if (ISDIGIT (c) || ISLOWER (c))
3058 {
3059 mnemonic_chars[c] = c;
3060 register_chars[c] = c;
3061 operand_chars[c] = c;
3062 }
3063 else if (ISUPPER (c))
3064 {
3065 mnemonic_chars[c] = TOLOWER (c);
3066 register_chars[c] = mnemonic_chars[c];
3067 operand_chars[c] = c;
3068 }
3069 else if (c == '{' || c == '}')
3070 {
3071 mnemonic_chars[c] = c;
3072 operand_chars[c] = c;
3073 }
3074 #ifdef SVR4_COMMENT_CHARS
3075 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3076 operand_chars[c] = c;
3077 #endif
3078
3079 if (ISALPHA (c) || ISDIGIT (c))
3080 identifier_chars[c] = c;
3081 else if (c >= 128)
3082 {
3083 identifier_chars[c] = c;
3084 operand_chars[c] = c;
3085 }
3086 }
3087
3088 #ifdef LEX_AT
3089 identifier_chars['@'] = '@';
3090 #endif
3091 #ifdef LEX_QM
3092 identifier_chars['?'] = '?';
3093 operand_chars['?'] = '?';
3094 #endif
3095 mnemonic_chars['_'] = '_';
3096 mnemonic_chars['-'] = '-';
3097 mnemonic_chars['.'] = '.';
3098 identifier_chars['_'] = '_';
3099 identifier_chars['.'] = '.';
3100
3101 for (p = operand_special_chars; *p != '\0'; p++)
3102 operand_chars[(unsigned char) *p] = *p;
3103 }
3104
3105 if (flag_code == CODE_64BIT)
3106 {
3107 #if defined (OBJ_COFF) && defined (TE_PE)
3108 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3109 ? 32 : 16);
3110 #else
3111 x86_dwarf2_return_column = 16;
3112 #endif
3113 x86_cie_data_alignment = -8;
3114 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3115 x86_sframe_cfa_sp_reg = 7;
3116 x86_sframe_cfa_fp_reg = 6;
3117 #endif
3118 }
3119 else
3120 {
3121 x86_dwarf2_return_column = 8;
3122 x86_cie_data_alignment = -4;
3123 }
3124
3125 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3126 can be turned into BRANCH_PREFIX frag. */
3127 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3128 abort ();
3129 }
3130
3131 void
3132 i386_print_statistics (FILE *file)
3133 {
3134 htab_print_statistics (file, "i386 opcode", op_hash);
3135 htab_print_statistics (file, "i386 register", reg_hash);
3136 }
3137
3138 void
3139 i386_md_end (void)
3140 {
3141 htab_delete (op_hash);
3142 htab_delete (reg_hash);
3143 }
3144 \f
3145 #ifdef DEBUG386
3146
3147 /* Debugging routines for md_assemble. */
3148 static void pte (insn_template *);
3149 static void pt (i386_operand_type);
3150 static void pe (expressionS *);
3151 static void ps (symbolS *);
3152
3153 static void
3154 pi (const char *line, i386_insn *x)
3155 {
3156 unsigned int j;
3157
3158 fprintf (stdout, "%s: template ", line);
3159 pte (&x->tm);
3160 fprintf (stdout, " address: base %s index %s scale %x\n",
3161 x->base_reg ? x->base_reg->reg_name : "none",
3162 x->index_reg ? x->index_reg->reg_name : "none",
3163 x->log2_scale_factor);
3164 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3165 x->rm.mode, x->rm.reg, x->rm.regmem);
3166 fprintf (stdout, " sib: base %x index %x scale %x\n",
3167 x->sib.base, x->sib.index, x->sib.scale);
3168 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3169 (x->rex & REX_W) != 0,
3170 (x->rex & REX_R) != 0,
3171 (x->rex & REX_X) != 0,
3172 (x->rex & REX_B) != 0);
3173 for (j = 0; j < x->operands; j++)
3174 {
3175 fprintf (stdout, " #%d: ", j + 1);
3176 pt (x->types[j]);
3177 fprintf (stdout, "\n");
3178 if (x->types[j].bitfield.class == Reg
3179 || x->types[j].bitfield.class == RegMMX
3180 || x->types[j].bitfield.class == RegSIMD
3181 || x->types[j].bitfield.class == RegMask
3182 || x->types[j].bitfield.class == SReg
3183 || x->types[j].bitfield.class == RegCR
3184 || x->types[j].bitfield.class == RegDR
3185 || x->types[j].bitfield.class == RegTR
3186 || x->types[j].bitfield.class == RegBND)
3187 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3188 if (operand_type_check (x->types[j], imm))
3189 pe (x->op[j].imms);
3190 if (operand_type_check (x->types[j], disp))
3191 pe (x->op[j].disps);
3192 }
3193 }
3194
3195 static void
3196 pte (insn_template *t)
3197 {
3198 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3199 static const char *const opc_spc[] = {
3200 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3201 "XOP08", "XOP09", "XOP0A",
3202 };
3203 unsigned int j;
3204
3205 fprintf (stdout, " %d operands ", t->operands);
3206 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3207 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3208 if (opc_spc[t->opcode_space])
3209 fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
3210 fprintf (stdout, "opcode %x ", t->base_opcode);
3211 if (t->extension_opcode != None)
3212 fprintf (stdout, "ext %x ", t->extension_opcode);
3213 if (t->opcode_modifier.d)
3214 fprintf (stdout, "D");
3215 if (t->opcode_modifier.w)
3216 fprintf (stdout, "W");
3217 fprintf (stdout, "\n");
3218 for (j = 0; j < t->operands; j++)
3219 {
3220 fprintf (stdout, " #%d type ", j + 1);
3221 pt (t->operand_types[j]);
3222 fprintf (stdout, "\n");
3223 }
3224 }
3225
3226 static void
3227 pe (expressionS *e)
3228 {
3229 fprintf (stdout, " operation %d\n", e->X_op);
3230 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3231 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3232 if (e->X_add_symbol)
3233 {
3234 fprintf (stdout, " add_symbol ");
3235 ps (e->X_add_symbol);
3236 fprintf (stdout, "\n");
3237 }
3238 if (e->X_op_symbol)
3239 {
3240 fprintf (stdout, " op_symbol ");
3241 ps (e->X_op_symbol);
3242 fprintf (stdout, "\n");
3243 }
3244 }
3245
3246 static void
3247 ps (symbolS *s)
3248 {
3249 fprintf (stdout, "%s type %s%s",
3250 S_GET_NAME (s),
3251 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3252 segment_name (S_GET_SEGMENT (s)));
3253 }
3254
3255 static struct type_name
3256 {
3257 i386_operand_type mask;
3258 const char *name;
3259 }
3260 const type_names[] =
3261 {
3262 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3263 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3264 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3265 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3266 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3267 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3268 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3269 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3270 { { .bitfield = { .imm8 = 1 } }, "i8" },
3271 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3272 { { .bitfield = { .imm16 = 1 } }, "i16" },
3273 { { .bitfield = { .imm32 = 1 } }, "i32" },
3274 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3275 { { .bitfield = { .imm64 = 1 } }, "i64" },
3276 { { .bitfield = { .imm1 = 1 } }, "i1" },
3277 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3278 { { .bitfield = { .disp8 = 1 } }, "d8" },
3279 { { .bitfield = { .disp16 = 1 } }, "d16" },
3280 { { .bitfield = { .disp32 = 1 } }, "d32" },
3281 { { .bitfield = { .disp64 = 1 } }, "d64" },
3282 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3283 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3284 { { .bitfield = { .class = RegCR } }, "control reg" },
3285 { { .bitfield = { .class = RegTR } }, "test reg" },
3286 { { .bitfield = { .class = RegDR } }, "debug reg" },
3287 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3288 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3289 { { .bitfield = { .class = SReg } }, "SReg" },
3290 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3291 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3292 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3293 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3294 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3295 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3296 };
3297
3298 static void
3299 pt (i386_operand_type t)
3300 {
3301 unsigned int j;
3302 i386_operand_type a;
3303
3304 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3305 {
3306 a = operand_type_and (t, type_names[j].mask);
3307 if (operand_type_equal (&a, &type_names[j].mask))
3308 fprintf (stdout, "%s, ", type_names[j].name);
3309 }
3310 fflush (stdout);
3311 }
3312
3313 #endif /* DEBUG386 */
3314 \f
3315 static bfd_reloc_code_real_type
3316 reloc (unsigned int size,
3317 int pcrel,
3318 int sign,
3319 bfd_reloc_code_real_type other)
3320 {
3321 if (other != NO_RELOC)
3322 {
3323 reloc_howto_type *rel;
3324
3325 if (size == 8)
3326 switch (other)
3327 {
3328 case BFD_RELOC_X86_64_GOT32:
3329 return BFD_RELOC_X86_64_GOT64;
3330 break;
3331 case BFD_RELOC_X86_64_GOTPLT64:
3332 return BFD_RELOC_X86_64_GOTPLT64;
3333 break;
3334 case BFD_RELOC_X86_64_PLTOFF64:
3335 return BFD_RELOC_X86_64_PLTOFF64;
3336 break;
3337 case BFD_RELOC_X86_64_GOTPC32:
3338 other = BFD_RELOC_X86_64_GOTPC64;
3339 break;
3340 case BFD_RELOC_X86_64_GOTPCREL:
3341 other = BFD_RELOC_X86_64_GOTPCREL64;
3342 break;
3343 case BFD_RELOC_X86_64_TPOFF32:
3344 other = BFD_RELOC_X86_64_TPOFF64;
3345 break;
3346 case BFD_RELOC_X86_64_DTPOFF32:
3347 other = BFD_RELOC_X86_64_DTPOFF64;
3348 break;
3349 default:
3350 break;
3351 }
3352
3353 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3354 if (other == BFD_RELOC_SIZE32)
3355 {
3356 if (size == 8)
3357 other = BFD_RELOC_SIZE64;
3358 if (pcrel)
3359 {
3360 as_bad (_("there are no pc-relative size relocations"));
3361 return NO_RELOC;
3362 }
3363 }
3364 #endif
3365
3366 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3367 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3368 sign = -1;
3369
3370 rel = bfd_reloc_type_lookup (stdoutput, other);
3371 if (!rel)
3372 as_bad (_("unknown relocation (%u)"), other);
3373 else if (size != bfd_get_reloc_size (rel))
3374 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3375 bfd_get_reloc_size (rel),
3376 size);
3377 else if (pcrel && !rel->pc_relative)
3378 as_bad (_("non-pc-relative relocation for pc-relative field"));
3379 else if ((rel->complain_on_overflow == complain_overflow_signed
3380 && !sign)
3381 || (rel->complain_on_overflow == complain_overflow_unsigned
3382 && sign > 0))
3383 as_bad (_("relocated field and relocation type differ in signedness"));
3384 else
3385 return other;
3386 return NO_RELOC;
3387 }
3388
3389 if (pcrel)
3390 {
3391 if (!sign)
3392 as_bad (_("there are no unsigned pc-relative relocations"));
3393 switch (size)
3394 {
3395 case 1: return BFD_RELOC_8_PCREL;
3396 case 2: return BFD_RELOC_16_PCREL;
3397 case 4: return BFD_RELOC_32_PCREL;
3398 case 8: return BFD_RELOC_64_PCREL;
3399 }
3400 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3401 }
3402 else
3403 {
3404 if (sign > 0)
3405 switch (size)
3406 {
3407 case 4: return BFD_RELOC_X86_64_32S;
3408 }
3409 else
3410 switch (size)
3411 {
3412 case 1: return BFD_RELOC_8;
3413 case 2: return BFD_RELOC_16;
3414 case 4: return BFD_RELOC_32;
3415 case 8: return BFD_RELOC_64;
3416 }
3417 as_bad (_("cannot do %s %u byte relocation"),
3418 sign > 0 ? "signed" : "unsigned", size);
3419 }
3420
3421 return NO_RELOC;
3422 }
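/* Typical results, for reference: reloc (4, 1, 1, NO_RELOC) yields
   BFD_RELOC_32_PCREL, reloc (4, 0, 1, NO_RELOC) yields BFD_RELOC_X86_64_32S,
   and an 8-byte field upgrades BFD_RELOC_X86_64_GOT32 to
   BFD_RELOC_X86_64_GOT64 via the size == 8 switch at the top.  */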
3423
3424 /* Here we decide which fixups can be adjusted to make them relative to
3425 the beginning of the section instead of the symbol. Basically we need
3426 to make sure that the dynamic relocations are done correctly, so in
3427 some cases we force the original symbol to be used. */
3428
3429 int
3430 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3431 {
3432 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3433 if (!IS_ELF)
3434 return 1;
3435
3436 /* Don't adjust pc-relative references to merge sections in 64-bit
3437 mode. */
3438 if (use_rela_relocations
3439 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3440 && fixP->fx_pcrel)
3441 return 0;
3442
3443 /* The x86_64 GOTPCREL relocations are represented as 32-bit PC-relative
3444 relocations and changed later by validate_fix. */
3445 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3446 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3447 return 0;
3448
3449 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3450 for size relocations. */
3451 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3452 || fixP->fx_r_type == BFD_RELOC_SIZE64
3453 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3454 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3455 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3456 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3457 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3458 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3459 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3460 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3461 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3462 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3463 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3464 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3465 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3466 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3467 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3468 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3469 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3470 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3471 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3472 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3473 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3474 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3475 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3476 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3477 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3478 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3479 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3480 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3481 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3482 return 0;
3483 #endif
3484 return 1;
3485 }
3486
3487 static INLINE bool
3488 want_disp32 (const insn_template *t)
3489 {
3490 return flag_code != CODE_64BIT
3491 || i.prefix[ADDR_PREFIX]
3492 || (t->mnem_off == MN_lea
3493 && (!i.types[1].bitfield.qword
3494 || t->opcode_modifier.size == SIZE32));
3495 }
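/* E.g. in 64-bit code "lea sym, %eax" wants a 32-bit displacement because
   the destination isn't a 64-bit register, and any insn with a 0x67
   address size prefix likewise falls back to disp32.  */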
3496
3497 static int
3498 intel_float_operand (const char *mnemonic)
3499 {
3500 /* Note that the value returned is meaningful only for opcodes with (memory)
3501 operands, hence the code here is free to improperly handle opcodes that
3502 have no operands (for better performance and smaller code). */
3503
3504 if (mnemonic[0] != 'f')
3505 return 0; /* non-math */
3506
3507 switch (mnemonic[1])
3508 {
3509 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3510 the fs segment override prefix are not currently handled, because no
3511 call path can make opcodes without operands get here. */
3512 case 'i':
3513 return 2 /* integer op */;
3514 case 'l':
3515 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3516 return 3; /* fldcw/fldenv */
3517 break;
3518 case 'n':
3519 if (mnemonic[2] != 'o' /* fnop */)
3520 return 3; /* non-waiting control op */
3521 break;
3522 case 'r':
3523 if (mnemonic[2] == 's')
3524 return 3; /* frstor/frstpm */
3525 break;
3526 case 's':
3527 if (mnemonic[2] == 'a')
3528 return 3; /* fsave */
3529 if (mnemonic[2] == 't')
3530 {
3531 switch (mnemonic[3])
3532 {
3533 case 'c': /* fstcw */
3534 case 'd': /* fstdw */
3535 case 'e': /* fstenv */
3536 case 's': /* fsts[gw] */
3537 return 3;
3538 }
3539 }
3540 break;
3541 case 'x':
3542 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3543 return 0; /* fxsave/fxrstor are not really math ops */
3544 break;
3545 }
3546
3547 return 1;
3548 }
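/* Sample classifications: "fadd" -> 1 (math op), "fisttp" -> 2 (integer
   op), "fldcw" and "fnsave" -> 3 (control ops), "fxsave" and any non-FPU
   mnemonic -> 0.  */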
3549
3550 static INLINE void
3551 install_template (const insn_template *t)
3552 {
3553 unsigned int l;
3554
3555 i.tm = *t;
3556
3557 /* Note that for pseudo prefixes this produces a length of 1. But for them
3558 the length isn't interesting at all. */
3559 for (l = 1; l < 4; ++l)
3560 if (!(t->base_opcode >> (8 * l)))
3561 break;
3562
3563 i.opcode_length = l;
3564 }
3565
3566 /* Build the VEX prefix. */
3567
3568 static void
3569 build_vex_prefix (const insn_template *t)
3570 {
3571 unsigned int register_specifier;
3572 unsigned int vector_length;
3573 unsigned int w;
3574
3575 /* Check register specifier. */
3576 if (i.vex.register_specifier)
3577 {
3578 register_specifier =
3579 ~register_number (i.vex.register_specifier) & 0xf;
3580 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3581 }
3582 else
3583 register_specifier = 0xf;
3584
3585 /* Use the 2-byte VEX prefix by swapping destination and source operands
3586 if there is more than one register operand. */
3587 if (i.reg_operands > 1
3588 && i.vec_encoding != vex_encoding_vex3
3589 && i.dir_encoding == dir_encoding_default
3590 && i.operands == i.reg_operands
3591 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3592 && i.tm.opcode_space == SPACE_0F
3593 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3594 && i.rex == REX_B)
3595 {
3596 unsigned int xchg;
3597
3598 swap_2_operands (0, i.operands - 1);
3599
3600 gas_assert (i.rm.mode == 3);
3601
3602 i.rex = REX_R;
3603 xchg = i.rm.regmem;
3604 i.rm.regmem = i.rm.reg;
3605 i.rm.reg = xchg;
3606
3607 if (i.tm.opcode_modifier.d)
3608 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3609 ? Opcode_ExtD : Opcode_SIMD_IntD;
3610 else /* Use the next insn. */
3611 install_template (&t[1]);
3612 }
3613
3614 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3615 are no memory operands and at least 3 register ones. */
3616 if (i.reg_operands >= 3
3617 && i.vec_encoding != vex_encoding_vex3
3618 && i.reg_operands == i.operands - i.imm_operands
3619 && i.tm.opcode_modifier.vex
3620 && i.tm.opcode_modifier.commutative
3621 && (i.tm.opcode_modifier.sse2avx
3622 || (optimize > 1 && !i.no_optimize))
3623 && i.rex == REX_B
3624 && i.vex.register_specifier
3625 && !(i.vex.register_specifier->reg_flags & RegRex))
3626 {
3627 unsigned int xchg = i.operands - i.reg_operands;
3628
3629 gas_assert (i.tm.opcode_space == SPACE_0F);
3630 gas_assert (!i.tm.opcode_modifier.sae);
3631 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3632 &i.types[i.operands - 3]));
3633 gas_assert (i.rm.mode == 3);
3634
3635 swap_2_operands (xchg, xchg + 1);
3636
3637 i.rex = 0;
3638 xchg = i.rm.regmem | 8;
3639 i.rm.regmem = ~register_specifier & 0xf;
3640 gas_assert (!(i.rm.regmem & 8));
3641 i.vex.register_specifier += xchg - i.rm.regmem;
3642 register_specifier = ~xchg & 0xf;
3643 }
3644
3645 if (i.tm.opcode_modifier.vex == VEXScalar)
3646 vector_length = avxscalar;
3647 else if (i.tm.opcode_modifier.vex == VEX256)
3648 vector_length = 1;
3649 else
3650 {
3651 unsigned int op;
3652
3653 /* Determine vector length from the last multi-length vector
3654 operand. */
3655 vector_length = 0;
3656 for (op = t->operands; op--;)
3657 if (t->operand_types[op].bitfield.xmmword
3658 && t->operand_types[op].bitfield.ymmword
3659 && i.types[op].bitfield.ymmword)
3660 {
3661 vector_length = 1;
3662 break;
3663 }
3664 }
3665
3666 /* Check the REX.W bit and VEXW. */
3667 if (i.tm.opcode_modifier.vexw == VEXWIG)
3668 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3669 else if (i.tm.opcode_modifier.vexw)
3670 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3671 else
3672 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3673
3674 /* Use 2-byte VEX prefix if possible. */
3675 if (w == 0
3676 && i.vec_encoding != vex_encoding_vex3
3677 && i.tm.opcode_space == SPACE_0F
3678 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3679 {
3680 /* 2-byte VEX prefix. */
3681 unsigned int r;
3682
3683 i.vex.length = 2;
3684 i.vex.bytes[0] = 0xc5;
3685
3686 /* Check the REX.R bit. */
3687 r = (i.rex & REX_R) ? 0 : 1;
3688 i.vex.bytes[1] = (r << 7
3689 | register_specifier << 3
3690 | vector_length << 2
3691 | i.tm.opcode_modifier.opcodeprefix);
3692 }
3693 else
3694 {
3695 /* 3-byte VEX prefix. */
3696 i.vex.length = 3;
3697
3698 switch (i.tm.opcode_space)
3699 {
3700 case SPACE_0F:
3701 case SPACE_0F38:
3702 case SPACE_0F3A:
3703 i.vex.bytes[0] = 0xc4;
3704 break;
3705 case SPACE_XOP08:
3706 case SPACE_XOP09:
3707 case SPACE_XOP0A:
3708 i.vex.bytes[0] = 0x8f;
3709 break;
3710 default:
3711 abort ();
3712 }
3713
3714 /* The high 3 bits of the second VEX byte are the 1's complement
3715 of RXB bits from REX. */
3716 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_space;
3717
3718 i.vex.bytes[2] = (w << 7
3719 | register_specifier << 3
3720 | vector_length << 2
3721 | i.tm.opcode_modifier.opcodeprefix);
3722 }
3723 }
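/* Worked example (byte values per the SDM's VEX layout): for
   "vaddps %ymm2, %ymm1, %ymm0" neither W nor the X/B extensions are
   needed, so the 2-byte form applies: 0xc5, then R=1, vvvv=~1=0b1110,
   L=1, pp=00 giving 0xf4, followed by opcode 0x58 and ModRM 0xc2.  */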
3724
3725 static INLINE bool
3726 is_evex_encoding (const insn_template *t)
3727 {
3728 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3729 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3730 || t->opcode_modifier.sae;
3731 }
3732
3733 static INLINE bool
3734 is_any_vex_encoding (const insn_template *t)
3735 {
3736 return t->opcode_modifier.vex || is_evex_encoding (t);
3737 }
3738
3739 static unsigned int
3740 get_broadcast_bytes (const insn_template *t, bool diag)
3741 {
3742 unsigned int op, bytes;
3743 const i386_operand_type *types;
3744
3745 if (i.broadcast.type)
3746 return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
3747
3748 gas_assert (intel_syntax);
3749
3750 for (op = 0; op < t->operands; ++op)
3751 if (t->operand_types[op].bitfield.baseindex)
3752 break;
3753
3754 gas_assert (op < t->operands);
3755
3756 if (t->opcode_modifier.evex
3757 && t->opcode_modifier.evex != EVEXDYN)
3758 switch (i.broadcast.bytes)
3759 {
3760 case 1:
3761 if (t->operand_types[op].bitfield.word)
3762 return 2;
3763 /* Fall through. */
3764 case 2:
3765 if (t->operand_types[op].bitfield.dword)
3766 return 4;
3767 /* Fall through. */
3768 case 4:
3769 if (t->operand_types[op].bitfield.qword)
3770 return 8;
3771 /* Fall through. */
3772 case 8:
3773 if (t->operand_types[op].bitfield.xmmword)
3774 return 16;
3775 if (t->operand_types[op].bitfield.ymmword)
3776 return 32;
3777 if (t->operand_types[op].bitfield.zmmword)
3778 return 64;
3779 /* Fall through. */
3780 default:
3781 abort ();
3782 }
3783
3784 gas_assert (op + 1 < t->operands);
3785
3786 if (t->operand_types[op + 1].bitfield.xmmword
3787 + t->operand_types[op + 1].bitfield.ymmword
3788 + t->operand_types[op + 1].bitfield.zmmword > 1)
3789 {
3790 types = &i.types[op + 1];
3791 diag = false;
3792 }
3793 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
3794 types = &t->operand_types[op];
3795
3796 if (types->bitfield.zmmword)
3797 bytes = 64;
3798 else if (types->bitfield.ymmword)
3799 bytes = 32;
3800 else
3801 bytes = 16;
3802
3803 if (diag)
3804 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3805 insn_name (t), bytes * 8);
3806
3807 return bytes;
3808 }
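/* E.g. the AT&T form "vaddps (%rax){1to16}, %zmm1, %zmm0" records a dword
   element with i.broadcast.type == 16, so the early return computes
   4 * 16 == 64 bytes; Intel syntax "DWORD BCST" operands set
   i.broadcast.bytes instead and take the template-driven paths above.  */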
3809
3810 /* Build the EVEX prefix. */
3811
3812 static void
3813 build_evex_prefix (void)
3814 {
3815 unsigned int register_specifier, w;
3816 rex_byte vrex_used = 0;
3817
3818 /* Check register specifier. */
3819 if (i.vex.register_specifier)
3820 {
3821 gas_assert ((i.vrex & REX_X) == 0);
3822
3823 register_specifier = i.vex.register_specifier->reg_num;
3824 if ((i.vex.register_specifier->reg_flags & RegRex))
3825 register_specifier += 8;
3826 /* The upper 16 registers are encoded in the fourth byte of the
3827 EVEX prefix. */
3828 if (!(i.vex.register_specifier->reg_flags & RegVRex))
3829 i.vex.bytes[3] = 0x8;
3830 register_specifier = ~register_specifier & 0xf;
3831 }
3832 else
3833 {
3834 register_specifier = 0xf;
3835
3836 /* Encode upper 16 vector index register in the fourth byte of
3837 the EVEX prefix. */
3838 if (!(i.vrex & REX_X))
3839 i.vex.bytes[3] = 0x8;
3840 else
3841 vrex_used |= REX_X;
3842 }
3843
3844 /* 4 byte EVEX prefix. */
3845 i.vex.length = 4;
3846 i.vex.bytes[0] = 0x62;
3847
3848 /* The high 3 bits of the second EVEX byte are the 1's complement of RXB
3849 bits from REX. */
3850 gas_assert (i.tm.opcode_space >= SPACE_0F);
3851 gas_assert (i.tm.opcode_space <= SPACE_EVEXMAP6);
3852 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_space;
3853
3854 /* The fifth bit of the second EVEX byte is the 1's complement of the
3855 REX_R bit in VREX. */
3856 if (!(i.vrex & REX_R))
3857 i.vex.bytes[1] |= 0x10;
3858 else
3859 vrex_used |= REX_R;
3860
3861 if ((i.reg_operands + i.imm_operands) == i.operands)
3862 {
3863 /* When all operands are registers, the REX_X bit in REX is not
3864 used. We reuse it to encode the upper 16 registers, which is
3865 indicated by the REX_B bit in VREX. The REX_X bit is encoded
3866 as 1's complement. */
3867 if ((i.vrex & REX_B))
3868 {
3869 vrex_used |= REX_B;
3870 i.vex.bytes[1] &= ~0x40;
3871 }
3872 }
3873
3874 /* EVEX instructions shouldn't need the REX prefix. */
3875 i.vrex &= ~vrex_used;
3876 gas_assert (i.vrex == 0);
3877
3878 /* Check the REX.W bit and VEXW. */
3879 if (i.tm.opcode_modifier.vexw == VEXWIG)
3880 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3881 else if (i.tm.opcode_modifier.vexw)
3882 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3883 else
3884 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3885
3886 /* The third byte of the EVEX prefix. */
3887 i.vex.bytes[2] = ((w << 7)
3888 | (register_specifier << 3)
3889 | 4 /* Encode the U bit. */
3890 | i.tm.opcode_modifier.opcodeprefix);
3891
3892 /* The fourth byte of the EVEX prefix. */
3893 /* The zeroing-masking bit. */
3894 if (i.mask.reg && i.mask.zeroing)
3895 i.vex.bytes[3] |= 0x80;
3896
3897 /* Don't always set the broadcast bit if there is no RC. */
3898 if (i.rounding.type == rc_none)
3899 {
3900 /* Encode the vector length. */
3901 unsigned int vec_length;
3902
3903 if (!i.tm.opcode_modifier.evex
3904 || i.tm.opcode_modifier.evex == EVEXDYN)
3905 {
3906 unsigned int op;
3907
3908 /* Determine vector length from the last multi-length vector
3909 operand. */
3910 for (op = i.operands; op--;)
3911 if (i.tm.operand_types[op].bitfield.xmmword
3912 + i.tm.operand_types[op].bitfield.ymmword
3913 + i.tm.operand_types[op].bitfield.zmmword > 1)
3914 {
3915 if (i.types[op].bitfield.zmmword)
3916 {
3917 i.tm.opcode_modifier.evex = EVEX512;
3918 break;
3919 }
3920 else if (i.types[op].bitfield.ymmword)
3921 {
3922 i.tm.opcode_modifier.evex = EVEX256;
3923 break;
3924 }
3925 else if (i.types[op].bitfield.xmmword)
3926 {
3927 i.tm.opcode_modifier.evex = EVEX128;
3928 break;
3929 }
3930 else if ((i.broadcast.type || i.broadcast.bytes)
3931 && op == i.broadcast.operand)
3932 {
3933 switch (get_broadcast_bytes (&i.tm, true))
3934 {
3935 case 64:
3936 i.tm.opcode_modifier.evex = EVEX512;
3937 break;
3938 case 32:
3939 i.tm.opcode_modifier.evex = EVEX256;
3940 break;
3941 case 16:
3942 i.tm.opcode_modifier.evex = EVEX128;
3943 break;
3944 default:
3945 abort ();
3946 }
3947 break;
3948 }
3949 }
3950
3951 if (op >= MAX_OPERANDS)
3952 abort ();
3953 }
3954
3955 switch (i.tm.opcode_modifier.evex)
3956 {
3957 case EVEXLIG: /* LL' is ignored */
3958 vec_length = evexlig << 5;
3959 break;
3960 case EVEX128:
3961 vec_length = 0 << 5;
3962 break;
3963 case EVEX256:
3964 vec_length = 1 << 5;
3965 break;
3966 case EVEX512:
3967 vec_length = 2 << 5;
3968 break;
3969 default:
3970 abort ();
3971 break;
3972 }
3973 i.vex.bytes[3] |= vec_length;
3974 /* Encode the broadcast bit. */
3975 if (i.broadcast.type || i.broadcast.bytes)
3976 i.vex.bytes[3] |= 0x10;
3977 }
3978 else if (i.rounding.type != saeonly)
3979 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3980 else
3981 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3982
3983 if (i.mask.reg)
3984 i.vex.bytes[3] |= i.mask.reg->reg_num;
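  /* A worked example (hand-assembled, for reference): "vaddps %zmm1,
     %zmm2, %zmm3" comes out as 62 f1 6c 48 58 d9 -- the prefix bytes
     62 f1 6c 48 built here, followed by opcode 58 and ModRM d9.  */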
3985 }
3986
3987 static void
3988 process_immext (void)
3989 {
3990 expressionS *exp;
3991
3992 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
3993 which is coded in the same place as an 8-bit immediate field
3994 would be. Here we fake an 8-bit immediate operand from the
3995 opcode suffix stored in tm.extension_opcode.
3996
3997      AVX instructions also use this encoding, for some of their
3998      3-operand instructions.  */
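  /* (For example, 3DNow! "pfadd" is 0f 0f /r with suffix byte 0x9e,
     attached here as the fake imm8.)  */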
3999
4000 gas_assert (i.imm_operands <= 1
4001 && (i.operands <= 2
4002 || (is_any_vex_encoding (&i.tm)
4003 && i.operands <= 4)));
4004
4005 exp = &im_expressions[i.imm_operands++];
4006 i.op[i.operands].imms = exp;
4007 i.types[i.operands].bitfield.imm8 = 1;
4008 i.operands++;
4009 exp->X_op = O_constant;
4010 exp->X_add_number = i.tm.extension_opcode;
4011 i.tm.extension_opcode = None;
4012 }
4013
4014
4015 static int
4016 check_hle (void)
4017 {
4018 switch (i.tm.opcode_modifier.prefixok)
4019 {
4020 default:
4021 abort ();
4022 case PrefixLock:
4023 case PrefixNone:
4024 case PrefixNoTrack:
4025 case PrefixRep:
4026 as_bad (_("invalid instruction `%s' after `%s'"),
4027 insn_name (&i.tm), i.hle_prefix);
4028 return 0;
4029 case PrefixHLELock:
4030 if (i.prefix[LOCK_PREFIX])
4031 return 1;
4032 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4033 return 0;
4034 case PrefixHLEAny:
4035 return 1;
4036 case PrefixHLERelease:
4037 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4038 {
4039 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4040 insn_name (&i.tm));
4041 return 0;
4042 }
4043 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4044 {
4045 as_bad (_("memory destination needed for instruction `%s'"
4046 " after `xrelease'"), insn_name (&i.tm));
4047 return 0;
4048 }
4049 return 1;
4050 }
4051 }
4052
4053 /* Encode aligned vector move as unaligned vector move. */
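/* E.g. "movaps" (0f 28/29) becomes "movups" (0f 10/11), and "movdqa"
   (66 0f 6f/7f) becomes "movdqu" (f3 0f 6f/7f); only the opcode byte or
   the mandatory prefix changes, never the operands.  */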
4054
4055 static void
4056 encode_with_unaligned_vector_move (void)
4057 {
4058 switch (i.tm.base_opcode)
4059 {
4060 case 0x28: /* Load instructions. */
4061 case 0x29: /* Store instructions. */
4062 /* movaps/movapd/vmovaps/vmovapd. */
4063 if (i.tm.opcode_space == SPACE_0F
4064 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4065 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4066 break;
4067 case 0x6f: /* Load instructions. */
4068 case 0x7f: /* Store instructions. */
4069 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4070 if (i.tm.opcode_space == SPACE_0F
4071 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4072 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4073 break;
4074 default:
4075 break;
4076 }
4077 }
4078
4079 /* Try the shortest encoding by shortening operand size. */
4080
4081 static void
4082 optimize_encoding (void)
4083 {
4084 unsigned int j;
4085
4086 if (i.tm.mnem_off == MN_lea)
4087 {
4088 /* Optimize: -O:
4089 lea symbol, %rN -> mov $symbol, %rN
4090 lea (%rM), %rN -> mov %rM, %rN
4091 lea (,%rM,1), %rN -> mov %rM, %rN
4092
4093 and in 32-bit mode for 16-bit addressing
4094
4095 lea (%rM), %rN -> movzx %rM, %rN
4096
4097 and in 64-bit mode zap 32-bit addressing in favor of using a
4098 32-bit (or less) destination.
4099 */
4100 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4101 {
4102 if (!i.op[1].regs->reg_type.bitfield.word)
4103 i.tm.opcode_modifier.size = SIZE32;
4104 i.prefix[ADDR_PREFIX] = 0;
4105 }
4106
4107 if (!i.index_reg && !i.base_reg)
4108 {
4109 /* Handle:
4110 lea symbol, %rN -> mov $symbol, %rN
4111 */
4112 if (flag_code == CODE_64BIT)
4113 {
4114 /* Don't transform a relocation to a 16-bit one. */
4115 if (i.op[0].disps
4116 && i.op[0].disps->X_op != O_constant
4117 && i.op[1].regs->reg_type.bitfield.word)
4118 return;
4119
4120 if (!i.op[1].regs->reg_type.bitfield.qword
4121 || i.tm.opcode_modifier.size == SIZE32)
4122 {
4123 i.tm.base_opcode = 0xb8;
4124 i.tm.opcode_modifier.modrm = 0;
4125 if (!i.op[1].regs->reg_type.bitfield.word)
4126 i.types[0].bitfield.imm32 = 1;
4127 else
4128 {
4129 i.tm.opcode_modifier.size = SIZE16;
4130 i.types[0].bitfield.imm16 = 1;
4131 }
4132 }
4133 else
4134 {
4135 /* Subject to further optimization below. */
4136 i.tm.base_opcode = 0xc7;
4137 i.tm.extension_opcode = 0;
4138 i.types[0].bitfield.imm32s = 1;
4139 i.types[0].bitfield.baseindex = 0;
4140 }
4141 }
4142 	  /* Outside of 64-bit mode, address and operand sizes have to match if
4143 a relocation is involved, as otherwise we wouldn't (currently) or
4144 even couldn't express the relocation correctly. */
4145 else if (i.op[0].disps
4146 && i.op[0].disps->X_op != O_constant
4147 && ((!i.prefix[ADDR_PREFIX])
4148 != (flag_code == CODE_32BIT
4149 ? i.op[1].regs->reg_type.bitfield.dword
4150 : i.op[1].regs->reg_type.bitfield.word)))
4151 return;
4152 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4153 destination is going to grow encoding size. */
4154 else if (flag_code == CODE_16BIT
4155 && (optimize <= 1 || optimize_for_space)
4156 && !i.prefix[ADDR_PREFIX]
4157 && i.op[1].regs->reg_type.bitfield.dword)
4158 return;
4159 else
4160 {
4161 i.tm.base_opcode = 0xb8;
4162 i.tm.opcode_modifier.modrm = 0;
4163 if (i.op[1].regs->reg_type.bitfield.dword)
4164 i.types[0].bitfield.imm32 = 1;
4165 else
4166 i.types[0].bitfield.imm16 = 1;
4167
4168 if (i.op[0].disps
4169 && i.op[0].disps->X_op == O_constant
4170 && i.op[1].regs->reg_type.bitfield.dword
4171 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4172 GCC 5. */
4173 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4174 i.op[0].disps->X_add_number &= 0xffff;
4175 }
4176
4177 i.tm.operand_types[0] = i.types[0];
4178 i.imm_operands = 1;
4179 if (!i.op[0].imms)
4180 {
4181 i.op[0].imms = &im_expressions[0];
4182 i.op[0].imms->X_op = O_absent;
4183 }
4184 }
4185 else if (i.op[0].disps
4186 && (i.op[0].disps->X_op != O_constant
4187 || i.op[0].disps->X_add_number))
4188 return;
4189 else
4190 {
4191 /* Handle:
4192 lea (%rM), %rN -> mov %rM, %rN
4193 lea (,%rM,1), %rN -> mov %rM, %rN
4194 lea (%rM), %rN -> movzx %rM, %rN
4195 */
4196 const reg_entry *addr_reg;
4197
4198 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4199 addr_reg = i.base_reg;
4200 else if (!i.base_reg
4201 && i.index_reg->reg_num != RegIZ
4202 && !i.log2_scale_factor)
4203 addr_reg = i.index_reg;
4204 else
4205 return;
4206
4207 if (addr_reg->reg_type.bitfield.word
4208 && i.op[1].regs->reg_type.bitfield.dword)
4209 {
4210 if (flag_code != CODE_32BIT)
4211 return;
4212 i.tm.opcode_space = SPACE_0F;
4213 i.tm.base_opcode = 0xb7;
4214 }
4215 else
4216 i.tm.base_opcode = 0x8b;
4217
4218 if (addr_reg->reg_type.bitfield.dword
4219 && i.op[1].regs->reg_type.bitfield.qword)
4220 i.tm.opcode_modifier.size = SIZE32;
4221
4222 i.op[0].regs = addr_reg;
4223 i.reg_operands = 2;
4224 }
4225
4226 i.mem_operands = 0;
4227 i.disp_operands = 0;
4228 i.prefix[ADDR_PREFIX] = 0;
4229 i.prefix[SEG_PREFIX] = 0;
4230 i.seg[0] = NULL;
4231 }
4232
4233 if (optimize_for_space
4234 && i.tm.mnem_off == MN_test
4235 && i.reg_operands == 1
4236 && i.imm_operands == 1
4237 && !i.types[1].bitfield.byte
4238 && i.op[0].imms->X_op == O_constant
4239 && fits_in_imm7 (i.op[0].imms->X_add_number))
4240 {
4241 /* Optimize: -Os:
4242 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4243 */
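      /* Restricting the immediate to 7 bits keeps bit 7 clear, so the
	 byte-sized AND yields the same ZF/SF/PF as the wider test; with
	 bit 7 set, SF could differ between the two widths.  */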
4244 unsigned int base_regnum = i.op[1].regs->reg_num;
4245 if (flag_code == CODE_64BIT || base_regnum < 4)
4246 {
4247 i.types[1].bitfield.byte = 1;
4248 /* Ignore the suffix. */
4249 i.suffix = 0;
4250 /* Convert to byte registers. */
4251 if (i.types[1].bitfield.word)
4252 j = 16;
4253 else if (i.types[1].bitfield.dword)
4254 j = 32;
4255 else
4256 j = 48;
4257 if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4258 j += 8;
4259 i.op[1].regs -= j;
4260 }
4261 }
4262 else if (flag_code == CODE_64BIT
4263 && i.tm.opcode_space == SPACE_BASE
4264 && ((i.types[1].bitfield.qword
4265 && i.reg_operands == 1
4266 && i.imm_operands == 1
4267 && i.op[0].imms->X_op == O_constant
4268 && ((i.tm.base_opcode == 0xb8
4269 && i.tm.extension_opcode == None
4270 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4271 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4272 && (i.tm.base_opcode == 0x24
4273 || (i.tm.base_opcode == 0x80
4274 && i.tm.extension_opcode == 0x4)
4275 || i.tm.mnem_off == MN_test
4276 || ((i.tm.base_opcode | 1) == 0xc7
4277 && i.tm.extension_opcode == 0x0)))
4278 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4279 && i.tm.base_opcode == 0x83
4280 && i.tm.extension_opcode == 0x4)))
4281 || (i.types[0].bitfield.qword
4282 && ((i.reg_operands == 2
4283 && i.op[0].regs == i.op[1].regs
4284 && (i.tm.mnem_off == MN_xor
4285 || i.tm.mnem_off == MN_sub))
4286 || i.tm.mnem_off == MN_clr))))
4287 {
4288 /* Optimize: -O:
4289 andq $imm31, %r64 -> andl $imm31, %r32
4290 andq $imm7, %r64 -> andl $imm7, %r32
4291 testq $imm31, %r64 -> testl $imm31, %r32
4292 xorq %r64, %r64 -> xorl %r32, %r32
4293 subq %r64, %r64 -> subl %r32, %r32
4294 movq $imm31, %r64 -> movl $imm31, %r32
4295 movq $imm32, %r64 -> movl $imm32, %r32
4296 */
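      /* All of these rely on 32-bit operations zero-extending into the
	 upper half of the 64-bit register, which lets the REX.W prefix
	 (and, for mov, the longer opcode form) be dropped.  */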
4297 i.tm.opcode_modifier.size = SIZE32;
4298 if (i.imm_operands)
4299 {
4300 i.types[0].bitfield.imm32 = 1;
4301 i.types[0].bitfield.imm32s = 0;
4302 i.types[0].bitfield.imm64 = 0;
4303 }
4304 else
4305 {
4306 i.types[0].bitfield.dword = 1;
4307 i.types[0].bitfield.qword = 0;
4308 }
4309 i.types[1].bitfield.dword = 1;
4310 i.types[1].bitfield.qword = 0;
4311 if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
4312 {
4313 /* Handle
4314 movq $imm31, %r64 -> movl $imm31, %r32
4315 movq $imm32, %r64 -> movl $imm32, %r32
4316 */
4317 i.tm.operand_types[0].bitfield.imm32 = 1;
4318 i.tm.operand_types[0].bitfield.imm32s = 0;
4319 i.tm.operand_types[0].bitfield.imm64 = 0;
4320 if ((i.tm.base_opcode | 1) == 0xc7)
4321 {
4322 /* Handle
4323 movq $imm31, %r64 -> movl $imm31, %r32
4324 */
4325 i.tm.base_opcode = 0xb8;
4326 i.tm.extension_opcode = None;
4327 i.tm.opcode_modifier.w = 0;
4328 i.tm.opcode_modifier.modrm = 0;
4329 }
4330 }
4331 }
4332 else if (optimize > 1
4333 && !optimize_for_space
4334 && i.reg_operands == 2
4335 && i.op[0].regs == i.op[1].regs
4336 && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
4337 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4338 {
4339 /* Optimize: -O2:
4340 andb %rN, %rN -> testb %rN, %rN
4341 andw %rN, %rN -> testw %rN, %rN
4342 andq %rN, %rN -> testq %rN, %rN
4343 orb %rN, %rN -> testb %rN, %rN
4344 orw %rN, %rN -> testw %rN, %rN
4345 orq %rN, %rN -> testq %rN, %rN
4346
4347 and outside of 64-bit mode
4348
4349 andl %rN, %rN -> testl %rN, %rN
4350 orl %rN, %rN -> testl %rN, %rN
4351 */
4352 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4353 }
4354 else if (i.tm.base_opcode == 0xba
4355 && i.tm.opcode_space == SPACE_0F
4356 && i.reg_operands == 1
4357 && i.op[0].imms->X_op == O_constant
4358 && i.op[0].imms->X_add_number >= 0)
4359 {
4360 /* Optimize: -O:
4361 btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
4362 btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
4363 btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4364
4365 	 With <BT> one of bts, btr, and btc also:
4366 	   <BT>w $n, %rN -> <BT>l $n, %rN (in 32-bit mode, n < 16)
4367 	   <BT>l $n, %rN -> <BT>w $n, %rN (in 16-bit mode, n < 16)
4368 */
4369 switch (flag_code)
4370 {
4371 case CODE_64BIT:
4372 if (i.tm.extension_opcode != 4)
4373 break;
4374 if (i.types[1].bitfield.qword
4375 && i.op[0].imms->X_add_number < 32
4376 && !(i.op[1].regs->reg_flags & RegRex))
4377 i.tm.opcode_modifier.size = SIZE32;
4378 /* Fall through. */
4379 case CODE_32BIT:
4380 if (i.types[1].bitfield.word
4381 && i.op[0].imms->X_add_number < 16)
4382 i.tm.opcode_modifier.size = SIZE32;
4383 break;
4384 case CODE_16BIT:
4385 if (i.op[0].imms->X_add_number < 16)
4386 i.tm.opcode_modifier.size = SIZE16;
4387 break;
4388 }
4389 }
4390 else if (i.reg_operands == 3
4391 && i.op[0].regs == i.op[1].regs
4392 && !i.types[2].bitfield.xmmword
4393 && (i.tm.opcode_modifier.vex
4394 || ((!i.mask.reg || i.mask.zeroing)
4395 && is_evex_encoding (&i.tm)
4396 && (i.vec_encoding != vex_encoding_evex
4397 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4398 || i.tm.cpu_flags.bitfield.cpuavx512vl
4399 || (i.tm.operand_types[2].bitfield.zmmword
4400 && i.types[2].bitfield.ymmword))))
4401 && i.tm.opcode_space == SPACE_0F
4402 && ((i.tm.base_opcode | 2) == 0x57
4403 || i.tm.base_opcode == 0xdf
4404 || i.tm.base_opcode == 0xef
4405 || (i.tm.base_opcode | 3) == 0xfb
4406 || i.tm.base_opcode == 0x42
4407 || i.tm.base_opcode == 0x47))
4408 {
4409 /* Optimize: -O1:
4410 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4411 vpsubq and vpsubw:
4412 EVEX VOP %zmmM, %zmmM, %zmmN
4413 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4414 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4415 EVEX VOP %ymmM, %ymmM, %ymmN
4416 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4417 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4418 VEX VOP %ymmM, %ymmM, %ymmN
4419 -> VEX VOP %xmmM, %xmmM, %xmmN
4420 VOP, one of vpandn and vpxor:
4421 VEX VOP %ymmM, %ymmM, %ymmN
4422 -> VEX VOP %xmmM, %xmmM, %xmmN
4423 VOP, one of vpandnd and vpandnq:
4424 EVEX VOP %zmmM, %zmmM, %zmmN
4425 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4426 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4427 EVEX VOP %ymmM, %ymmM, %ymmN
4428 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4429 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4430 VOP, one of vpxord and vpxorq:
4431 EVEX VOP %zmmM, %zmmM, %zmmN
4432 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4433 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4434 EVEX VOP %ymmM, %ymmM, %ymmN
4435 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4436 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4437 VOP, one of kxord and kxorq:
4438 VEX VOP %kM, %kM, %kN
4439 -> VEX kxorw %kM, %kM, %kN
4440 VOP, one of kandnd and kandnq:
4441 VEX VOP %kM, %kM, %kN
4442 -> VEX kandnw %kM, %kM, %kN
4443 */
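      /* With both sources identical each of these VOPs produces zero, and
	 the 128-bit forms zero the destination's upper bits as well, so
	 narrowing the vector length preserves the result.  */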
4444 if (is_evex_encoding (&i.tm))
4445 {
4446 if (i.vec_encoding != vex_encoding_evex)
4447 {
4448 i.tm.opcode_modifier.vex = VEX128;
4449 i.tm.opcode_modifier.vexw = VEXW0;
4450 i.tm.opcode_modifier.evex = 0;
4451 }
4452 else if (optimize > 1)
4453 i.tm.opcode_modifier.evex = EVEX128;
4454 else
4455 return;
4456 }
4457 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4458 {
4459 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4460 i.tm.opcode_modifier.vexw = VEXW0;
4461 }
4462 else
4463 i.tm.opcode_modifier.vex = VEX128;
4464
4465 if (i.tm.opcode_modifier.vex)
4466 for (j = 0; j < 3; j++)
4467 {
4468 i.types[j].bitfield.xmmword = 1;
4469 i.types[j].bitfield.ymmword = 0;
4470 }
4471 }
4472 else if (i.vec_encoding != vex_encoding_evex
4473 && !i.types[0].bitfield.zmmword
4474 && !i.types[1].bitfield.zmmword
4475 && !i.mask.reg
4476 && !i.broadcast.type
4477 && !i.broadcast.bytes
4478 && is_evex_encoding (&i.tm)
4479 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4480 || (i.tm.base_opcode & ~4) == 0xdb
4481 || (i.tm.base_opcode & ~4) == 0xeb)
4482 && i.tm.extension_opcode == None)
4483 {
4484 /* Optimize: -O1:
4485 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4486 vmovdqu32 and vmovdqu64:
4487 EVEX VOP %xmmM, %xmmN
4488 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4489 EVEX VOP %ymmM, %ymmN
4490 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4491 EVEX VOP %xmmM, mem
4492 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4493 EVEX VOP %ymmM, mem
4494 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4495 EVEX VOP mem, %xmmN
4496 	      -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4497 EVEX VOP mem, %ymmN
4498 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4499 VOP, one of vpand, vpandn, vpor, vpxor:
4500 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4501 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4502 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4503 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4504 EVEX VOP{d,q} mem, %xmmM, %xmmN
4505 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4506 EVEX VOP{d,q} mem, %ymmM, %ymmN
4507 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4508 */
4509 for (j = 0; j < i.operands; j++)
4510 if (operand_type_check (i.types[j], disp)
4511 && i.op[j].disps->X_op == O_constant)
4512 {
4513 /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4514 has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4515 bytes, we choose EVEX Disp8 over VEX Disp32. */
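	    /* E.g. with 32-byte operands (memshift 5) a displacement of
	       0x80 is Disp8 under EVEX (0x80 >> 5 == 4) but would need
	       Disp32 under VEX, in which case we must not convert.  */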
4516 int evex_disp8, vex_disp8;
4517 unsigned int memshift = i.memshift;
4518 offsetT n = i.op[j].disps->X_add_number;
4519
4520 evex_disp8 = fits_in_disp8 (n);
4521 i.memshift = 0;
4522 vex_disp8 = fits_in_disp8 (n);
4523 if (evex_disp8 != vex_disp8)
4524 {
4525 i.memshift = memshift;
4526 return;
4527 }
4528
4529 i.types[j].bitfield.disp8 = vex_disp8;
4530 break;
4531 }
4532 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4533 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4534 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4535 i.tm.opcode_modifier.vex
4536 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4537 i.tm.opcode_modifier.vexw = VEXW0;
4538 /* VPAND, VPOR, and VPXOR are commutative. */
4539 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4540 i.tm.opcode_modifier.commutative = 1;
4541 i.tm.opcode_modifier.evex = 0;
4542 i.tm.opcode_modifier.masking = 0;
4543 i.tm.opcode_modifier.broadcast = 0;
4544 i.tm.opcode_modifier.disp8memshift = 0;
4545 i.memshift = 0;
4546 if (j < i.operands)
4547 i.types[j].bitfield.disp8
4548 = fits_in_disp8 (i.op[j].disps->X_add_number);
4549 }
4550 }
4551
4552 /* Return non-zero for load instruction. */
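/* (Consulted by insert_lfence_after () when -mlfence-after-load is in
   effect.)  */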
4553
4554 static int
4555 load_insn_p (void)
4556 {
4557 unsigned int dest;
4558 int any_vex_p = is_any_vex_encoding (&i.tm);
4559 unsigned int base_opcode = i.tm.base_opcode | 1;
4560
4561 if (!any_vex_p)
4562 {
4563 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4564 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
4565 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4566 return 0;
4567
4568 /* pop. */
4569 if (i.tm.mnem_off == MN_pop)
4570 return 1;
4571 }
4572
4573 if (i.tm.opcode_space == SPACE_BASE)
4574 {
4575 /* popf, popa. */
4576 if (i.tm.base_opcode == 0x9d
4577 || i.tm.base_opcode == 0x61)
4578 return 1;
4579
4580 /* movs, cmps, lods, scas. */
4581 if ((i.tm.base_opcode | 0xb) == 0xaf)
4582 return 1;
4583
4584 /* outs, xlatb. */
4585 if (base_opcode == 0x6f
4586 || i.tm.base_opcode == 0xd7)
4587 return 1;
4588       /* NB: AMD-specific insns with implicit memory operands are
4589 	 intentionally not covered.  */
4590 }
4591
4592 /* No memory operand. */
4593 if (!i.mem_operands)
4594 return 0;
4595
4596 if (any_vex_p)
4597 {
4598 if (i.tm.mnem_off == MN_vldmxcsr)
4599 return 1;
4600 }
4601 else if (i.tm.opcode_space == SPACE_BASE)
4602 {
4603 /* test, not, neg, mul, imul, div, idiv. */
4604 if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
4605 return 1;
4606
4607 /* inc, dec. */
4608 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4609 return 1;
4610
4611 /* add, or, adc, sbb, and, sub, xor, cmp. */
4612 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4613 return 1;
4614
4615 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4616 if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
4617 && i.tm.extension_opcode != 6)
4618 return 1;
4619
4620 /* Check for x87 instructions. */
4621 if ((base_opcode | 6) == 0xdf)
4622 {
4623 /* Skip fst, fstp, fstenv, fstcw. */
4624 if (i.tm.base_opcode == 0xd9
4625 && (i.tm.extension_opcode == 2
4626 || i.tm.extension_opcode == 3
4627 || i.tm.extension_opcode == 6
4628 || i.tm.extension_opcode == 7))
4629 return 0;
4630
4631 /* Skip fisttp, fist, fistp, fstp. */
4632 if (i.tm.base_opcode == 0xdb
4633 && (i.tm.extension_opcode == 1
4634 || i.tm.extension_opcode == 2
4635 || i.tm.extension_opcode == 3
4636 || i.tm.extension_opcode == 7))
4637 return 0;
4638
4639 /* Skip fisttp, fst, fstp, fsave, fstsw. */
4640 if (i.tm.base_opcode == 0xdd
4641 && (i.tm.extension_opcode == 1
4642 || i.tm.extension_opcode == 2
4643 || i.tm.extension_opcode == 3
4644 || i.tm.extension_opcode == 6
4645 || i.tm.extension_opcode == 7))
4646 return 0;
4647
4648 /* Skip fisttp, fist, fistp, fbstp, fistp. */
4649 if (i.tm.base_opcode == 0xdf
4650 && (i.tm.extension_opcode == 1
4651 || i.tm.extension_opcode == 2
4652 || i.tm.extension_opcode == 3
4653 || i.tm.extension_opcode == 6
4654 || i.tm.extension_opcode == 7))
4655 return 0;
4656
4657 return 1;
4658 }
4659 }
4660 else if (i.tm.opcode_space == SPACE_0F)
4661 {
4662 /* bt, bts, btr, btc. */
4663 if (i.tm.base_opcode == 0xba
4664 && (i.tm.extension_opcode | 3) == 7)
4665 return 1;
4666
4667 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
4668 if (i.tm.base_opcode == 0xc7
4669 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4670 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4671 || i.tm.extension_opcode == 6))
4672 return 1;
4673
4674 /* fxrstor, ldmxcsr, xrstor. */
4675 if (i.tm.base_opcode == 0xae
4676 && (i.tm.extension_opcode == 1
4677 || i.tm.extension_opcode == 2
4678 || i.tm.extension_opcode == 5))
4679 return 1;
4680
4681 /* lgdt, lidt, lmsw. */
4682 if (i.tm.base_opcode == 0x01
4683 && (i.tm.extension_opcode == 2
4684 || i.tm.extension_opcode == 3
4685 || i.tm.extension_opcode == 6))
4686 return 1;
4687 }
4688
4689 dest = i.operands - 1;
4690
4691 /* Check fake imm8 operand and 3 source operands. */
4692 if ((i.tm.opcode_modifier.immext
4693 || i.reg_operands + i.mem_operands == 4)
4694 && i.types[dest].bitfield.imm8)
4695 dest--;
4696
4697 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
4698 if (i.tm.opcode_space == SPACE_BASE
4699 && ((base_opcode | 0x38) == 0x39
4700 || (base_opcode | 2) == 0x87))
4701 return 1;
4702
4703 if (i.tm.mnem_off == MN_xadd)
4704 return 1;
4705
4706 /* Check for load instruction. */
4707 return (i.types[dest].bitfield.class != ClassNone
4708 || i.types[dest].bitfield.instance == Accum);
4709 }
4710
4711 /* Output lfence (0x0f 0xae 0xe8) after instruction.  */
4712
4713 static void
4714 insert_lfence_after (void)
4715 {
4716 if (lfence_after_load && load_insn_p ())
4717 {
4718 /* There are also two REP string instructions that require
4719 special treatment. Specifically, the compare string (CMPS)
4720 and scan string (SCAS) instructions set EFLAGS in a manner
4721 that depends on the data being compared/scanned. When used
4722 with a REP prefix, the number of iterations may therefore
4723 vary depending on this data. If the data is a program secret
4724 chosen by the adversary using an LVI method,
4725 then this data-dependent behavior may leak some aspect
4726 of the secret. */
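      /* base_opcode | 0x9 == 0xaf matches exactly CMPS (0xa6/0xa7) and
	 SCAS (0xae/0xaf).  */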
4727 if (((i.tm.base_opcode | 0x9) == 0xaf)
4728 && i.prefix[REP_PREFIX])
4729 {
4730 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4731 insn_name (&i.tm));
4732 }
4733 char *p = frag_more (3);
4734 *p++ = 0xf;
4735 *p++ = 0xae;
4736 *p = 0xe8;
4737 }
4738 }
4739
4740 /* Output lfence (0x0f 0xae 0xe8) before instruction.  */
4741
4742 static void
4743 insert_lfence_before (void)
4744 {
4745 char *p;
4746
4747 if (i.tm.opcode_space != SPACE_BASE)
4748 return;
4749
4750 if (i.tm.base_opcode == 0xff
4751 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4752 {
4753 /* Insert lfence before indirect branch if needed. */
4754
4755 if (lfence_before_indirect_branch == lfence_branch_none)
4756 return;
4757
4758 if (i.operands != 1)
4759 abort ();
4760
4761 if (i.reg_operands == 1)
4762 {
4763 /* Indirect branch via register. Don't insert lfence with
4764 -mlfence-after-load=yes. */
4765 if (lfence_after_load
4766 || lfence_before_indirect_branch == lfence_branch_memory)
4767 return;
4768 }
4769 else if (i.mem_operands == 1
4770 && lfence_before_indirect_branch != lfence_branch_register)
4771 {
4772 as_warn (_("indirect `%s` with memory operand should be avoided"),
4773 insn_name (&i.tm));
4774 return;
4775 }
4776 else
4777 return;
4778
4779 if (last_insn.kind != last_insn_other
4780 && last_insn.seg == now_seg)
4781 {
4782 as_warn_where (last_insn.file, last_insn.line,
4783 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4784 last_insn.name, insn_name (&i.tm));
4785 return;
4786 }
4787
4788 p = frag_more (3);
4789 *p++ = 0xf;
4790 *p++ = 0xae;
4791 *p = 0xe8;
4792 return;
4793 }
4794
4795 /* Output or/not/shl and lfence before near ret. */
4796 if (lfence_before_ret != lfence_before_ret_none
4797 && (i.tm.base_opcode | 1) == 0xc3)
4798 {
4799 if (last_insn.kind != last_insn_other
4800 && last_insn.seg == now_seg)
4801 {
4802 as_warn_where (last_insn.file, last_insn.line,
4803 _("`%s` skips -mlfence-before-ret on `%s`"),
4804 last_insn.name, insn_name (&i.tm));
4805 return;
4806 }
4807
4808       /* Near ret ignores the operand size override in 64-bit code.  */
4809 char prefix = flag_code == CODE_64BIT
4810 ? 0x48
4811 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4812
4813 if (lfence_before_ret == lfence_before_ret_not)
4814 {
4815 /* not: 0xf71424, may add prefix
4816 for operand size override or 64-bit code. */
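	  /* The two NOTs cancel, leaving the return address unchanged;
	     the point is the read-modify-write of (%rsp), which the
	     following LFENCE then orders.  */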
4817 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4818 if (prefix)
4819 *p++ = prefix;
4820 *p++ = 0xf7;
4821 *p++ = 0x14;
4822 *p++ = 0x24;
4823 if (prefix)
4824 *p++ = prefix;
4825 *p++ = 0xf7;
4826 *p++ = 0x14;
4827 *p++ = 0x24;
4828 }
4829 else
4830 {
4831 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4832 if (prefix)
4833 *p++ = prefix;
4834 if (lfence_before_ret == lfence_before_ret_or)
4835 {
4836 /* or: 0x830c2400, may add prefix
4837 for operand size override or 64-bit code. */
4838 *p++ = 0x83;
4839 *p++ = 0x0c;
4840 }
4841 else
4842 {
4843 /* shl: 0xc1242400, may add prefix
4844 for operand size override or 64-bit code. */
4845 *p++ = 0xc1;
4846 *p++ = 0x24;
4847 }
4848
4849 *p++ = 0x24;
4850 *p++ = 0x0;
4851 }
4852
4853 *p++ = 0xf;
4854 *p++ = 0xae;
4855 *p = 0xe8;
4856 }
4857 }
4858
4859 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
4860    parsing pass.  Instead of introducing a rarely used new insn attribute, this
4861 utilizes a common pattern between affected templates. It is deemed
4862 acceptable that this will lead to unnecessary pass 2 preparations in a
4863 limited set of cases. */
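/* (0f be/bf are the MOVSX templates; base opcode 0x63 is MOVSXD, aka ARPL
   outside of 64-bit mode.)  */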
4864 static INLINE bool may_need_pass2 (const insn_template *t)
4865 {
4866 return t->opcode_modifier.sse2avx
4867 /* Note that all SSE2AVX templates have at least one operand. */
4868 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
4869 : (t->opcode_space == SPACE_0F
4870 && (t->base_opcode | 1) == 0xbf)
4871 || (t->opcode_space == SPACE_BASE
4872 && t->base_opcode == 0x63);
4873 }
4874
4875 /* This is the guts of the machine-dependent assembler. LINE points to a
4876 machine dependent instruction. This function is supposed to emit
4877 the frags/bytes it assembles to. */
4878
4879 void
4880 md_assemble (char *line)
4881 {
4882 unsigned int j;
4883 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
4884 const char *end, *pass1_mnem = NULL;
4885 enum i386_error pass1_err = 0;
4886 const insn_template *t;
4887
4888 /* Initialize globals. */
4889 current_templates = NULL;
4890 retry:
4891 memset (&i, '\0', sizeof (i));
4892 i.rounding.type = rc_none;
4893 for (j = 0; j < MAX_OPERANDS; j++)
4894 i.reloc[j] = NO_RELOC;
4895 memset (disp_expressions, '\0', sizeof (disp_expressions));
4896 memset (im_expressions, '\0', sizeof (im_expressions));
4897 save_stack_p = save_stack;
4898
4899 /* First parse an instruction mnemonic & call i386_operand for the operands.
4900 We assume that the scrubber has arranged it so that line[0] is the valid
4901 start of a (possibly prefixed) mnemonic. */
4902
4903 end = parse_insn (line, mnemonic);
4904 if (end == NULL)
4905 {
4906 if (pass1_mnem != NULL)
4907 goto match_error;
4908 if (i.error != no_error)
4909 {
4910 gas_assert (current_templates != NULL);
4911 if (may_need_pass2 (current_templates->start) && !i.suffix)
4912 goto no_match;
4913 /* No point in trying a 2nd pass - it'll only find the same suffix
4914 again. */
4915 mnem_suffix = i.suffix;
4916 goto match_error;
4917 }
4918 return;
4919 }
4920 t = current_templates->start;
4921 if (may_need_pass2 (t))
4922 {
4923 /* Make a copy of the full line in case we need to retry. */
4924 copy = xstrdup (line);
4925 }
4926 line += end - line;
4927 mnem_suffix = i.suffix;
4928
4929 line = parse_operands (line, mnemonic);
4930 this_operand = -1;
4931 if (line == NULL)
4932 {
4933 free (copy);
4934 return;
4935 }
4936
4937 /* Now we've parsed the mnemonic into a set of templates, and have the
4938 operands at hand. */
4939
4940 /* All Intel opcodes have reversed operands except for "bound", "enter",
4941 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4942 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
4943 intersegment "jmp" and "call" instructions with 2 immediate operands so
4944 that the immediate segment precedes the offset consistently in Intel and
4945 AT&T modes. */
4946 if (intel_syntax
4947 && i.operands > 1
4948 && (t->mnem_off != MN_bound)
4949 && !startswith (mnemonic, "invlpg")
4950 && !startswith (mnemonic, "monitor")
4951 && !startswith (mnemonic, "mwait")
4952 && (t->mnem_off != MN_pvalidate)
4953 && !startswith (mnemonic, "rmp")
4954 && (t->mnem_off != MN_tpause)
4955 && (t->mnem_off != MN_umwait)
4956 && !(i.operands == 2
4957 && operand_type_check (i.types[0], imm)
4958 && operand_type_check (i.types[1], imm)))
4959 swap_operands ();
4960
4961   /* The order of the immediates should be reversed for the two-immediate
4962      extrq and insertq instructions.  */
4963 if (i.imm_operands == 2
4964 && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
4965 swap_2_operands (0, 1);
4966
4967 if (i.imm_operands)
4968 optimize_imm ();
4969
4970 if (i.disp_operands && !optimize_disp (t))
4971 return;
4972
4973 /* Next, we find a template that matches the given insn,
4974 making sure the overlap of the given operands types is consistent
4975 with the template operand types. */
4976
4977 if (!(t = match_template (mnem_suffix)))
4978 {
4979 const char *err_msg;
4980
4981 if (copy && !mnem_suffix)
4982 {
4983 line = copy;
4984 copy = NULL;
4985 no_match:
4986 pass1_err = i.error;
4987 pass1_mnem = insn_name (current_templates->start);
4988 goto retry;
4989 }
4990
4991 /* If a non-/only-64bit template (group) was found in pass 1, and if
4992 _some_ template (group) was found in pass 2, squash pass 1's
4993 error. */
4994 if (pass1_err == unsupported_64bit)
4995 pass1_mnem = NULL;
4996
4997 match_error:
4998 free (copy);
4999
5000 switch (pass1_mnem ? pass1_err : i.error)
5001 {
5002 default:
5003 abort ();
5004 case operand_size_mismatch:
5005 err_msg = _("operand size mismatch");
5006 break;
5007 case operand_type_mismatch:
5008 err_msg = _("operand type mismatch");
5009 break;
5010 case register_type_mismatch:
5011 err_msg = _("register type mismatch");
5012 break;
5013 case number_of_operands_mismatch:
5014 err_msg = _("number of operands mismatch");
5015 break;
5016 case invalid_instruction_suffix:
5017 err_msg = _("invalid instruction suffix");
5018 break;
5019 case bad_imm4:
5020 err_msg = _("constant doesn't fit in 4 bits");
5021 break;
5022 case unsupported_with_intel_mnemonic:
5023 err_msg = _("unsupported with Intel mnemonic");
5024 break;
5025 case unsupported_syntax:
5026 err_msg = _("unsupported syntax");
5027 break;
5028 case unsupported:
5029 as_bad (_("unsupported instruction `%s'"),
5030 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5031 return;
5032 case unsupported_on_arch:
5033 as_bad (_("`%s' is not supported on `%s%s'"),
5034 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5035 cpu_arch_name ? cpu_arch_name : default_arch,
5036 cpu_sub_arch_name ? cpu_sub_arch_name : "");
5037 return;
5038 case unsupported_64bit:
5039 if (ISLOWER (mnem_suffix))
5040 {
5041 if (flag_code == CODE_64BIT)
5042 as_bad (_("`%s%c' is not supported in 64-bit mode"),
5043 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5044 mnem_suffix);
5045 else
5046 as_bad (_("`%s%c' is only supported in 64-bit mode"),
5047 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5048 mnem_suffix);
5049 }
5050 else
5051 {
5052 if (flag_code == CODE_64BIT)
5053 as_bad (_("`%s' is not supported in 64-bit mode"),
5054 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5055 else
5056 as_bad (_("`%s' is only supported in 64-bit mode"),
5057 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5058 }
5059 return;
5060 case invalid_sib_address:
5061 err_msg = _("invalid SIB address");
5062 break;
5063 case invalid_vsib_address:
5064 err_msg = _("invalid VSIB address");
5065 break;
5066 case invalid_vector_register_set:
5067 err_msg = _("mask, index, and destination registers must be distinct");
5068 break;
5069 case invalid_tmm_register_set:
5070 err_msg = _("all tmm registers must be distinct");
5071 break;
5072 case invalid_dest_and_src_register_set:
5073 err_msg = _("destination and source registers must be distinct");
5074 break;
5075 case unsupported_vector_index_register:
5076 err_msg = _("unsupported vector index register");
5077 break;
5078 case unsupported_broadcast:
5079 err_msg = _("unsupported broadcast");
5080 break;
5081 case broadcast_needed:
5082 err_msg = _("broadcast is needed for operand of such type");
5083 break;
5084 case unsupported_masking:
5085 err_msg = _("unsupported masking");
5086 break;
5087 case mask_not_on_destination:
5088 err_msg = _("mask not on destination operand");
5089 break;
5090 case no_default_mask:
5091 err_msg = _("default mask isn't allowed");
5092 break;
5093 case unsupported_rc_sae:
5094 err_msg = _("unsupported static rounding/sae");
5095 break;
5096 case invalid_register_operand:
5097 err_msg = _("invalid register operand");
5098 break;
5099 }
5100 as_bad (_("%s for `%s'"), err_msg,
5101 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5102 return;
5103 }
5104
5105 free (copy);
5106
5107 if (sse_check != check_none
5108 /* The opcode space check isn't strictly needed; it's there only to
5109 bypass the logic below when easily possible. */
5110 && t->opcode_space >= SPACE_0F
5111 && t->opcode_space <= SPACE_0F3A
5112 && !i.tm.cpu_flags.bitfield.cpusse4a
5113 && !is_any_vex_encoding (t))
5114 {
5115 bool simd = false;
5116
5117 for (j = 0; j < t->operands; ++j)
5118 {
5119 if (t->operand_types[j].bitfield.class == RegMMX)
5120 break;
5121 if (t->operand_types[j].bitfield.class == RegSIMD)
5122 simd = true;
5123 }
5124
5125 if (j >= t->operands && simd)
5126 (sse_check == check_warning
5127 ? as_warn
5128 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
5129 }
5130
5131 if (i.tm.opcode_modifier.fwait)
5132 if (!add_prefix (FWAIT_OPCODE))
5133 return;
5134
5135 /* Check if REP prefix is OK. */
5136 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5137 {
5138 as_bad (_("invalid instruction `%s' after `%s'"),
5139 insn_name (&i.tm), i.rep_prefix);
5140 return;
5141 }
5142
5143 /* Check for lock without a lockable instruction. Destination operand
5144 must be memory unless it is xchg (0x86). */
5145 if (i.prefix[LOCK_PREFIX])
5146 {
5147 if (i.tm.opcode_modifier.prefixok < PrefixLock
5148 || i.mem_operands == 0
5149 || (i.tm.base_opcode != 0x86
5150 && !(i.flags[i.operands - 1] & Operand_Mem)))
5151 {
5152 as_bad (_("expecting lockable instruction after `lock'"));
5153 return;
5154 }
5155
5156 /* Zap the redundant prefix from XCHG when optimizing. */
5157 if (i.tm.base_opcode == 0x86 && optimize && !i.no_optimize)
5158 i.prefix[LOCK_PREFIX] = 0;
5159 }
5160
5161 if (is_any_vex_encoding (&i.tm)
5162 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5163 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
5164 {
5165 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
5166 if (i.prefix[DATA_PREFIX])
5167 {
5168 as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
5169 return;
5170 }
5171
5172 /* Don't allow e.g. KMOV in TLS code sequences. */
5173 for (j = i.imm_operands; j < i.operands; ++j)
5174 switch (i.reloc[j])
5175 {
5176 case BFD_RELOC_386_TLS_GOTIE:
5177 case BFD_RELOC_386_TLS_LE_32:
5178 case BFD_RELOC_X86_64_GOTTPOFF:
5179 case BFD_RELOC_X86_64_TLSLD:
5180 as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
5181 return;
5182 default:
5183 break;
5184 }
5185 }
5186
5187 /* Check if HLE prefix is OK. */
5188 if (i.hle_prefix && !check_hle ())
5189 return;
5190
5191 /* Check BND prefix. */
5192 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5193 as_bad (_("expecting valid branch instruction after `bnd'"));
5194
5195 /* Check NOTRACK prefix. */
5196 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5197 as_bad (_("expecting indirect branch instruction after `notrack'"));
5198
5199 if (i.tm.cpu_flags.bitfield.cpumpx)
5200 {
5201 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5202 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5203 else if (flag_code != CODE_16BIT
5204 ? i.prefix[ADDR_PREFIX]
5205 : i.mem_operands && !i.prefix[ADDR_PREFIX])
5206 as_bad (_("16-bit address isn't allowed in MPX instructions"));
5207 }
5208
5209 /* Insert BND prefix. */
5210 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5211 {
5212 if (!i.prefix[BND_PREFIX])
5213 add_prefix (BND_PREFIX_OPCODE);
5214 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5215 {
5216 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5217 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5218 }
5219 }
5220
5221 /* Check string instruction segment overrides. */
5222 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5223 {
5224 gas_assert (i.mem_operands);
5225 if (!check_string ())
5226 return;
5227 i.disp_operands = 0;
5228 }
5229
5230   /* The memory operand of (%dx) should only be used with input/output
5231      instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee).  */
5232 if (i.input_output_operand
5233 && ((i.tm.base_opcode | 0x82) != 0xee
5234 || i.tm.opcode_space != SPACE_BASE))
5235 {
5236 as_bad (_("input/output port address isn't allowed with `%s'"),
5237 insn_name (&i.tm));
5238 return;
5239 }
5240
5241 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5242 optimize_encoding ();
5243
5244 if (use_unaligned_vector_move)
5245 encode_with_unaligned_vector_move ();
5246
5247 if (!process_suffix ())
5248 return;
5249
5250 /* Check if IP-relative addressing requirements can be satisfied. */
5251 if (i.tm.cpu_flags.bitfield.cpuprefetchi
5252 && !(i.base_reg && i.base_reg->reg_num == RegIP))
5253 as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
5254
5255 /* Update operand types and check extended states. */
5256 for (j = 0; j < i.operands; j++)
5257 {
5258 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5259 switch (i.tm.operand_types[j].bitfield.class)
5260 {
5261 default:
5262 break;
5263 case RegMMX:
5264 i.xstate |= xstate_mmx;
5265 break;
5266 case RegMask:
5267 i.xstate |= xstate_mask;
5268 break;
5269 case RegSIMD:
5270 if (i.tm.operand_types[j].bitfield.tmmword)
5271 i.xstate |= xstate_tmm;
5272 else if (i.tm.operand_types[j].bitfield.zmmword)
5273 i.xstate |= xstate_zmm;
5274 else if (i.tm.operand_types[j].bitfield.ymmword)
5275 i.xstate |= xstate_ymm;
5276 else if (i.tm.operand_types[j].bitfield.xmmword)
5277 i.xstate |= xstate_xmm;
5278 break;
5279 }
5280 }
5281
5282 /* Make still unresolved immediate matches conform to size of immediate
5283 given in i.suffix. */
5284 if (!finalize_imm ())
5285 return;
5286
5287 if (i.types[0].bitfield.imm1)
5288 i.imm_operands = 0; /* kludge for shift insns. */
5289
5290 /* For insns with operands there are more diddles to do to the opcode. */
5291 if (i.operands)
5292 {
5293 if (!process_operands ())
5294 return;
5295 }
5296 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5297 {
5298 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
5299 as_warn (_("translating to `%sp'"), insn_name (&i.tm));
5300 }
5301
5302 if (is_any_vex_encoding (&i.tm))
5303 {
5304 if (!cpu_arch_flags.bitfield.cpui286)
5305 {
5306 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5307 insn_name (&i.tm));
5308 return;
5309 }
5310
5311 /* Check for explicit REX prefix. */
5312 if (i.prefix[REX_PREFIX] || i.rex_encoding)
5313 {
5314 as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
5315 return;
5316 }
5317
5318 if (i.tm.opcode_modifier.vex)
5319 build_vex_prefix (t);
5320 else
5321 build_evex_prefix ();
5322
5323 /* The individual REX.RXBW bits got consumed. */
5324 i.rex &= REX_OPCODE;
5325 }
5326
5327 /* Handle conversion of 'int $3' --> special int3 insn. */
5328 if (i.tm.mnem_off == MN_int
5329 && i.op[0].imms->X_add_number == 3)
5330 {
5331 i.tm.base_opcode = INT3_OPCODE;
5332 i.imm_operands = 0;
5333 }
5334
5335 if ((i.tm.opcode_modifier.jump == JUMP
5336 || i.tm.opcode_modifier.jump == JUMP_BYTE
5337 || i.tm.opcode_modifier.jump == JUMP_DWORD)
5338 && i.op[0].disps->X_op == O_constant)
5339 {
5340 /* Convert "jmp constant" (and "call constant") to a jump (call) to
5341 the absolute address given by the constant. Since ix86 jumps and
5342 calls are pc relative, we need to generate a reloc. */
5343 i.op[0].disps->X_add_symbol = &abs_symbol;
5344 i.op[0].disps->X_op = O_symbol;
5345 }
5346
5347 /* For 8 bit registers we need an empty rex prefix. Also if the
5348 instruction already has a prefix, we need to convert old
5349 registers to new ones. */
5350
5351 if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5352 && (i.op[0].regs->reg_flags & RegRex64) != 0)
5353 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5354 && (i.op[1].regs->reg_flags & RegRex64) != 0)
5355 || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5356 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5357 && i.rex != 0))
5358 {
5359 int x;
5360
5361 i.rex |= REX_OPCODE;
5362 for (x = 0; x < 2; x++)
5363 {
5364 /* Look for 8 bit operand that uses old registers. */
5365 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5366 && (i.op[x].regs->reg_flags & RegRex64) == 0)
5367 {
5368 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5369 /* In case it is "hi" register, give up. */
5370 if (i.op[x].regs->reg_num > 3)
5371 as_bad (_("can't encode register '%s%s' in an "
5372 "instruction requiring REX prefix."),
5373 register_prefix, i.op[x].regs->reg_name);
5374
5375 /* Otherwise it is equivalent to the extended register.
5376 Since the encoding doesn't change this is merely
5377 cosmetic cleanup for debug output. */
5378
5379 i.op[x].regs = i.op[x].regs + 8;
5380 }
5381 }
5382 }
5383
5384 if (i.rex == 0 && i.rex_encoding)
5385 {
5386 /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
5387 that uses legacy register. If it is "hi" register, don't add
5388 the REX_OPCODE byte. */
5389 int x;
5390 for (x = 0; x < 2; x++)
5391 if (i.types[x].bitfield.class == Reg
5392 && i.types[x].bitfield.byte
5393 && (i.op[x].regs->reg_flags & RegRex64) == 0
5394 && i.op[x].regs->reg_num > 3)
5395 {
5396 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5397 i.rex_encoding = false;
5398 break;
5399 }
5400
5401 if (i.rex_encoding)
5402 i.rex = REX_OPCODE;
5403 }
5404
5405 if (i.rex != 0)
5406 add_prefix (REX_OPCODE | i.rex);
5407
5408 insert_lfence_before ();
5409
5410 /* We are ready to output the insn. */
5411 output_insn ();
5412
5413 insert_lfence_after ();
5414
5415 last_insn.seg = now_seg;
5416
5417 if (i.tm.opcode_modifier.isprefix)
5418 {
5419 last_insn.kind = last_insn_prefix;
5420 last_insn.name = insn_name (&i.tm);
5421 last_insn.file = as_where (&last_insn.line);
5422 }
5423 else
5424 last_insn.kind = last_insn_other;
5425 }
5426
5427 /* The Q suffix is generally valid only in 64-bit mode, with very few
5428 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
5429 and fisttp only one of their two templates is matched below: That's
5430 sufficient since other relevant attributes are the same between both
5431 respective templates. */
5432 static INLINE bool q_suffix_allowed(const insn_template *t)
5433 {
5434 return flag_code == CODE_64BIT
5435 || (t->opcode_space == SPACE_BASE
5436 && t->base_opcode == 0xdf
5437 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
5438 || t->mnem_off == MN_cmpxchg8b;
5439 }
5440
5441 static const char *
5442 parse_insn (const char *line, char *mnemonic)
5443 {
5444 const char *l = line, *token_start = l;
5445 char *mnem_p;
5446 bool pass1 = !current_templates;
5447 int supported;
5448 const insn_template *t;
5449 char *dot_p = NULL;
5450
5451 while (1)
5452 {
5453 mnem_p = mnemonic;
5454 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5455 {
5456 if (*mnem_p == '.')
5457 dot_p = mnem_p;
5458 mnem_p++;
5459 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5460 {
5461 as_bad (_("no such instruction: `%s'"), token_start);
5462 return NULL;
5463 }
5464 l++;
5465 }
5466 if (!is_space_char (*l)
5467 && *l != END_OF_INSN
5468 && (intel_syntax
5469 || (*l != PREFIX_SEPARATOR
5470 && *l != ',')))
5471 {
5472 as_bad (_("invalid character %s in mnemonic"),
5473 output_invalid (*l));
5474 return NULL;
5475 }
5476 if (token_start == l)
5477 {
5478 if (!intel_syntax && *l == PREFIX_SEPARATOR)
5479 as_bad (_("expecting prefix; got nothing"));
5480 else
5481 as_bad (_("expecting mnemonic; got nothing"));
5482 return NULL;
5483 }
5484
5485 /* Look up instruction (or prefix) via hash table. */
5486 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5487
5488 if (*l != END_OF_INSN
5489 && (!is_space_char (*l) || l[1] != END_OF_INSN)
5490 && current_templates
5491 && current_templates->start->opcode_modifier.isprefix)
5492 {
5493 if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5494 {
5495 as_bad ((flag_code != CODE_64BIT
5496 ? _("`%s' is only supported in 64-bit mode")
5497 : _("`%s' is not supported in 64-bit mode")),
5498 insn_name (current_templates->start));
5499 return NULL;
5500 }
5501 /* If we are in 16-bit mode, do not allow addr16 or data16.
5502 Similarly, in 32-bit mode, do not allow addr32 or data32. */
5503 if ((current_templates->start->opcode_modifier.size == SIZE16
5504 || current_templates->start->opcode_modifier.size == SIZE32)
5505 && flag_code != CODE_64BIT
5506 && ((current_templates->start->opcode_modifier.size == SIZE32)
5507 ^ (flag_code == CODE_16BIT)))
5508 {
5509 as_bad (_("redundant %s prefix"),
5510 insn_name (current_templates->start));
5511 return NULL;
5512 }
5513
5514 if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5515 {
5516 /* Handle pseudo prefixes. */
5517 switch (current_templates->start->extension_opcode)
5518 {
5519 case Prefix_Disp8:
5520 /* {disp8} */
5521 i.disp_encoding = disp_encoding_8bit;
5522 break;
5523 case Prefix_Disp16:
5524 /* {disp16} */
5525 i.disp_encoding = disp_encoding_16bit;
5526 break;
5527 case Prefix_Disp32:
5528 /* {disp32} */
5529 i.disp_encoding = disp_encoding_32bit;
5530 break;
5531 case Prefix_Load:
5532 /* {load} */
5533 i.dir_encoding = dir_encoding_load;
5534 break;
5535 case Prefix_Store:
5536 /* {store} */
5537 i.dir_encoding = dir_encoding_store;
5538 break;
5539 case Prefix_VEX:
5540 /* {vex} */
5541 i.vec_encoding = vex_encoding_vex;
5542 break;
5543 case Prefix_VEX3:
5544 /* {vex3} */
5545 i.vec_encoding = vex_encoding_vex3;
5546 break;
5547 case Prefix_EVEX:
5548 /* {evex} */
5549 i.vec_encoding = vex_encoding_evex;
5550 break;
5551 case Prefix_REX:
5552 /* {rex} */
5553 i.rex_encoding = true;
5554 break;
5555 case Prefix_NoOptimize:
5556 /* {nooptimize} */
5557 i.no_optimize = true;
5558 break;
5559 default:
5560 abort ();
5561 }
5562 }
5563 else
5564 {
5565 /* Add prefix, checking for repeated prefixes. */
5566 switch (add_prefix (current_templates->start->base_opcode))
5567 {
5568 case PREFIX_EXIST:
5569 return NULL;
5570 case PREFIX_DS:
5571 if (current_templates->start->cpu_flags.bitfield.cpuibt)
5572 i.notrack_prefix = insn_name (current_templates->start);
5573 break;
5574 case PREFIX_REP:
5575 if (current_templates->start->cpu_flags.bitfield.cpuhle)
5576 i.hle_prefix = insn_name (current_templates->start);
5577 else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5578 i.bnd_prefix = insn_name (current_templates->start);
5579 else
5580 i.rep_prefix = insn_name (current_templates->start);
5581 break;
5582 default:
5583 break;
5584 }
5585 }
5586 /* Skip past PREFIX_SEPARATOR and reset token_start. */
5587 token_start = ++l;
5588 }
5589 else
5590 break;
5591 }
5592
5593 if (!current_templates)
5594 {
5595 /* Deprecated functionality (new code should use pseudo-prefixes instead):
5596 Check if we should swap operand or force 32bit displacement in
5597 encoding. */
5598 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5599 i.dir_encoding = dir_encoding_swap;
5600 else if (mnem_p - 3 == dot_p
5601 && dot_p[1] == 'd'
5602 && dot_p[2] == '8')
5603 i.disp_encoding = disp_encoding_8bit;
5604 else if (mnem_p - 4 == dot_p
5605 && dot_p[1] == 'd'
5606 && dot_p[2] == '3'
5607 && dot_p[3] == '2')
5608 i.disp_encoding = disp_encoding_32bit;
5609 else
5610 goto check_suffix;
5611 mnem_p = dot_p;
5612 *dot_p = '\0';
5613 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5614 }
5615
5616 if (!current_templates || !pass1)
5617 {
5618 current_templates = NULL;
5619
5620 check_suffix:
5621 if (mnem_p > mnemonic)
5622 {
5623 /* See if we can get a match by trimming off a suffix. */
5624 switch (mnem_p[-1])
5625 {
5626 case WORD_MNEM_SUFFIX:
5627 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5628 i.suffix = SHORT_MNEM_SUFFIX;
5629 else
5630 /* Fall through. */
5631 case BYTE_MNEM_SUFFIX:
5632 case QWORD_MNEM_SUFFIX:
5633 i.suffix = mnem_p[-1];
5634 mnem_p[-1] = '\0';
5635 current_templates
5636 = (const templates *) str_hash_find (op_hash, mnemonic);
5637 break;
5638 case SHORT_MNEM_SUFFIX:
5639 case LONG_MNEM_SUFFIX:
5640 if (!intel_syntax)
5641 {
5642 i.suffix = mnem_p[-1];
5643 mnem_p[-1] = '\0';
5644 current_templates
5645 = (const templates *) str_hash_find (op_hash, mnemonic);
5646 }
5647 break;
5648
5649 /* Intel Syntax. */
5650 case 'd':
5651 if (intel_syntax)
5652 {
5653 if (intel_float_operand (mnemonic) == 1)
5654 i.suffix = SHORT_MNEM_SUFFIX;
5655 else
5656 i.suffix = LONG_MNEM_SUFFIX;
5657 mnem_p[-1] = '\0';
5658 current_templates
5659 = (const templates *) str_hash_find (op_hash, mnemonic);
5660 }
5661 /* For compatibility reasons accept MOVSD and CMPSD without
5662 operands even in AT&T mode. */
5663 else if (*l == END_OF_INSN
5664 || (is_space_char (*l) && l[1] == END_OF_INSN))
5665 {
5666 mnem_p[-1] = '\0';
5667 current_templates
5668 = (const templates *) str_hash_find (op_hash, mnemonic);
5669 if (current_templates != NULL
5670 /* MOVS or CMPS */
5671 && (current_templates->start->base_opcode | 2) == 0xa6
5672 && current_templates->start->opcode_space
5673 == SPACE_BASE
5674 && mnem_p[-2] == 's')
5675 {
5676 as_warn (_("found `%sd'; assuming `%sl' was meant"),
5677 mnemonic, mnemonic);
5678 i.suffix = LONG_MNEM_SUFFIX;
5679 }
5680 else
5681 {
5682 current_templates = NULL;
5683 mnem_p[-1] = 'd';
5684 }
5685 }
5686 break;
5687 }
5688 }
5689
5690 if (!current_templates)
5691 {
5692 if (pass1)
5693 as_bad (_("no such instruction: `%s'"), token_start);
5694 return NULL;
5695 }
5696 }
5697
5698 if (current_templates->start->opcode_modifier.jump == JUMP
5699 || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5700 {
5701 /* Check for a branch hint. We allow ",pt" and ",pn" for
5702 predict taken and predict not taken respectively.
5703 I'm not sure that branch hints actually do anything on loop
5704 and jcxz insns (JumpByte) for current Pentium4 chips. They
5705 may work in the future and it doesn't hurt to accept them
5706 now. */
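      /* The hints are plain segment override bytes: 0x3e (DS) for
	 "taken", 0x2e (CS) for "not taken".  */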
5707 if (l[0] == ',' && l[1] == 'p')
5708 {
5709 if (l[2] == 't')
5710 {
5711 if (!add_prefix (DS_PREFIX_OPCODE))
5712 return NULL;
5713 l += 3;
5714 }
5715 else if (l[2] == 'n')
5716 {
5717 if (!add_prefix (CS_PREFIX_OPCODE))
5718 return NULL;
5719 l += 3;
5720 }
5721 }
5722 }
5723 /* Any other comma loses. */
5724 if (*l == ',')
5725 {
5726 as_bad (_("invalid character %s in mnemonic"),
5727 output_invalid (*l));
5728 return NULL;
5729 }
5730
5731 /* Check if instruction is supported on specified architecture. */
5732 supported = 0;
5733 for (t = current_templates->start; t < current_templates->end; ++t)
5734 {
5735 supported |= cpu_flags_match (t);
5736
5737 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
5738 supported &= ~CPU_FLAGS_64BIT_MATCH;
5739
5740 if (supported == CPU_FLAGS_PERFECT_MATCH)
5741 return l;
5742 }
5743
5744 if (pass1)
5745 {
5746 if (supported & CPU_FLAGS_64BIT_MATCH)
5747 i.error = unsupported_on_arch;
5748 else
5749 i.error = unsupported_64bit;
5750 }
5751
5752 return NULL;
5753 }
5754
5755 static char *
5756 parse_operands (char *l, const char *mnemonic)
5757 {
5758 char *token_start;
5759
5760 /* 1 if operand is pending after ','. */
5761 unsigned int expecting_operand = 0;
5762
5763 while (*l != END_OF_INSN)
5764 {
5765 /* Non-zero if operand parens not balanced. */
5766 unsigned int paren_not_balanced = 0;
5767 /* True if inside double quotes. */
5768 bool in_quotes = false;
5769
5770 /* Skip optional white space before operand. */
5771 if (is_space_char (*l))
5772 ++l;
5773 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5774 {
5775 as_bad (_("invalid character %s before operand %d"),
5776 output_invalid (*l),
5777 i.operands + 1);
5778 return NULL;
5779 }
5780 token_start = l; /* After white space. */
5781 while (in_quotes || paren_not_balanced || *l != ',')
5782 {
5783 if (*l == END_OF_INSN)
5784 {
5785 if (in_quotes)
5786 {
5787 as_bad (_("unbalanced double quotes in operand %d."),
5788 i.operands + 1);
5789 return NULL;
5790 }
5791 if (paren_not_balanced)
5792 {
5793 know (!intel_syntax);
5794 as_bad (_("unbalanced parenthesis in operand %d."),
5795 i.operands + 1);
5796 return NULL;
5797 }
5798 else
5799 break; /* we are done */
5800 }
5801 else if (*l == '\\' && l[1] == '"')
5802 ++l;
5803 else if (*l == '"')
5804 in_quotes = !in_quotes;
5805 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5806 {
5807 as_bad (_("invalid character %s in operand %d"),
5808 output_invalid (*l),
5809 i.operands + 1);
5810 return NULL;
5811 }
5812 if (!intel_syntax && !in_quotes)
5813 {
5814 if (*l == '(')
5815 ++paren_not_balanced;
5816 if (*l == ')')
5817 --paren_not_balanced;
5818 }
5819 l++;
5820 }
5821 if (l != token_start)
5822 { /* Yes, we've read in another operand. */
5823 unsigned int operand_ok;
5824 this_operand = i.operands++;
5825 if (i.operands > MAX_OPERANDS)
5826 {
5827 as_bad (_("spurious operands; (%d operands/instruction max)"),
5828 MAX_OPERANDS);
5829 return NULL;
5830 }
5831 i.types[this_operand].bitfield.unspecified = 1;
5832 /* Now parse operand adding info to 'i' as we go along. */
5833 END_STRING_AND_SAVE (l);
5834
5835 if (i.mem_operands > 1)
5836 {
5837 as_bad (_("too many memory references for `%s'"),
5838 mnemonic);
5839 return 0;
5840 }
5841
5842 if (intel_syntax)
5843 operand_ok =
5844 i386_intel_operand (token_start,
5845 intel_float_operand (mnemonic));
5846 else
5847 operand_ok = i386_att_operand (token_start);
5848
5849 RESTORE_END_STRING (l);
5850 if (!operand_ok)
5851 return NULL;
5852 }
5853 else
5854 {
5855 if (expecting_operand)
5856 {
5857 expecting_operand_after_comma:
5858 as_bad (_("expecting operand after ','; got nothing"));
5859 return NULL;
5860 }
5861 if (*l == ',')
5862 {
5863 as_bad (_("expecting operand before ','; got nothing"));
5864 return NULL;
5865 }
5866 }
5867
5868 /* Now *l must be either ',' or END_OF_INSN. */
5869 if (*l == ',')
5870 {
5871 if (*++l == END_OF_INSN)
5872 {
5873 /* Nothing follows the comma: complain. */
5874 goto expecting_operand_after_comma;
5875 }
5876 expecting_operand = 1;
5877 }
5878 }
5879 return l;
5880 }
5881
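/* Exchange all per-operand state (type, flags, value, relocation) between
   operand slots XCHG1 and XCHG2, keeping any mask or broadcast bookkeeping
   attached to the operand it was originally specified on.  */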
5882 static void
5883 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5884 {
5885 union i386_op temp_op;
5886 i386_operand_type temp_type;
5887 unsigned int temp_flags;
5888 enum bfd_reloc_code_real temp_reloc;
5889
5890 temp_type = i.types[xchg2];
5891 i.types[xchg2] = i.types[xchg1];
5892 i.types[xchg1] = temp_type;
5893
5894 temp_flags = i.flags[xchg2];
5895 i.flags[xchg2] = i.flags[xchg1];
5896 i.flags[xchg1] = temp_flags;
5897
5898 temp_op = i.op[xchg2];
5899 i.op[xchg2] = i.op[xchg1];
5900 i.op[xchg1] = temp_op;
5901
5902 temp_reloc = i.reloc[xchg2];
5903 i.reloc[xchg2] = i.reloc[xchg1];
5904 i.reloc[xchg1] = temp_reloc;
5905
5906 if (i.mask.reg)
5907 {
5908 if (i.mask.operand == xchg1)
5909 i.mask.operand = xchg2;
5910 else if (i.mask.operand == xchg2)
5911 i.mask.operand = xchg1;
5912 }
5913 if (i.broadcast.type || i.broadcast.bytes)
5914 {
5915 if (i.broadcast.operand == xchg1)
5916 i.broadcast.operand = xchg2;
5917 else if (i.broadcast.operand == xchg2)
5918 i.broadcast.operand = xchg1;
5919 }
5920 }
5921
5922 static void
5923 swap_operands (void)
5924 {
5925 switch (i.operands)
5926 {
5927 case 5:
5928 case 4:
5929 swap_2_operands (1, i.operands - 2);
5930 /* Fall through. */
5931 case 3:
5932 case 2:
5933 swap_2_operands (0, i.operands - 1);
5934 break;
5935 default:
5936 abort ();
5937 }
5938
5939 if (i.mem_operands == 2)
5940 {
5941 const reg_entry *temp_seg;
5942 temp_seg = i.seg[0];
5943 i.seg[0] = i.seg[1];
5944 i.seg[1] = temp_seg;
5945 }
5946 }
5947
5948 /* Try to ensure constant immediates are represented in the smallest
5949 opcode possible. */
5950 static void
5951 optimize_imm (void)
5952 {
5953 char guess_suffix = 0;
5954 int op;
5955
5956 if (i.suffix)
5957 guess_suffix = i.suffix;
5958 else if (i.reg_operands)
5959 {
5960 /* Figure out a suffix from the last register operand specified.
5961 We can't do this properly yet, i.e. excluding special register
5962 instances, but the following works for instructions with
5963 immediates. In any case, we can't set i.suffix yet. */
5964 for (op = i.operands; --op >= 0;)
5965 if (i.types[op].bitfield.class != Reg)
5966 continue;
5967 else if (i.types[op].bitfield.byte)
5968 {
5969 guess_suffix = BYTE_MNEM_SUFFIX;
5970 break;
5971 }
5972 else if (i.types[op].bitfield.word)
5973 {
5974 guess_suffix = WORD_MNEM_SUFFIX;
5975 break;
5976 }
5977 else if (i.types[op].bitfield.dword)
5978 {
5979 guess_suffix = LONG_MNEM_SUFFIX;
5980 break;
5981 }
5982 else if (i.types[op].bitfield.qword)
5983 {
5984 guess_suffix = QWORD_MNEM_SUFFIX;
5985 break;
5986 }
5987 }
5988 else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
5989 guess_suffix = WORD_MNEM_SUFFIX;
5990
5991 for (op = i.operands; --op >= 0;)
5992 if (operand_type_check (i.types[op], imm))
5993 {
5994 switch (i.op[op].imms->X_op)
5995 {
5996 case O_constant:
5997 /* If a suffix is given, this operand may be shortened. */
5998 switch (guess_suffix)
5999 {
6000 case LONG_MNEM_SUFFIX:
6001 i.types[op].bitfield.imm32 = 1;
6002 i.types[op].bitfield.imm64 = 1;
6003 break;
6004 case WORD_MNEM_SUFFIX:
6005 i.types[op].bitfield.imm16 = 1;
6006 i.types[op].bitfield.imm32 = 1;
6007 i.types[op].bitfield.imm32s = 1;
6008 i.types[op].bitfield.imm64 = 1;
6009 break;
6010 case BYTE_MNEM_SUFFIX:
6011 i.types[op].bitfield.imm8 = 1;
6012 i.types[op].bitfield.imm8s = 1;
6013 i.types[op].bitfield.imm16 = 1;
6014 i.types[op].bitfield.imm32 = 1;
6015 i.types[op].bitfield.imm32s = 1;
6016 i.types[op].bitfield.imm64 = 1;
6017 break;
6018 }
6019
6020 /* If this operand is at most 16 bits, convert it
6021 to a signed 16 bit number before trying to see
6022 whether it will fit in an even smaller size.
6023 This allows a 16-bit operand such as $0xffe0 to
6024 be recognised as within Imm8S range. */
6025 if ((i.types[op].bitfield.imm16)
6026 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
6027 {
6028 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6029 ^ 0x8000) - 0x8000);
6030 }
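/* A minimal, standalone sketch (not part of gas) of the XOR/subtract
   sign-extension trick used above; the helper name is made up purely
   for illustration.  */
#if 0 /* example only */
#include <assert.h>
#include <stdint.h>

static int64_t
example_sign_extend_16 (uint64_t v)
{
  /* Flip the sign bit, then subtract the bias: inputs with bit 15 set
     wrap around to negative values.  */
  return (int64_t) (((v & 0xffff) ^ 0x8000) - 0x8000);
}

int
main (void)
{
  assert (example_sign_extend_16 (0xffe0) == -0x20); /* fits in Imm8S */
  assert (example_sign_extend_16 (0x1234) == 0x1234);
  return 0;
}
#endif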
6031 #ifdef BFD64
6032 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
6033 if ((i.types[op].bitfield.imm32)
6034 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
6035 {
6036 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6037 ^ ((offsetT) 1 << 31))
6038 - ((offsetT) 1 << 31));
6039 }
6040 #endif
6041 i.types[op]
6042 = operand_type_or (i.types[op],
6043 smallest_imm_type (i.op[op].imms->X_add_number));
6044
6045 /* We must avoid matching Imm32 templates when only a 64bit
6046 immediate is available. */
6047 if (guess_suffix == QWORD_MNEM_SUFFIX)
6048 i.types[op].bitfield.imm32 = 0;
6049 break;
6050
6051 case O_absent:
6052 case O_register:
6053 abort ();
6054
6055 /* Symbols and expressions. */
6056 default:
6057 /* Convert symbolic operand to proper sizes for matching, but don't
6058 prevent matching a set of insns that only supports sizes other
6059 than those matching the insn suffix. */
6060 {
6061 i386_operand_type mask, allowed;
6062 const insn_template *t = current_templates->start;
6063
6064 operand_type_set (&mask, 0);
6065 switch (guess_suffix)
6066 {
6067 case QWORD_MNEM_SUFFIX:
6068 mask.bitfield.imm64 = 1;
6069 mask.bitfield.imm32s = 1;
6070 break;
6071 case LONG_MNEM_SUFFIX:
6072 mask.bitfield.imm32 = 1;
6073 break;
6074 case WORD_MNEM_SUFFIX:
6075 mask.bitfield.imm16 = 1;
6076 break;
6077 case BYTE_MNEM_SUFFIX:
6078 mask.bitfield.imm8 = 1;
6079 break;
6080 default:
6081 break;
6082 }
6083
6084 allowed = operand_type_and (t->operand_types[op], mask);
6085 while (++t < current_templates->end)
6086 {
6087 allowed = operand_type_or (allowed, t->operand_types[op]);
6088 allowed = operand_type_and (allowed, mask);
6089 }
6090
6091 if (!operand_type_all_zero (&allowed))
6092 i.types[op] = operand_type_and (i.types[op], mask);
6093 }
6094 break;
6095 }
6096 }
6097 }
6098
6099 /* Try to use the smallest displacement type too. */
6100 static bool
6101 optimize_disp (const insn_template *t)
6102 {
6103 unsigned int op;
6104
6105 if (!want_disp32 (t)
6106 && (!t->opcode_modifier.jump
6107 || i.jumpabsolute || i.types[0].bitfield.baseindex))
6108 {
6109 for (op = 0; op < i.operands; ++op)
6110 {
6111 const expressionS *exp = i.op[op].disps;
6112
6113 if (!operand_type_check (i.types[op], disp))
6114 continue;
6115
6116 if (exp->X_op != O_constant)
6117 continue;
6118
6119 /* Since displacement is sign extended to 64bit, don't allow
6120 disp32 if it is out of range. */
6121 if (fits_in_signed_long (exp->X_add_number))
6122 continue;
6123
6124 i.types[op].bitfield.disp32 = 0;
6125 if (i.types[op].bitfield.baseindex)
6126 {
6127 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
6128 (uint64_t) exp->X_add_number);
6129 return false;
6130 }
6131 }
6132 }
6133
6134 /* Don't optimize displacement for movabs since it only takes 64bit
6135 displacement. */
6136 if (i.disp_encoding > disp_encoding_8bit
6137 || (flag_code == CODE_64BIT && t->mnem_off == MN_movabs))
6138 return true;
6139
6140 for (op = i.operands; op-- > 0;)
6141 if (operand_type_check (i.types[op], disp))
6142 {
6143 if (i.op[op].disps->X_op == O_constant)
6144 {
6145 offsetT op_disp = i.op[op].disps->X_add_number;
6146
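/* A zero displacement together with a base/index register can be dropped
   altogether: e.g. "movl 0(%eax), %ebx" ends up encoded exactly like
   "movl (%eax), %ebx".  */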
6147 if (!op_disp && i.types[op].bitfield.baseindex)
6148 {
6149 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6150 i.op[op].disps = NULL;
6151 i.disp_operands--;
6152 continue;
6153 }
6154
6155 if (i.types[op].bitfield.disp16
6156 && fits_in_unsigned_word (op_disp))
6157 {
6158 /* If this operand is at most 16 bits, convert
6159 to a signed 16 bit number and don't use 64bit
6160 displacement. */
6161 op_disp = ((op_disp ^ 0x8000) - 0x8000);
6162 i.types[op].bitfield.disp64 = 0;
6163 }
6164
6165 #ifdef BFD64
6166 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
6167 if ((flag_code != CODE_64BIT
6168 ? i.types[op].bitfield.disp32
6169 : want_disp32 (t)
6170 && (!t->opcode_modifier.jump
6171 || i.jumpabsolute || i.types[op].bitfield.baseindex))
6172 && fits_in_unsigned_long (op_disp))
6173 {
6174 /* If this operand is at most 32 bits, convert
6175 to a signed 32 bit number and don't use 64bit
6176 displacement. */
6177 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
6178 i.types[op].bitfield.disp64 = 0;
6179 i.types[op].bitfield.disp32 = 1;
6180 }
6181
6182 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
6183 {
6184 i.types[op].bitfield.disp64 = 0;
6185 i.types[op].bitfield.disp32 = 1;
6186 }
6187 #endif
6188 if ((i.types[op].bitfield.disp32
6189 || i.types[op].bitfield.disp16)
6190 && fits_in_disp8 (op_disp))
6191 i.types[op].bitfield.disp8 = 1;
6192
6193 i.op[op].disps->X_add_number = op_disp;
6194 }
6195 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
6196 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
6197 {
6198 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
6199 i.op[op].disps, 0, i.reloc[op]);
6200 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6201 }
6202 else
6203 /* We only support 64bit displacement on constants. */
6204 i.types[op].bitfield.disp64 = 0;
6205 }
6206
6207 return true;
6208 }
6209
6210 /* Return 1 if there is a match in broadcast bytes between operand
6211 GIVEN and instruction template T. */
6212
6213 static INLINE int
6214 match_broadcast_size (const insn_template *t, unsigned int given)
6215 {
6216 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
6217 && i.types[given].bitfield.byte)
6218 || (t->opcode_modifier.broadcast == WORD_BROADCAST
6219 && i.types[given].bitfield.word)
6220 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
6221 && i.types[given].bitfield.dword)
6222 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
6223 && i.types[given].bitfield.qword));
6224 }
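/* Example: for "vaddps (%rax){1to16}, %zmm1, %zmm2" the template carries
   DWORD_BROADCAST and the memory operand is dword-sized; 16 * 4 == 64
   bytes then cover the full ZMM width.  */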
6225
6226 /* Check if operands are valid for the instruction. */
6227
6228 static int
6229 check_VecOperands (const insn_template *t)
6230 {
6231 unsigned int op;
6232 i386_cpu_flags cpu;
6233
6234 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6235 any one operand implicitly require AVX512VL support if the actual
6236 operand size is YMMword or XMMword. Since this function runs after
6237 template matching, there's no need to check for YMMword/XMMword in
6238 the template. */
6239 cpu = cpu_flags_and (t->cpu_flags, avx512);
6240 if (!cpu_flags_all_zero (&cpu)
6241 && !t->cpu_flags.bitfield.cpuavx512vl
6242 && !cpu_arch_flags.bitfield.cpuavx512vl)
6243 {
6244 for (op = 0; op < t->operands; ++op)
6245 {
6246 if (t->operand_types[op].bitfield.zmmword
6247 && (i.types[op].bitfield.ymmword
6248 || i.types[op].bitfield.xmmword))
6249 {
6250 i.error = unsupported;
6251 return 1;
6252 }
6253 }
6254 }
6255
6256 /* Somewhat similarly, templates specifying both AVX and AVX2
6257 require AVX2 support if the actual operand size is YMMword. */
6258 if (t->cpu_flags.bitfield.cpuavx
6259 && t->cpu_flags.bitfield.cpuavx2
6260 && !cpu_arch_flags.bitfield.cpuavx2)
6261 {
6262 for (op = 0; op < t->operands; ++op)
6263 {
6264 if (t->operand_types[op].bitfield.xmmword
6265 && i.types[op].bitfield.ymmword)
6266 {
6267 i.error = unsupported;
6268 return 1;
6269 }
6270 }
6271 }
6272
6273 /* Without VSIB byte, we can't have a vector register for index. */
6274 if (!t->opcode_modifier.sib
6275 && i.index_reg
6276 && (i.index_reg->reg_type.bitfield.xmmword
6277 || i.index_reg->reg_type.bitfield.ymmword
6278 || i.index_reg->reg_type.bitfield.zmmword))
6279 {
6280 i.error = unsupported_vector_index_register;
6281 return 1;
6282 }
6283
6284 /* Check if default mask is allowed. */
6285 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6286 && (!i.mask.reg || i.mask.reg->reg_num == 0))
6287 {
6288 i.error = no_default_mask;
6289 return 1;
6290 }
6291
6292 /* For VSIB byte, we need a vector register for index, and all vector
6293 registers must be distinct. */
6294 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6295 {
6296 if (!i.index_reg
6297 || !((t->opcode_modifier.sib == VECSIB128
6298 && i.index_reg->reg_type.bitfield.xmmword)
6299 || (t->opcode_modifier.sib == VECSIB256
6300 && i.index_reg->reg_type.bitfield.ymmword)
6301 || (t->opcode_modifier.sib == VECSIB512
6302 && i.index_reg->reg_type.bitfield.zmmword)))
6303 {
6304 i.error = invalid_vsib_address;
6305 return 1;
6306 }
6307
6308 gas_assert (i.reg_operands == 2 || i.mask.reg);
6309 if (i.reg_operands == 2 && !i.mask.reg)
6310 {
6311 gas_assert (i.types[0].bitfield.class == RegSIMD);
6312 gas_assert (i.types[0].bitfield.xmmword
6313 || i.types[0].bitfield.ymmword);
6314 gas_assert (i.types[2].bitfield.class == RegSIMD);
6315 gas_assert (i.types[2].bitfield.xmmword
6316 || i.types[2].bitfield.ymmword);
6317 if (operand_check == check_none)
6318 return 0;
6319 if (register_number (i.op[0].regs)
6320 != register_number (i.index_reg)
6321 && register_number (i.op[2].regs)
6322 != register_number (i.index_reg)
6323 && register_number (i.op[0].regs)
6324 != register_number (i.op[2].regs))
6325 return 0;
6326 if (operand_check == check_error)
6327 {
6328 i.error = invalid_vector_register_set;
6329 return 1;
6330 }
6331 as_warn (_("mask, index, and destination registers should be distinct"));
6332 }
6333 else if (i.reg_operands == 1 && i.mask.reg)
6334 {
6335 if (i.types[1].bitfield.class == RegSIMD
6336 && (i.types[1].bitfield.xmmword
6337 || i.types[1].bitfield.ymmword
6338 || i.types[1].bitfield.zmmword)
6339 && (register_number (i.op[1].regs)
6340 == register_number (i.index_reg)))
6341 {
6342 if (operand_check == check_error)
6343 {
6344 i.error = invalid_vector_register_set;
6345 return 1;
6346 }
6347 if (operand_check != check_none)
6348 as_warn (_("index and destination registers should be distinct"));
6349 }
6350 }
6351 }
6352
6353 /* For AMX instructions with 3 TMM register operands, all operands
6354 must be distinct. */
6355 if (i.reg_operands == 3
6356 && t->operand_types[0].bitfield.tmmword
6357 && (i.op[0].regs == i.op[1].regs
6358 || i.op[0].regs == i.op[2].regs
6359 || i.op[1].regs == i.op[2].regs))
6360 {
6361 i.error = invalid_tmm_register_set;
6362 return 1;
6363 }
6364
6365 /* Some special instructions require that the destination be distinct
6366 from the source registers. */
6367 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6368 {
6369 unsigned int dest_reg = i.operands - 1;
6370
6371 know (i.operands >= 3);
6372
6373 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
6374 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6375 || (i.reg_operands > 2
6376 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6377 {
6378 i.error = invalid_dest_and_src_register_set;
6379 return 1;
6380 }
6381 }
6382
6383 /* Check if broadcast is supported by the instruction and is applied
6384 to the memory operand. */
6385 if (i.broadcast.type || i.broadcast.bytes)
6386 {
6387 i386_operand_type type, overlap;
6388
6389 /* Check if specified broadcast is supported in this instruction,
6390 and its broadcast bytes match the memory operand. */
6391 op = i.broadcast.operand;
6392 if (!t->opcode_modifier.broadcast
6393 || !(i.flags[op] & Operand_Mem)
6394 || (!i.types[op].bitfield.unspecified
6395 && !match_broadcast_size (t, op)))
6396 {
6397 bad_broadcast:
6398 i.error = unsupported_broadcast;
6399 return 1;
6400 }
6401
6402 operand_type_set (&type, 0);
6403 switch (get_broadcast_bytes (t, false))
6404 {
6405 case 2:
6406 type.bitfield.word = 1;
6407 break;
6408 case 4:
6409 type.bitfield.dword = 1;
6410 break;
6411 case 8:
6412 type.bitfield.qword = 1;
6413 break;
6414 case 16:
6415 type.bitfield.xmmword = 1;
6416 break;
6417 case 32:
6418 type.bitfield.ymmword = 1;
6419 break;
6420 case 64:
6421 type.bitfield.zmmword = 1;
6422 break;
6423 default:
6424 goto bad_broadcast;
6425 }
6426
6427 overlap = operand_type_and (type, t->operand_types[op]);
6428 if (t->operand_types[op].bitfield.class == RegSIMD
6429 && t->operand_types[op].bitfield.byte
6430 + t->operand_types[op].bitfield.word
6431 + t->operand_types[op].bitfield.dword
6432 + t->operand_types[op].bitfield.qword > 1)
6433 {
6434 overlap.bitfield.xmmword = 0;
6435 overlap.bitfield.ymmword = 0;
6436 overlap.bitfield.zmmword = 0;
6437 }
6438 if (operand_type_all_zero (&overlap))
6439 goto bad_broadcast;
6440
6441 if (t->opcode_modifier.checkoperandsize)
6442 {
6443 unsigned int j;
6444
6445 type.bitfield.baseindex = 1;
6446 for (j = 0; j < i.operands; ++j)
6447 {
6448 if (j != op
6449 && !operand_type_register_match(i.types[j],
6450 t->operand_types[j],
6451 type,
6452 t->operand_types[op]))
6453 goto bad_broadcast;
6454 }
6455 }
6456 }
6457 /* If broadcast is supported by this instruction, we need to check that
6458 an operand of one-element size isn't specified without broadcast. */
6459 else if (t->opcode_modifier.broadcast && i.mem_operands)
6460 {
6461 /* Find memory operand. */
6462 for (op = 0; op < i.operands; op++)
6463 if (i.flags[op] & Operand_Mem)
6464 break;
6465 gas_assert (op < i.operands);
6466 /* Check size of the memory operand. */
6467 if (match_broadcast_size (t, op))
6468 {
6469 i.error = broadcast_needed;
6470 return 1;
6471 }
6472 }
6473 else
6474 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
6475
6476 /* Check if requested masking is supported. */
6477 if (i.mask.reg)
6478 {
6479 switch (t->opcode_modifier.masking)
6480 {
6481 case BOTH_MASKING:
6482 break;
6483 case MERGING_MASKING:
6484 if (i.mask.zeroing)
6485 {
6486 case 0:
6487 i.error = unsupported_masking;
6488 return 1;
6489 }
6490 break;
6491 case DYNAMIC_MASKING:
6492 /* Memory destinations allow only merging masking. */
6493 if (i.mask.zeroing && i.mem_operands)
6494 {
6495 /* Find memory operand. */
6496 for (op = 0; op < i.operands; op++)
6497 if (i.flags[op] & Operand_Mem)
6498 break;
6499 gas_assert (op < i.operands);
6500 if (op == i.operands - 1)
6501 {
6502 i.error = unsupported_masking;
6503 return 1;
6504 }
6505 }
6506 break;
6507 default:
6508 abort ();
6509 }
6510 }
6511
6512 /* Check if masking is applied to dest operand. */
6513 if (i.mask.reg && (i.mask.operand != i.operands - 1))
6514 {
6515 i.error = mask_not_on_destination;
6516 return 1;
6517 }
6518
6519 /* Check RC/SAE. */
6520 if (i.rounding.type != rc_none)
6521 {
6522 if (!t->opcode_modifier.sae
6523 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6524 || i.mem_operands)
6525 {
6526 i.error = unsupported_rc_sae;
6527 return 1;
6528 }
6529
6530 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6531 operand. */
6532 if (t->opcode_modifier.evex != EVEXLIG)
6533 {
6534 for (op = 0; op < t->operands; ++op)
6535 if (i.types[op].bitfield.zmmword)
6536 break;
6537 if (op >= t->operands)
6538 {
6539 i.error = operand_size_mismatch;
6540 return 1;
6541 }
6542 }
6543 }
6544
6545 /* Check the special Imm4 cases; must be the first operand. */
6546 if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6547 {
6548 if (i.op[0].imms->X_op != O_constant
6549 || !fits_in_imm4 (i.op[0].imms->X_add_number))
6550 {
6551 i.error = bad_imm4;
6552 return 1;
6553 }
6554
6555 /* Turn off Imm<N> so that update_imm won't complain. */
6556 operand_type_set (&i.types[0], 0);
6557 }
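/* (In these 5-operand XOP templates only the low four bits of the imm8
   byte are available for the immediate value; the upper four bits encode
   a register operand.)  */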
6558
6559 /* Check vector Disp8 operand. */
6560 if (t->opcode_modifier.disp8memshift
6561 && i.disp_encoding <= disp_encoding_8bit)
6562 {
6563 if (i.broadcast.type || i.broadcast.bytes)
6564 i.memshift = t->opcode_modifier.broadcast - 1;
6565 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6566 i.memshift = t->opcode_modifier.disp8memshift;
6567 else
6568 {
6569 const i386_operand_type *type = NULL, *fallback = NULL;
6570
6571 i.memshift = 0;
6572 for (op = 0; op < i.operands; op++)
6573 if (i.flags[op] & Operand_Mem)
6574 {
6575 if (t->opcode_modifier.evex == EVEXLIG)
6576 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6577 else if (t->operand_types[op].bitfield.xmmword
6578 + t->operand_types[op].bitfield.ymmword
6579 + t->operand_types[op].bitfield.zmmword <= 1)
6580 type = &t->operand_types[op];
6581 else if (!i.types[op].bitfield.unspecified)
6582 type = &i.types[op];
6583 else /* Ambiguities get resolved elsewhere. */
6584 fallback = &t->operand_types[op];
6585 }
6586 else if (i.types[op].bitfield.class == RegSIMD
6587 && t->opcode_modifier.evex != EVEXLIG)
6588 {
6589 if (i.types[op].bitfield.zmmword)
6590 i.memshift = 6;
6591 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6592 i.memshift = 5;
6593 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6594 i.memshift = 4;
6595 }
6596
6597 if (!type && !i.memshift)
6598 type = fallback;
6599 if (type)
6600 {
6601 if (type->bitfield.zmmword)
6602 i.memshift = 6;
6603 else if (type->bitfield.ymmword)
6604 i.memshift = 5;
6605 else if (type->bitfield.xmmword)
6606 i.memshift = 4;
6607 }
6608
6609 /* For the check in fits_in_disp8(). */
6610 if (i.memshift == 0)
6611 i.memshift = -1;
6612 }
6613
6614 for (op = 0; op < i.operands; op++)
6615 if (operand_type_check (i.types[op], disp)
6616 && i.op[op].disps->X_op == O_constant)
6617 {
6618 if (fits_in_disp8 (i.op[op].disps->X_add_number))
6619 {
6620 i.types[op].bitfield.disp8 = 1;
6621 return 0;
6622 }
6623 i.types[op].bitfield.disp8 = 0;
6624 }
6625 }
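/* A sketch (not the actual gas helper) of how a compressed Disp8 check
   based on i.memshift can work; the names are made up for illustration.  */
#if 0 /* example only */
#include <stdbool.h>
#include <stdint.h>

static bool
example_fits_in_disp8 (int64_t disp, unsigned int memshift)
{
  int64_t mask = ((int64_t) 1 << memshift) - 1;

  /* Only displacements that are a multiple of the memory element size
     (1 << memshift) can be compressed ...  */
  if (disp & mask)
    return false;
  /* ... and the scaled value must then fit in a signed byte.  */
  disp >>= memshift;
  return disp >= -128 && disp <= 127;
}
#endif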
6626
6627 i.memshift = 0;
6628
6629 return 0;
6630 }
6631
6632 /* Check if encoding requirements are met by the instruction. */
6633
6634 static int
6635 VEX_check_encoding (const insn_template *t)
6636 {
6637 if (i.vec_encoding == vex_encoding_error)
6638 {
6639 i.error = unsupported;
6640 return 1;
6641 }
6642
6643 if (i.vec_encoding == vex_encoding_evex)
6644 {
6645 /* This instruction must be encoded with EVEX prefix. */
6646 if (!is_evex_encoding (t))
6647 {
6648 i.error = unsupported;
6649 return 1;
6650 }
6651 return 0;
6652 }
6653
6654 if (!t->opcode_modifier.vex)
6655 {
6656 /* This instruction template doesn't have VEX prefix. */
6657 if (i.vec_encoding != vex_encoding_default)
6658 {
6659 i.error = unsupported;
6660 return 1;
6661 }
6662 return 0;
6663 }
6664
6665 return 0;
6666 }
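/* Example: writing "{evex} vaddps %xmm0, %xmm1, %xmm2" sets i.vec_encoding
   to vex_encoding_evex, so the VEX-only template for vaddps is rejected by
   the check above and the EVEX one has to match instead.  */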
6667
6668 /* Helper function for the progress() macro in match_template(). */
6669 static INLINE enum i386_error progress (enum i386_error new,
6670 enum i386_error last,
6671 unsigned int line, unsigned int *line_p)
6672 {
6673 if (line <= *line_p)
6674 return last;
6675 *line_p = line;
6676 return new;
6677 }
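/* Used via the progress() macro in match_template() below, this keeps the
   error of whichever template check got furthest: a failure at a later
   source line replaces one recorded earlier, so the final diagnostic
   describes the closest match.  */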
6678
6679 static const insn_template *
6680 match_template (char mnem_suffix)
6681 {
6682 /* Points to template once we've found it. */
6683 const insn_template *t;
6684 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6685 i386_operand_type overlap4;
6686 unsigned int found_reverse_match;
6687 i386_operand_type operand_types [MAX_OPERANDS];
6688 int addr_prefix_disp;
6689 unsigned int j, size_match, check_register, errline = __LINE__;
6690 enum i386_error specific_error = number_of_operands_mismatch;
6691 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6692
6693 #if MAX_OPERANDS != 5
6694 # error "MAX_OPERANDS must be 5."
6695 #endif
6696
6697 found_reverse_match = 0;
6698 addr_prefix_disp = -1;
6699
6700 for (t = current_templates->start; t < current_templates->end; t++)
6701 {
6702 addr_prefix_disp = -1;
6703 found_reverse_match = 0;
6704
6705 /* Must have right number of operands. */
6706 if (i.operands != t->operands)
6707 continue;
6708
6709 /* Check processor support. */
6710 specific_error = progress (unsupported);
6711 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6712 continue;
6713
6714 /* Check AT&T mnemonic. */
6715 specific_error = progress (unsupported_with_intel_mnemonic);
6716 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6717 continue;
6718
6719 /* Check AT&T/Intel syntax. */
6720 specific_error = progress (unsupported_syntax);
6721 if ((intel_syntax && t->opcode_modifier.attsyntax)
6722 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6723 continue;
6724
6725 /* Check Intel64/AMD64 ISA. */
6726 switch (isa64)
6727 {
6728 default:
6729 /* Default: Don't accept Intel64. */
6730 if (t->opcode_modifier.isa64 == INTEL64)
6731 continue;
6732 break;
6733 case amd64:
6734 /* -mamd64: Don't accept Intel64 and Intel64-only templates. */
6735 if (t->opcode_modifier.isa64 >= INTEL64)
6736 continue;
6737 break;
6738 case intel64:
6739 /* -mintel64: Don't accept AMD64. */
6740 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6741 continue;
6742 break;
6743 }
6744
6745 /* Check the suffix. */
6746 specific_error = progress (invalid_instruction_suffix);
6747 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
6748 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
6749 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
6750 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
6751 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
6752 continue;
6753
6754 specific_error = progress (operand_size_mismatch);
6755 size_match = operand_size_match (t);
6756 if (!size_match)
6757 continue;
6758
6759 /* This is intentionally not
6760
6761 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6762
6763 as the case of a missing * on the operand is accepted (perhaps with
6764 a warning, issued further down). */
6765 specific_error = progress (operand_type_mismatch);
6766 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6767 continue;
6768
6769 /* In Intel syntax, normally we can check for memory operand size when
6770 there is no mnemonic suffix. But jmp and call have 2 different
6771 encodings with Dword memory operand size. Skip the "near" one
6772 (permitting a register operand) when "far" was requested. */
6773 if (i.far_branch
6774 && t->opcode_modifier.jump == JUMP_ABSOLUTE
6775 && t->operand_types[0].bitfield.class == Reg)
6776 continue;
6777
6778 for (j = 0; j < MAX_OPERANDS; j++)
6779 operand_types[j] = t->operand_types[j];
6780
6781 /* In general, don't allow 32-bit operands on pre-386. */
6782 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
6783 : operand_size_mismatch);
6784 j = i.imm_operands + (t->operands > i.imm_operands + 1);
6785 if (i.suffix == LONG_MNEM_SUFFIX
6786 && !cpu_arch_flags.bitfield.cpui386
6787 && (intel_syntax
6788 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6789 && !intel_float_operand (insn_name (t)))
6790 : intel_float_operand (insn_name (t)) != 2)
6791 && (t->operands == i.imm_operands
6792 || (operand_types[i.imm_operands].bitfield.class != RegMMX
6793 && operand_types[i.imm_operands].bitfield.class != RegSIMD
6794 && operand_types[i.imm_operands].bitfield.class != RegMask)
6795 || (operand_types[j].bitfield.class != RegMMX
6796 && operand_types[j].bitfield.class != RegSIMD
6797 && operand_types[j].bitfield.class != RegMask))
6798 && !t->opcode_modifier.sib)
6799 continue;
6800
6801 /* Do not verify operands when there are none. */
6802 if (!t->operands)
6803 {
6804 if (VEX_check_encoding (t))
6805 {
6806 specific_error = progress (i.error);
6807 continue;
6808 }
6809
6810 /* We've found a match; break out of loop. */
6811 break;
6812 }
6813
6814 if (!t->opcode_modifier.jump
6815 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6816 {
6817 /* There should be only one Disp operand. */
6818 for (j = 0; j < MAX_OPERANDS; j++)
6819 if (operand_type_check (operand_types[j], disp))
6820 break;
6821 if (j < MAX_OPERANDS)
6822 {
6823 bool override = (i.prefix[ADDR_PREFIX] != 0);
6824
6825 addr_prefix_disp = j;
6826
6827 /* Address size prefix will turn Disp64 operand into Disp32 and
6828 Disp32/Disp16 one into Disp16/Disp32 respectively. */
6829 switch (flag_code)
6830 {
6831 case CODE_16BIT:
6832 override = !override;
6833 /* Fall through. */
6834 case CODE_32BIT:
6835 if (operand_types[j].bitfield.disp32
6836 && operand_types[j].bitfield.disp16)
6837 {
6838 operand_types[j].bitfield.disp16 = override;
6839 operand_types[j].bitfield.disp32 = !override;
6840 }
6841 gas_assert (!operand_types[j].bitfield.disp64);
6842 break;
6843
6844 case CODE_64BIT:
6845 if (operand_types[j].bitfield.disp64)
6846 {
6847 gas_assert (!operand_types[j].bitfield.disp32);
6848 operand_types[j].bitfield.disp32 = override;
6849 operand_types[j].bitfield.disp64 = !override;
6850 }
6851 operand_types[j].bitfield.disp16 = 0;
6852 break;
6853 }
6854 }
6855 }
6856
6857 /* We check register size if needed. */
6858 if (t->opcode_modifier.checkoperandsize)
6859 {
6860 check_register = (1 << t->operands) - 1;
6861 if (i.broadcast.type || i.broadcast.bytes)
6862 check_register &= ~(1 << i.broadcast.operand);
6863 }
6864 else
6865 check_register = 0;
6866
6867 overlap0 = operand_type_and (i.types[0], operand_types[0]);
6868 switch (t->operands)
6869 {
6870 case 1:
6871 if (!operand_type_match (overlap0, i.types[0]))
6872 continue;
6873
6874 /* Allow the ModR/M encoding to be requested by using the {load} or
6875 {store} pseudo prefix on an applicable insn. */
6876 if (!t->opcode_modifier.modrm
6877 && i.reg_operands == 1
6878 && ((i.dir_encoding == dir_encoding_load
6879 && t->mnem_off != MN_pop)
6880 || (i.dir_encoding == dir_encoding_store
6881 && t->mnem_off != MN_push))
6882 /* Avoid BSWAP. */
6883 && t->mnem_off != MN_bswap)
6884 continue;
6885 break;
6886
6887 case 2:
6888 /* xchg %eax, %eax is a special case. It is an alias for nop
6889 only in 32bit mode and we can use opcode 0x90. In 64bit
6890 mode, we can't use 0x90 for xchg %eax, %eax since it should
6891 zero-extend %eax to %rax. */
6892 if (t->base_opcode == 0x90
6893 && t->opcode_space == SPACE_BASE)
6894 {
6895 if (flag_code == CODE_64BIT
6896 && i.types[0].bitfield.instance == Accum
6897 && i.types[0].bitfield.dword
6898 && i.types[1].bitfield.instance == Accum)
6899 continue;
6900
6901 /* Allow the ModR/M encoding to be requested by using the
6902 {load} or {store} pseudo prefix. */
6903 if (i.dir_encoding == dir_encoding_load
6904 || i.dir_encoding == dir_encoding_store)
6905 continue;
6906 }
6907
6908 if (t->base_opcode == MOV_AX_DISP32
6909 && t->opcode_space == SPACE_BASE
6910 && t->mnem_off != MN_movabs)
6911 {
6912 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
6913 if (i.reloc[0] == BFD_RELOC_386_GOT32)
6914 continue;
6915
6916 /* xrelease mov %eax, <disp> is another special case. It must not
6917 match the accumulator-only encoding of mov. */
6918 if (i.hle_prefix)
6919 continue;
6920
6921 /* Allow the ModR/M encoding to be requested by using a suitable
6922 {load} or {store} pseudo prefix. */
6923 if (i.dir_encoding == (i.types[0].bitfield.instance == Accum
6924 ? dir_encoding_store
6925 : dir_encoding_load)
6926 && !i.types[0].bitfield.disp64
6927 && !i.types[1].bitfield.disp64)
6928 continue;
6929 }
6930
6931 /* Allow the ModR/M encoding to be requested by using the {load} or
6932 {store} pseudo prefix on an applicable insn. */
6933 if (!t->opcode_modifier.modrm
6934 && i.reg_operands == 1
6935 && i.imm_operands == 1
6936 && (i.dir_encoding == dir_encoding_load
6937 || i.dir_encoding == dir_encoding_store)
6938 && t->opcode_space == SPACE_BASE)
6939 {
6940 if (t->base_opcode == 0xb0 /* mov $imm, %reg */
6941 && i.dir_encoding == dir_encoding_store)
6942 continue;
6943
6944 if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
6945 && (t->base_opcode != 0x3c /* cmp $imm, %acc */
6946 || i.dir_encoding == dir_encoding_load))
6947 continue;
6948
6949 if (t->base_opcode == 0xa8 /* test $imm, %acc */
6950 && i.dir_encoding == dir_encoding_load)
6951 continue;
6952 }
6953 /* Fall through. */
6954
6955 case 3:
6956 if (!(size_match & MATCH_STRAIGHT))
6957 goto check_reverse;
6958 /* Reverse direction of operands if swapping is possible in the first
6959 place (operands need to be symmetric) and
6960 - the load form is requested, and the template is a store form,
6961 - the store form is requested, and the template is a load form,
6962 - the non-default (swapped) form is requested. */
6963 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6964 if (t->opcode_modifier.d && i.reg_operands == i.operands
6965 && !operand_type_all_zero (&overlap1))
6966 switch (i.dir_encoding)
6967 {
6968 case dir_encoding_load:
6969 if (operand_type_check (operand_types[i.operands - 1], anymem)
6970 || t->opcode_modifier.regmem)
6971 goto check_reverse;
6972 break;
6973
6974 case dir_encoding_store:
6975 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6976 && !t->opcode_modifier.regmem)
6977 goto check_reverse;
6978 break;
6979
6980 case dir_encoding_swap:
6981 goto check_reverse;
6982
6983 case dir_encoding_default:
6984 break;
6985 }
6986 /* If we want store form, we skip the current load. */
6987 if ((i.dir_encoding == dir_encoding_store
6988 || i.dir_encoding == dir_encoding_swap)
6989 && i.mem_operands == 0
6990 && t->opcode_modifier.load)
6991 continue;
6992 /* Fall through. */
6993 case 4:
6994 case 5:
6995 overlap1 = operand_type_and (i.types[1], operand_types[1]);
6996 if (!operand_type_match (overlap0, i.types[0])
6997 || !operand_type_match (overlap1, i.types[1])
6998 || ((check_register & 3) == 3
6999 && !operand_type_register_match (i.types[0],
7000 operand_types[0],
7001 i.types[1],
7002 operand_types[1])))
7003 {
7004 specific_error = progress (i.error);
7005
7006 /* Check if other direction is valid ... */
7007 if (!t->opcode_modifier.d)
7008 continue;
7009
7010 check_reverse:
7011 if (!(size_match & MATCH_REVERSE))
7012 continue;
7013 /* Try reversing direction of operands. */
7014 j = t->cpu_flags.bitfield.cpufma4
7015 || t->cpu_flags.bitfield.cpuxop ? 1 : i.operands - 1;
7016 overlap0 = operand_type_and (i.types[0], operand_types[j]);
7017 overlap1 = operand_type_and (i.types[j], operand_types[0]);
7018 overlap2 = operand_type_and (i.types[1], operand_types[1]);
7019 gas_assert (t->operands != 3 || !check_register);
7020 if (!operand_type_match (overlap0, i.types[0])
7021 || !operand_type_match (overlap1, i.types[j])
7022 || (t->operands == 3
7023 && !operand_type_match (overlap2, i.types[1]))
7024 || (check_register
7025 && !operand_type_register_match (i.types[0],
7026 operand_types[j],
7027 i.types[j],
7028 operand_types[0])))
7029 {
7030 /* Does not match either direction. */
7031 specific_error = progress (i.error);
7032 continue;
7033 }
7034 /* found_reverse_match holds which variant of D
7035 we've found. */
7036 if (!t->opcode_modifier.d)
7037 found_reverse_match = 0;
7038 else if (operand_types[0].bitfield.tbyte)
7039 {
7040 if (t->opcode_modifier.operandconstraint != UGH)
7041 found_reverse_match = Opcode_FloatD;
7042 else
7043 found_reverse_match = ~0;
7044 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
7045 if ((t->extension_opcode & 4)
7046 && (intel_syntax || intel_mnemonic))
7047 found_reverse_match |= Opcode_FloatR;
7048 }
7049 else if (t->cpu_flags.bitfield.cpufma4
7050 || t->cpu_flags.bitfield.cpuxop)
7051 {
7052 found_reverse_match = Opcode_VexW;
7053 goto check_operands_345;
7054 }
7055 else if (t->opcode_space != SPACE_BASE
7056 && (t->opcode_space != SPACE_0F
7057 /* MOV to/from CR/DR/TR, as an exception, follows
7058 the base opcode space encoding model. */
7059 || (t->base_opcode | 7) != 0x27))
7060 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
7061 ? Opcode_ExtD : Opcode_SIMD_IntD;
7062 else if (!t->opcode_modifier.commutative)
7063 found_reverse_match = Opcode_D;
7064 else
7065 found_reverse_match = ~0;
7066 }
7067 else
7068 {
7069 /* Found a forward 2 operand match here. */
7070 check_operands_345:
7071 switch (t->operands)
7072 {
7073 case 5:
7074 overlap4 = operand_type_and (i.types[4], operand_types[4]);
7075 if (!operand_type_match (overlap4, i.types[4])
7076 || !operand_type_register_match (i.types[3],
7077 operand_types[3],
7078 i.types[4],
7079 operand_types[4]))
7080 {
7081 specific_error = progress (i.error);
7082 continue;
7083 }
7084 /* Fall through. */
7085 case 4:
7086 overlap3 = operand_type_and (i.types[3], operand_types[3]);
7087 if (!operand_type_match (overlap3, i.types[3])
7088 || ((check_register & 0xa) == 0xa
7089 && !operand_type_register_match (i.types[1],
7090 operand_types[1],
7091 i.types[3],
7092 operand_types[3]))
7093 || ((check_register & 0xc) == 0xc
7094 && !operand_type_register_match (i.types[2],
7095 operand_types[2],
7096 i.types[3],
7097 operand_types[3])))
7098 {
7099 specific_error = progress (i.error);
7100 continue;
7101 }
7102 /* Fall through. */
7103 case 3:
7104 overlap2 = operand_type_and (i.types[2], operand_types[2]);
7105 if (!operand_type_match (overlap2, i.types[2])
7106 || ((check_register & 5) == 5
7107 && !operand_type_register_match (i.types[0],
7108 operand_types[0],
7109 i.types[2],
7110 operand_types[2]))
7111 || ((check_register & 6) == 6
7112 && !operand_type_register_match (i.types[1],
7113 operand_types[1],
7114 i.types[2],
7115 operand_types[2])))
7116 {
7117 specific_error = progress (i.error);
7118 continue;
7119 }
7120 break;
7121 }
7122 }
7123 /* Found either a forward/reverse 2, 3, 4 or 5 operand match here:
7124 slip through to break. */
7125 }
7126
7127 /* Check if VEX/EVEX encoding requirements can be satisfied. */
7128 if (VEX_check_encoding (t))
7129 {
7130 specific_error = progress (i.error);
7131 continue;
7132 }
7133
7134 /* Check if vector operands are valid. */
7135 if (check_VecOperands (t))
7136 {
7137 specific_error = progress (i.error);
7138 continue;
7139 }
7140
7141 /* We've found a match; break out of loop. */
7142 break;
7143 }
7144
7145 #undef progress
7146
7147 if (t == current_templates->end)
7148 {
7149 /* We found no match. */
7150 i.error = specific_error;
7151 return NULL;
7152 }
7153
7154 if (!quiet_warnings)
7155 {
7156 if (!intel_syntax
7157 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
7158 as_warn (_("indirect %s without `*'"), insn_name (t));
7159
7160 if (t->opcode_modifier.isprefix
7161 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
7162 {
7163 /* Warn them that a data or address size prefix doesn't
7164 affect assembly of the next line of code. */
7165 as_warn (_("stand-alone `%s' prefix"), insn_name (t));
7166 }
7167 }
7168
7169 /* Copy the template we found. */
7170 install_template (t);
7171
7172 if (addr_prefix_disp != -1)
7173 i.tm.operand_types[addr_prefix_disp]
7174 = operand_types[addr_prefix_disp];
7175
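/* Example of the direction-bit flipping below: "add %eax, %ebx" matches
   both the 0x01 (reg -> r/m) form and, reversed, the 0x03 (r/m -> reg)
   form; XORing base_opcode with Opcode_D toggles between the two.  */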
7176 switch (found_reverse_match)
7177 {
7178 case 0:
7179 break;
7180
7181 case Opcode_FloatR:
7182 case Opcode_FloatR | Opcode_FloatD:
7183 i.tm.extension_opcode ^= Opcode_FloatR >> 3;
7184 found_reverse_match &= Opcode_FloatD;
7185
7186 /* Fall through. */
7187 default:
7188 /* If we found a reverse match we must alter the opcode direction
7189 bit and clear/flip the regmem modifier one. found_reverse_match
7190 holds bits to change (different for int & float insns). */
7191
7192 i.tm.base_opcode ^= found_reverse_match;
7193
7194 /* Certain SIMD insns have their load forms specified in the opcode
7195 table, and hence we need to _set_ RegMem instead of clearing it.
7196 We need to avoid setting the bit though on insns like KMOVW. */
7197 i.tm.opcode_modifier.regmem
7198 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7199 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7200 && !i.tm.opcode_modifier.regmem;
7201
7202 /* Fall through. */
7203 case ~0:
7204 i.tm.operand_types[0] = operand_types[i.operands - 1];
7205 i.tm.operand_types[i.operands - 1] = operand_types[0];
7206 break;
7207
7208 case Opcode_VexW:
7209 /* Only the first two register operands need reversing, alongside
7210 flipping VEX.W. */
7211 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7212
7213 j = i.tm.operand_types[0].bitfield.imm8;
7214 i.tm.operand_types[j] = operand_types[j + 1];
7215 i.tm.operand_types[j + 1] = operand_types[j];
7216 break;
7217 }
7218
7219 return t;
7220 }
7221
7222 static int
7223 check_string (void)
7224 {
7225 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7226 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7227
7228 if (i.seg[op] != NULL && i.seg[op] != reg_es)
7229 {
7230 as_bad (_("`%s' operand %u must use `%ses' segment"),
7231 insn_name (&i.tm),
7232 intel_syntax ? i.tm.operands - es_op : es_op + 1,
7233 register_prefix);
7234 return 0;
7235 }
7236
7237 /* There's only ever one segment override allowed per instruction.
7238 This instruction possibly has a legal segment override on the
7239 second operand, so copy the segment to where non-string
7240 instructions store it, allowing common code. */
7241 i.seg[op] = i.seg[1];
7242
7243 return 1;
7244 }
7245
7246 static int
7247 process_suffix (void)
7248 {
7249 bool is_movx = false;
7250
7251 /* If matched instruction specifies an explicit instruction mnemonic
7252 suffix, use it. */
7253 if (i.tm.opcode_modifier.size == SIZE16)
7254 i.suffix = WORD_MNEM_SUFFIX;
7255 else if (i.tm.opcode_modifier.size == SIZE32)
7256 i.suffix = LONG_MNEM_SUFFIX;
7257 else if (i.tm.opcode_modifier.size == SIZE64)
7258 i.suffix = QWORD_MNEM_SUFFIX;
7259 else if (i.reg_operands
7260 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7261 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7262 {
7263 unsigned int numop = i.operands;
7264
7265 /* MOVSX/MOVZX */
7266 is_movx = (i.tm.opcode_space == SPACE_0F
7267 && (i.tm.base_opcode | 8) == 0xbe)
7268 || (i.tm.opcode_space == SPACE_BASE
7269 && i.tm.base_opcode == 0x63
7270 && i.tm.cpu_flags.bitfield.cpu64);
7271
7272 /* movsx/movzx want only their source operand considered here, for the
7273 ambiguity checking below. The suffix will be replaced afterwards
7274 to represent the destination (register). */
7275 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7276 --i.operands;
7277
7278 /* crc32 needs REX.W set regardless of suffix / source operand size. */
7279 if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
7280 i.rex |= REX_W;
7281
7282 /* If there's no instruction mnemonic suffix we try to invent one
7283 based on GPR operands. */
7284 if (!i.suffix)
7285 {
7286 /* We take i.suffix from the last register operand specified.
7287 The destination register type is more significant than the
7288 source register type; crc32 in SSE4.2 prefers the source
7289 register type. */
7290 unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
7291
7292 while (op--)
7293 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7294 || i.tm.operand_types[op].bitfield.instance == Accum)
7295 {
7296 if (i.types[op].bitfield.class != Reg)
7297 continue;
7298 if (i.types[op].bitfield.byte)
7299 i.suffix = BYTE_MNEM_SUFFIX;
7300 else if (i.types[op].bitfield.word)
7301 i.suffix = WORD_MNEM_SUFFIX;
7302 else if (i.types[op].bitfield.dword)
7303 i.suffix = LONG_MNEM_SUFFIX;
7304 else if (i.types[op].bitfield.qword)
7305 i.suffix = QWORD_MNEM_SUFFIX;
7306 else
7307 continue;
7308 break;
7309 }
7310
7311 /* As an exception, movsx/movzx silently default to a byte source
7312 in AT&T mode. */
7313 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7314 i.suffix = BYTE_MNEM_SUFFIX;
7315 }
7316 else if (i.suffix == BYTE_MNEM_SUFFIX)
7317 {
7318 if (!check_byte_reg ())
7319 return 0;
7320 }
7321 else if (i.suffix == LONG_MNEM_SUFFIX)
7322 {
7323 if (!check_long_reg ())
7324 return 0;
7325 }
7326 else if (i.suffix == QWORD_MNEM_SUFFIX)
7327 {
7328 if (!check_qword_reg ())
7329 return 0;
7330 }
7331 else if (i.suffix == WORD_MNEM_SUFFIX)
7332 {
7333 if (!check_word_reg ())
7334 return 0;
7335 }
7336 else if (intel_syntax
7337 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7338 /* Do nothing if the instruction is going to ignore the prefix. */
7339 ;
7340 else
7341 abort ();
7342
7343 /* Undo the movsx/movzx change done above. */
7344 i.operands = numop;
7345 }
7346 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7347 && !i.suffix)
7348 {
7349 i.suffix = stackop_size;
7350 if (stackop_size == LONG_MNEM_SUFFIX)
7351 {
7352 /* stackop_size is set to LONG_MNEM_SUFFIX for the
7353 .code16gcc directive to support 16-bit mode with
7354 32-bit address. For IRET without a suffix, generate
7355 16-bit IRET (opcode 0xcf) to return from an interrupt
7356 handler. */
7357 if (i.tm.base_opcode == 0xcf)
7358 {
7359 i.suffix = WORD_MNEM_SUFFIX;
7360 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7361 }
7362 /* Warn about changed behavior for segment register push/pop. */
7363 else if ((i.tm.base_opcode | 1) == 0x07)
7364 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7365 insn_name (&i.tm));
7366 }
7367 }
7368 else if (!i.suffix
7369 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7370 || i.tm.opcode_modifier.jump == JUMP_BYTE
7371 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7372 || (i.tm.opcode_space == SPACE_0F
7373 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7374 && i.tm.extension_opcode <= 3)))
7375 {
7376 switch (flag_code)
7377 {
7378 case CODE_64BIT:
7379 if (!i.tm.opcode_modifier.no_qsuf)
7380 {
7381 if (i.tm.opcode_modifier.jump == JUMP_BYTE
7382 || i.tm.opcode_modifier.no_lsuf)
7383 i.suffix = QWORD_MNEM_SUFFIX;
7384 break;
7385 }
7386 /* Fall through. */
7387 case CODE_32BIT:
7388 if (!i.tm.opcode_modifier.no_lsuf)
7389 i.suffix = LONG_MNEM_SUFFIX;
7390 break;
7391 case CODE_16BIT:
7392 if (!i.tm.opcode_modifier.no_wsuf)
7393 i.suffix = WORD_MNEM_SUFFIX;
7394 break;
7395 }
7396 }
7397
7398 if (!i.suffix
7399 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7400 /* Also cover lret/retf/iret in 64-bit mode. */
7401 || (flag_code == CODE_64BIT
7402 && !i.tm.opcode_modifier.no_lsuf
7403 && !i.tm.opcode_modifier.no_qsuf))
7404 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7405 /* Explicit sizing prefixes are assumed to disambiguate insns. */
7406 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7407 /* Accept FLDENV et al without suffix. */
7408 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7409 {
7410 unsigned int suffixes, evex = 0;
7411
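/* Bit layout of "suffixes": bit 0 = 'b', bit 1 = 'w', bit 2 = 'l',
   bit 4 = 's', and bit 5 = 'q' (64-bit mode only); bits 6-8 are set
   further down for possible XMM/YMM/ZMM memory operand sizes.  */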
7412 suffixes = !i.tm.opcode_modifier.no_bsuf;
7413 if (!i.tm.opcode_modifier.no_wsuf)
7414 suffixes |= 1 << 1;
7415 if (!i.tm.opcode_modifier.no_lsuf)
7416 suffixes |= 1 << 2;
7417 if (!i.tm.opcode_modifier.no_ssuf)
7418 suffixes |= 1 << 4;
7419 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7420 suffixes |= 1 << 5;
7421
7422 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
7423 also suitable for AT&T syntax mode, it was requested that this be
7424 restricted to just Intel syntax. */
7425 if (intel_syntax && is_any_vex_encoding (&i.tm)
7426 && !i.broadcast.type && !i.broadcast.bytes)
7427 {
7428 unsigned int op;
7429
7430 for (op = 0; op < i.tm.operands; ++op)
7431 {
7432 if (is_evex_encoding (&i.tm)
7433 && !cpu_arch_flags.bitfield.cpuavx512vl)
7434 {
7435 if (i.tm.operand_types[op].bitfield.ymmword)
7436 i.tm.operand_types[op].bitfield.xmmword = 0;
7437 if (i.tm.operand_types[op].bitfield.zmmword)
7438 i.tm.operand_types[op].bitfield.ymmword = 0;
7439 if (!i.tm.opcode_modifier.evex
7440 || i.tm.opcode_modifier.evex == EVEXDYN)
7441 i.tm.opcode_modifier.evex = EVEX512;
7442 }
7443
7444 if (i.tm.operand_types[op].bitfield.xmmword
7445 + i.tm.operand_types[op].bitfield.ymmword
7446 + i.tm.operand_types[op].bitfield.zmmword < 2)
7447 continue;
7448
7449 /* Any properly sized operand disambiguates the insn. */
7450 if (i.types[op].bitfield.xmmword
7451 || i.types[op].bitfield.ymmword
7452 || i.types[op].bitfield.zmmword)
7453 {
7454 suffixes &= ~(7 << 6);
7455 evex = 0;
7456 break;
7457 }
7458
7459 if ((i.flags[op] & Operand_Mem)
7460 && i.tm.operand_types[op].bitfield.unspecified)
7461 {
7462 if (i.tm.operand_types[op].bitfield.xmmword)
7463 suffixes |= 1 << 6;
7464 if (i.tm.operand_types[op].bitfield.ymmword)
7465 suffixes |= 1 << 7;
7466 if (i.tm.operand_types[op].bitfield.zmmword)
7467 suffixes |= 1 << 8;
7468 if (is_evex_encoding (&i.tm))
7469 evex = EVEX512;
7470 }
7471 }
7472 }
7473
7474 /* Are multiple suffixes / operand sizes allowed? */
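/* ("x & (x - 1)" is nonzero exactly when more than one bit is set.)  */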
7475 if (suffixes & (suffixes - 1))
7476 {
7477 if (intel_syntax
7478 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7479 || operand_check == check_error))
7480 {
7481 as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
7482 return 0;
7483 }
7484 if (operand_check == check_error)
7485 {
7486 as_bad (_("no instruction mnemonic suffix given and "
7487 "no register operands; can't size `%s'"), insn_name (&i.tm));
7488 return 0;
7489 }
7490 if (operand_check == check_warning)
7491 as_warn (_("%s; using default for `%s'"),
7492 intel_syntax
7493 ? _("ambiguous operand size")
7494 : _("no instruction mnemonic suffix given and "
7495 "no register operands"),
7496 insn_name (&i.tm));
7497
7498 if (i.tm.opcode_modifier.floatmf)
7499 i.suffix = SHORT_MNEM_SUFFIX;
7500 else if (is_movx)
7501 /* handled below */;
7502 else if (evex)
7503 i.tm.opcode_modifier.evex = evex;
7504 else if (flag_code == CODE_16BIT)
7505 i.suffix = WORD_MNEM_SUFFIX;
7506 else if (!i.tm.opcode_modifier.no_lsuf)
7507 i.suffix = LONG_MNEM_SUFFIX;
7508 else
7509 i.suffix = QWORD_MNEM_SUFFIX;
7510 }
7511 }
7512
7513 if (is_movx)
7514 {
7515 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7516 In AT&T syntax, if there is no suffix (warned about above), the default
7517 will be byte extension. */
7518 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7519 i.tm.base_opcode |= 1;
7520
7521 /* For further processing, the suffix should represent the destination
7522 (register). This is already the case when one was used with
7523 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7524 no suffix to begin with. */
7525 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7526 {
7527 if (i.types[1].bitfield.word)
7528 i.suffix = WORD_MNEM_SUFFIX;
7529 else if (i.types[1].bitfield.qword)
7530 i.suffix = QWORD_MNEM_SUFFIX;
7531 else
7532 i.suffix = LONG_MNEM_SUFFIX;
7533
7534 i.tm.opcode_modifier.w = 0;
7535 }
7536 }
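/* E.g. for AT&T "movzbl %al, %ecx" the suffix was already 'l'; for a
   suffix-less "movzx %al, %ecx" it has now been derived from the dword
   destination.  */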
7537
7538 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7539 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7540 != (i.tm.operand_types[1].bitfield.class == Reg);
7541
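/* Example of the sizing below: the register form of "mov" has byte base
   opcode 0x88 and gains the w bit (-> 0x89) for word/dword/qword
   suffixes, while the short-form "mov $imm, %reg" encoding goes from
   0xb0+reg to 0xb8+reg by ORing in 8.  */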
7542 /* Change the opcode based on the operand size given by i.suffix. */
7543 switch (i.suffix)
7544 {
7545 /* Size floating point instruction. */
7546 case LONG_MNEM_SUFFIX:
7547 if (i.tm.opcode_modifier.floatmf)
7548 {
7549 i.tm.base_opcode ^= 4;
7550 break;
7551 }
7552 /* fall through */
7553 case WORD_MNEM_SUFFIX:
7554 case QWORD_MNEM_SUFFIX:
7555 /* It's not a byte, select word/dword operation. */
7556 if (i.tm.opcode_modifier.w)
7557 {
7558 if (i.short_form)
7559 i.tm.base_opcode |= 8;
7560 else
7561 i.tm.base_opcode |= 1;
7562 }
7563 /* fall through */
7564 case SHORT_MNEM_SUFFIX:
7565 /* Now select between word & dword operations via the operand
7566 size prefix, except for instructions that will ignore this
7567 prefix anyway. */
7568 if (i.suffix != QWORD_MNEM_SUFFIX
7569 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7570 && !i.tm.opcode_modifier.floatmf
7571 && !is_any_vex_encoding (&i.tm)
7572 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7573 || (flag_code == CODE_64BIT
7574 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7575 {
7576 unsigned int prefix = DATA_PREFIX_OPCODE;
7577
7578 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7579 prefix = ADDR_PREFIX_OPCODE;
7580
7581 if (!add_prefix (prefix))
7582 return 0;
7583 }
7584
7585 /* Set mode64 for an operand. */
7586 if (i.suffix == QWORD_MNEM_SUFFIX
7587 && flag_code == CODE_64BIT
7588 && !i.tm.opcode_modifier.norex64
7589 && !i.tm.opcode_modifier.vexw
7590 /* Special case for xchg %rax,%rax. It is NOP and doesn't
7591 need rex64. */
7592 && ! (i.operands == 2
7593 && i.tm.base_opcode == 0x90
7594 && i.tm.opcode_space == SPACE_BASE
7595 && i.types[0].bitfield.instance == Accum
7596 && i.types[0].bitfield.qword
7597 && i.types[1].bitfield.instance == Accum))
7598 i.rex |= REX_W;
7599
7600 break;
7601
7602 case 0:
7603 /* Select word/dword/qword operation with explicit data sizing prefix
7604 when there are no suitable register operands. */
7605 if (i.tm.opcode_modifier.w
7606 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7607 && (!i.reg_operands
7608 || (i.reg_operands == 1
7609 /* ShiftCount */
7610 && (i.tm.operand_types[0].bitfield.instance == RegC
7611 /* InOutPortReg */
7612 || i.tm.operand_types[0].bitfield.instance == RegD
7613 || i.tm.operand_types[1].bitfield.instance == RegD
7614 || i.tm.mnem_off == MN_crc32))))
7615 i.tm.base_opcode |= 1;
7616 break;
7617 }
7618
7619 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7620 {
7621 gas_assert (!i.suffix);
7622 gas_assert (i.reg_operands);
7623
7624 if (i.tm.operand_types[0].bitfield.instance == Accum
7625 || i.operands == 1)
7626 {
7627 /* The address size override prefix changes the size of the
7628 first operand. */
7629 if (flag_code == CODE_64BIT
7630 && i.op[0].regs->reg_type.bitfield.word)
7631 {
7632 as_bad (_("16-bit addressing unavailable for `%s'"),
7633 insn_name (&i.tm));
7634 return 0;
7635 }
7636
7637 if ((flag_code == CODE_32BIT
7638 ? i.op[0].regs->reg_type.bitfield.word
7639 : i.op[0].regs->reg_type.bitfield.dword)
7640 && !add_prefix (ADDR_PREFIX_OPCODE))
7641 return 0;
7642 }
7643 else
7644 {
7645 /* Check invalid register operand when the address size override
7646 prefix changes the size of register operands. */
7647 unsigned int op;
7648 enum { need_word, need_dword, need_qword } need;
7649
7650 /* Check the register operand for the address size prefix if
7651 the memory operand has no real registers, like symbol, DISP
7652 or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant. */
7653 if (i.mem_operands == 1
7654 && i.reg_operands == 1
7655 && i.operands == 2
7656 && i.types[1].bitfield.class == Reg
7657 && (flag_code == CODE_32BIT
7658 ? i.op[1].regs->reg_type.bitfield.word
7659 : i.op[1].regs->reg_type.bitfield.dword)
7660 && ((i.base_reg == NULL && i.index_reg == NULL)
7661 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7662 || (x86_elf_abi == X86_64_X32_ABI
7663 && i.base_reg
7664 && i.base_reg->reg_num == RegIP
7665 && i.base_reg->reg_type.bitfield.qword))
7666 #else
7667 || 0)
7668 #endif
7669 && !add_prefix (ADDR_PREFIX_OPCODE))
7670 return 0;
7671
7672 if (flag_code == CODE_32BIT)
7673 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7674 else if (i.prefix[ADDR_PREFIX])
7675 need = need_dword;
7676 else
7677 need = flag_code == CODE_64BIT ? need_qword : need_word;
7678
7679 for (op = 0; op < i.operands; op++)
7680 {
7681 if (i.types[op].bitfield.class != Reg)
7682 continue;
7683
7684 switch (need)
7685 {
7686 case need_word:
7687 if (i.op[op].regs->reg_type.bitfield.word)
7688 continue;
7689 break;
7690 case need_dword:
7691 if (i.op[op].regs->reg_type.bitfield.dword)
7692 continue;
7693 break;
7694 case need_qword:
7695 if (i.op[op].regs->reg_type.bitfield.qword)
7696 continue;
7697 break;
7698 }
7699
7700 as_bad (_("invalid register operand size for `%s'"),
7701 insn_name (&i.tm));
7702 return 0;
7703 }
7704 }
7705 }
7706
7707 return 1;
7708 }
7709
7710 static int
7711 check_byte_reg (void)
7712 {
7713 int op;
7714
7715 for (op = i.operands; --op >= 0;)
7716 {
7717 /* Skip non-register operands. */
7718 if (i.types[op].bitfield.class != Reg)
7719 continue;
7720
7721 /* If this is an eight bit register, it's OK. Wider registers are
7722 rejected further down, except for the I/O port and crc32 cases
7723 handled just below. */
7724 if (i.types[op].bitfield.byte)
7725 continue;
7726
7727 /* I/O port address operands are OK too. */
7728 if (i.tm.operand_types[op].bitfield.instance == RegD
7729 && i.tm.operand_types[op].bitfield.word)
7730 continue;
7731
7732 /* crc32 only wants its source operand checked here. */
7733 if (i.tm.mnem_off == MN_crc32 && op != 0)
7734 continue;
7735
7736 /* Any other register is bad. */
7737 as_bad (_("`%s%s' not allowed with `%s%c'"),
7738 register_prefix, i.op[op].regs->reg_name,
7739 insn_name (&i.tm), i.suffix);
7740 return 0;
7741 }
7742 return 1;
7743 }
7744
7745 static int
7746 check_long_reg (void)
7747 {
7748 int op;
7749
7750 for (op = i.operands; --op >= 0;)
7751 /* Skip non-register operands. */
7752 if (i.types[op].bitfield.class != Reg)
7753 continue;
7754 /* Reject eight bit registers, except where the template requires
7755 them. (e.g. movzb) */
7756 else if (i.types[op].bitfield.byte
7757 && (i.tm.operand_types[op].bitfield.class == Reg
7758 || i.tm.operand_types[op].bitfield.instance == Accum)
7759 && (i.tm.operand_types[op].bitfield.word
7760 || i.tm.operand_types[op].bitfield.dword))
7761 {
7762 as_bad (_("`%s%s' not allowed with `%s%c'"),
7763 register_prefix,
7764 i.op[op].regs->reg_name,
7765 insn_name (&i.tm),
7766 i.suffix);
7767 return 0;
7768 }
7769 /* Error if the e prefix on a general reg is missing. */
7770 else if (i.types[op].bitfield.word
7771 && (i.tm.operand_types[op].bitfield.class == Reg
7772 || i.tm.operand_types[op].bitfield.instance == Accum)
7773 && i.tm.operand_types[op].bitfield.dword)
7774 {
7775 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7776 register_prefix, i.op[op].regs->reg_name,
7777 i.suffix);
7778 return 0;
7779 }
7780 /* Error if the r prefix on a general reg is present. */
7781 else if (i.types[op].bitfield.qword
7782 && (i.tm.operand_types[op].bitfield.class == Reg
7783 || i.tm.operand_types[op].bitfield.instance == Accum)
7784 && i.tm.operand_types[op].bitfield.dword)
7785 {
7786 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7787 register_prefix, i.op[op].regs->reg_name, i.suffix);
7788 return 0;
7789 }
7790 return 1;
7791 }
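/* Example of the `l' suffix checks above: "movl %rax, %ecx" is rejected
   with "incorrect register `%rax' used with `l' suffix", while
   "movzbl %al, %ecx" is accepted because the template itself asks for a
   byte source.  */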
7792
7793 static int
7794 check_qword_reg (void)
7795 {
7796 int op;
7797
7798 for (op = i.operands; --op >= 0; )
7799 /* Skip non-register operands. */
7800 if (i.types[op].bitfield.class != Reg)
7801 continue;
7802 /* Reject eight bit registers, except where the template requires
7803 them. (e.g. movzb) */
7804 else if (i.types[op].bitfield.byte
7805 && (i.tm.operand_types[op].bitfield.class == Reg
7806 || i.tm.operand_types[op].bitfield.instance == Accum)
7807 && (i.tm.operand_types[op].bitfield.word
7808 || i.tm.operand_types[op].bitfield.dword))
7809 {
7810 as_bad (_("`%s%s' not allowed with `%s%c'"),
7811 register_prefix,
7812 i.op[op].regs->reg_name,
7813 insn_name (&i.tm),
7814 i.suffix);
7815 return 0;
7816 }
7817 /* Error if the r prefix on a general reg is missing. */
7818 else if ((i.types[op].bitfield.word
7819 || i.types[op].bitfield.dword)
7820 && (i.tm.operand_types[op].bitfield.class == Reg
7821 || i.tm.operand_types[op].bitfield.instance == Accum)
7822 && i.tm.operand_types[op].bitfield.qword)
7823 {
7824 /* Prohibit these changes in 64bit mode, since the
7825 lowering would be more complicated. */
7826 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7827 register_prefix, i.op[op].regs->reg_name, i.suffix);
7828 return 0;
7829 }
7830 return 1;
7831 }
7832
7833 static int
7834 check_word_reg (void)
7835 {
7836 int op;
7837 for (op = i.operands; --op >= 0;)
7838 /* Skip non-register operands. */
7839 if (i.types[op].bitfield.class != Reg)
7840 continue;
7841 /* Reject eight bit registers, except where the template requires
7842 them. (e.g. movzb) */
7843 else if (i.types[op].bitfield.byte
7844 && (i.tm.operand_types[op].bitfield.class == Reg
7845 || i.tm.operand_types[op].bitfield.instance == Accum)
7846 && (i.tm.operand_types[op].bitfield.word
7847 || i.tm.operand_types[op].bitfield.dword))
7848 {
7849 as_bad (_("`%s%s' not allowed with `%s%c'"),
7850 register_prefix,
7851 i.op[op].regs->reg_name,
7852 insn_name (&i.tm),
7853 i.suffix);
7854 return 0;
7855 }
7856 /* Error if the e or r prefix on a general reg is present. */
7857 else if ((i.types[op].bitfield.dword
7858 || i.types[op].bitfield.qword)
7859 && (i.tm.operand_types[op].bitfield.class == Reg
7860 || i.tm.operand_types[op].bitfield.instance == Accum)
7861 && i.tm.operand_types[op].bitfield.word)
7862 {
7863 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7864 register_prefix, i.op[op].regs->reg_name,
7865 i.suffix);
7866 return 0;
7867 }
7868 return 1;
7869 }
7870
7871 static int
7872 update_imm (unsigned int j)
7873 {
7874 i386_operand_type overlap = i.types[j];
7875
7876 if (i.tm.operand_types[j].bitfield.imm8
7877 && i.tm.operand_types[j].bitfield.imm8s
7878 && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
7879 {
7880 /* This combination is used on 8-bit immediates where e.g. $~0 is
7881 desirable to permit. We're past operand type matching, so simply
7882 put things back in the shape they were before introducing the
7883 distinction between Imm8, Imm8S, and Imm8|Imm8S. */
7884 overlap.bitfield.imm8s = 0;
7885 }
7886
7887 if (overlap.bitfield.imm8
7888 + overlap.bitfield.imm8s
7889 + overlap.bitfield.imm16
7890 + overlap.bitfield.imm32
7891 + overlap.bitfield.imm32s
7892 + overlap.bitfield.imm64 > 1)
7893 {
7894 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
7895 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
7896 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
7897 static const i386_operand_type imm16_32 = { .bitfield =
7898 { .imm16 = 1, .imm32 = 1 }
7899 };
7900 static const i386_operand_type imm16_32s = { .bitfield =
7901 { .imm16 = 1, .imm32s = 1 }
7902 };
7903 static const i386_operand_type imm16_32_32s = { .bitfield =
7904 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
7905 };
7906
7907 if (i.suffix)
7908 {
7909 i386_operand_type temp;
7910
7911 operand_type_set (&temp, 0);
7912 if (i.suffix == BYTE_MNEM_SUFFIX)
7913 {
7914 temp.bitfield.imm8 = overlap.bitfield.imm8;
7915 temp.bitfield.imm8s = overlap.bitfield.imm8s;
7916 }
7917 else if (i.suffix == WORD_MNEM_SUFFIX)
7918 temp.bitfield.imm16 = overlap.bitfield.imm16;
7919 else if (i.suffix == QWORD_MNEM_SUFFIX)
7920 {
7921 temp.bitfield.imm64 = overlap.bitfield.imm64;
7922 temp.bitfield.imm32s = overlap.bitfield.imm32s;
7923 }
7924 else
7925 temp.bitfield.imm32 = overlap.bitfield.imm32;
7926 overlap = temp;
7927 }
7928 else if (operand_type_equal (&overlap, &imm16_32_32s)
7929 || operand_type_equal (&overlap, &imm16_32)
7930 || operand_type_equal (&overlap, &imm16_32s))
7931 {
7932 if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7933 overlap = imm16;
7934 else
7935 overlap = imm32s;
7936 }
7937 else if (i.prefix[REX_PREFIX] & REX_W)
7938 overlap = operand_type_and (overlap, imm32s);
7939 else if (i.prefix[DATA_PREFIX])
7940 overlap = operand_type_and (overlap,
7941 flag_code != CODE_16BIT ? imm16 : imm32);
7942 if (overlap.bitfield.imm8
7943 + overlap.bitfield.imm8s
7944 + overlap.bitfield.imm16
7945 + overlap.bitfield.imm32
7946 + overlap.bitfield.imm32s
7947 + overlap.bitfield.imm64 != 1)
7948 {
7949 as_bad (_("no instruction mnemonic suffix given; "
7950 "can't determine immediate size"));
7951 return 0;
7952 }
7953 }
7954 i.types[j] = overlap;
7955
7956 return 1;
7957 }
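/* Example of the narrowing above, assuming a push template taking
   Imm16|Imm32: in 32-bit code "push $0x12345678" carries no suffix and
   no data prefix, so the overlap is narrowed to imm32s (4 bytes), while
   "pushw $0x1234" narrows it to imm16 via the `w' suffix.  */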
7958
7959 static int
7960 finalize_imm (void)
7961 {
7962 unsigned int j, n;
7963
7964 /* Update the first 2 immediate operands. */
7965 n = i.operands > 2 ? 2 : i.operands;
7966 if (n)
7967 {
7968 for (j = 0; j < n; j++)
7969 if (update_imm (j) == 0)
7970 return 0;
7971
7972 /* The 3rd operand can't be an immediate operand. */
7973 gas_assert (operand_type_check (i.types[2], imm) == 0);
7974 }
7975
7976 return 1;
7977 }
7978
7979 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
7980 bool do_sse2avx)
7981 {
7982 if (r->reg_flags & RegRex)
7983 {
7984 if (i.rex & rex_bit)
7985 as_bad (_("same type of prefix used twice"));
7986 i.rex |= rex_bit;
7987 }
7988 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
7989 {
7990 gas_assert (i.vex.register_specifier == r);
7991 i.vex.register_specifier += 8;
7992 }
7993
7994 if (r->reg_flags & RegVRex)
7995 i.vrex |= rex_bit;
7996 }
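/* Example: in "mov %rax, %r8" the destination %r8 carries RegRex, so
   REX_B is set here and the insn assembles to 49 89 c0 (REX.W+B,
   opcode 89, ModRM c0).  */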
7997
7998 static int
7999 process_operands (void)
8000 {
8001 /* Default segment register this instruction will use for memory
8002 accesses. 0 means unknown. This is only for optimizing out
8003 unnecessary segment overrides. */
8004 const reg_entry *default_seg = NULL;
8005
8006 /* We only need to check those implicit registers for instructions
8007 with 3 operands or less. */
8008 if (i.operands <= 3)
8009 for (unsigned int j = 0; j < i.operands; j++)
8010 if (i.types[j].bitfield.instance != InstanceNone)
8011 i.reg_operands--;
8012
8013 if (i.tm.opcode_modifier.sse2avx)
8014 {
8015 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
8016 need converting. */
8017 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
8018 i.prefix[REX_PREFIX] = 0;
8019 i.rex_encoding = 0;
8020 }
8021 /* ImmExt should be processed after SSE2AVX. */
8022 else if (i.tm.opcode_modifier.immext)
8023 process_immext ();
8024
8025 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
8026 {
8027 static const i386_operand_type regxmm = {
8028 .bitfield = { .class = RegSIMD, .xmmword = 1 }
8029 };
8030 unsigned int dupl = i.operands;
8031 unsigned int dest = dupl - 1;
8032 unsigned int j;
8033
8034 /* The destination must be an xmm register. */
8035 gas_assert (i.reg_operands
8036 && MAX_OPERANDS > dupl
8037 && operand_type_equal (&i.types[dest], &regxmm));
8038
8039 if (i.tm.operand_types[0].bitfield.instance == Accum
8040 && i.tm.operand_types[0].bitfield.xmmword)
8041 {
8042 /* Keep xmm0 for instructions with VEX prefix and 3
8043 sources. */
8044 i.tm.operand_types[0].bitfield.instance = InstanceNone;
8045 i.tm.operand_types[0].bitfield.class = RegSIMD;
8046 i.reg_operands++;
8047 goto duplicate;
8048 }
8049
8050 if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
8051 {
8052 gas_assert ((MAX_OPERANDS - 1) > dupl);
8053
8054 /* Add the implicit xmm0 for instructions with VEX prefix
8055 and 3 sources. */
8056 for (j = i.operands; j > 0; j--)
8057 {
8058 i.op[j] = i.op[j - 1];
8059 i.types[j] = i.types[j - 1];
8060 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
8061 i.flags[j] = i.flags[j - 1];
8062 }
8063 i.op[0].regs
8064 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
8065 i.types[0] = regxmm;
8066 i.tm.operand_types[0] = regxmm;
8067
8068 i.operands += 2;
8069 i.reg_operands += 2;
8070 i.tm.operands += 2;
8071
8072 dupl++;
8073 dest++;
8074 i.op[dupl] = i.op[dest];
8075 i.types[dupl] = i.types[dest];
8076 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8077 i.flags[dupl] = i.flags[dest];
8078 }
8079 else
8080 {
8081 duplicate:
8082 i.operands++;
8083 i.reg_operands++;
8084 i.tm.operands++;
8085
8086 i.op[dupl] = i.op[dest];
8087 i.types[dupl] = i.types[dest];
8088 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8089 i.flags[dupl] = i.flags[dest];
8090 }
8091
8092 if (i.tm.opcode_modifier.immext)
8093 process_immext ();
8094 }
8095 else if (i.tm.operand_types[0].bitfield.instance == Accum
8096 && i.tm.opcode_modifier.modrm)
8097 {
8098 unsigned int j;
8099
8100 for (j = 1; j < i.operands; j++)
8101 {
8102 i.op[j - 1] = i.op[j];
8103 i.types[j - 1] = i.types[j];
8104
8105 /* We need to adjust fields in i.tm since they are used by
8106 build_modrm_byte. */
8107 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
8108
8109 i.flags[j - 1] = i.flags[j];
8110 }
8111
8112 /* No adjustment to i.reg_operands: This was already done at the top
8113 of the function. */
8114 i.operands--;
8115 i.tm.operands--;
8116 }
8117 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
8118 {
8119 unsigned int regnum, first_reg_in_group, last_reg_in_group;
8120
8121 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
8122 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
8123 regnum = register_number (i.op[1].regs);
8124 first_reg_in_group = regnum & ~3;
8125 last_reg_in_group = first_reg_in_group + 3;
8126 if (regnum != first_reg_in_group)
8127 as_warn (_("source register `%s%s' implicitly denotes"
8128 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
8129 register_prefix, i.op[1].regs->reg_name,
8130 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
8131 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
8132 insn_name (&i.tm));
8133 }
8134 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
8135 {
8136 /* The imul $imm, %reg instruction is converted into
8137 imul $imm, %reg, %reg, and the clr %reg instruction
8138 is converted into xor %reg, %reg. */
8139
8140 unsigned int first_reg_op;
8141
8142 if (operand_type_check (i.types[0], reg))
8143 first_reg_op = 0;
8144 else
8145 first_reg_op = 1;
8146 /* Pretend we saw the extra register operand. */
8147 gas_assert (i.reg_operands == 1
8148 && i.op[first_reg_op + 1].regs == 0);
8149 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
8150 i.types[first_reg_op + 1] = i.types[first_reg_op];
8151 i.operands++;
8152 i.reg_operands++;
8153 }
8154
8155 if (i.tm.opcode_modifier.modrm)
8156 {
8157 /* The opcode is completed (modulo i.tm.extension_opcode which
8158 must be put into the modrm byte). Now, we make the modrm and
8159 index base bytes based on all the info we've collected. */
8160
8161 default_seg = build_modrm_byte ();
8162
8163 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8164 {
8165 /* Warn about some common errors, but press on regardless. */
8166 if (i.operands == 2)
8167 {
8168 /* Reversed arguments on faddp or fmulp. */
8169 as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
8170 register_prefix, i.op[!intel_syntax].regs->reg_name,
8171 register_prefix, i.op[intel_syntax].regs->reg_name);
8172 }
8173 else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
8174 {
8175 /* Extraneous `l' suffix on fp insn. */
8176 as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
8177 register_prefix, i.op[0].regs->reg_name);
8178 }
8179 }
8180 }
8181 else if (i.types[0].bitfield.class == SReg)
8182 {
8183 if (flag_code != CODE_64BIT
8184 ? i.tm.base_opcode == POP_SEG_SHORT
8185 && i.op[0].regs->reg_num == 1
8186 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
8187 && i.op[0].regs->reg_num < 4)
8188 {
8189 as_bad (_("you can't `%s %s%s'"),
8190 insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
8191 return 0;
8192 }
8193 if (i.op[0].regs->reg_num > 3
8194 && i.tm.opcode_space == SPACE_BASE )
8195 {
8196 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
8197 i.tm.opcode_space = SPACE_0F;
8198 }
8199 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
8200 }
8201 else if (i.tm.opcode_space == SPACE_BASE
8202 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
8203 {
8204 default_seg = reg_ds;
8205 }
8206 else if (i.tm.opcode_modifier.isstring)
8207 {
8208 /* For the string instructions that allow a segment override
8209 on one of their operands, the default segment is ds. */
8210 default_seg = reg_ds;
8211 }
8212 else if (i.short_form)
8213 {
8214 /* The register operand is in operand 0 or 1. */
8215 const reg_entry *r = i.op[0].regs;
8216
8217 if (i.imm_operands
8218 || (r->reg_type.bitfield.instance == Accum && i.op[1].regs))
8219 r = i.op[1].regs;
8220 /* Register goes in low 3 bits of opcode. */
8221 i.tm.base_opcode |= r->reg_num;
8222 set_rex_vrex (r, REX_B, false);
8223 }
8224
8225 if ((i.seg[0] || i.prefix[SEG_PREFIX])
8226 && i.tm.mnem_off == MN_lea)
8227 {
8228 if (!quiet_warnings)
8229 as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
8230 if (optimize && !i.no_optimize)
8231 {
8232 i.seg[0] = NULL;
8233 i.prefix[SEG_PREFIX] = 0;
8234 }
8235 }
8236
8237 /* If a segment was explicitly specified, and the specified segment
8238 is neither the default nor the one already recorded from a prefix,
8239 use an opcode prefix to select it. If we never figured out what
8240 the default segment is, then default_seg will be zero at this
8241 point, and the specified segment prefix will always be used. */
8242 if (i.seg[0]
8243 && i.seg[0] != default_seg
8244 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8245 {
8246 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8247 return 0;
8248 }
8249 return 1;
8250 }
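/* Rough example of the SSE2AVX xmm0 handling above: with -msse2avx,
   "blendvps %xmm2, %xmm1" (implicit %xmm0 selector) is re-encoded as
   VEX "vblendvps", with %xmm0 inserted as an explicit operand and the
   destination duplicated as an extra source.  */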
8251
8252 static const reg_entry *
8253 build_modrm_byte (void)
8254 {
8255 const reg_entry *default_seg = NULL;
8256 unsigned int source, dest;
8257 bool vex_3_sources = (i.reg_operands + i.mem_operands == 4);
8258
8259 if (vex_3_sources)
8260 {
8261 unsigned int nds, reg_slot;
8262 expressionS *exp;
8263
8264 dest = i.operands - 1;
8265 nds = dest - 1;
8266
8267 /* There are 2 kinds of instructions:
8268 1. 5 operands: 4 register operands or 3 register operands
8269 plus 1 memory operand plus one Imm4 operand, VexXDS, and
8270 VexW0 or VexW1. The destination must be either XMM, YMM or
8271 ZMM register.
8272 2. 4 operands: 4 register operands or 3 register operands
8273 plus 1 memory operand, with VexXDS. */
8274 gas_assert (i.tm.opcode_modifier.vexvvvv == VEXXDS
8275 && i.tm.opcode_modifier.vexw
8276 && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8277
8278 /* If VexW1 is set, the first non-immediate operand is the source and
8279 the second non-immediate one is encoded in the immediate operand. */
8280 if (i.tm.opcode_modifier.vexw == VEXW1)
8281 {
8282 source = i.imm_operands;
8283 reg_slot = i.imm_operands + 1;
8284 }
8285 else
8286 {
8287 source = i.imm_operands + 1;
8288 reg_slot = i.imm_operands;
8289 }
8290
8291 if (i.imm_operands == 0)
8292 {
8293 /* When there is no immediate operand, generate an 8bit
8294 immediate operand to encode the first operand. */
8295 exp = &im_expressions[i.imm_operands++];
8296 i.op[i.operands].imms = exp;
8297 i.types[i.operands].bitfield.imm8 = 1;
8298 i.operands++;
8299
8300 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8301 exp->X_op = O_constant;
8302 exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8303 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8304 }
8305 else
8306 {
8307 gas_assert (i.imm_operands == 1);
8308 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8309 gas_assert (!i.tm.opcode_modifier.immext);
8310
8311 /* Turn on Imm8 again so that output_imm will generate it. */
8312 i.types[0].bitfield.imm8 = 1;
8313
8314 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8315 i.op[0].imms->X_add_number
8316 |= register_number (i.op[reg_slot].regs) << 4;
8317 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8318 }
8319
8320 gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8321 i.vex.register_specifier = i.op[nds].regs;
8322 }
8323 else
8324 source = dest = 0;
8325
8326 /* i.reg_operands MUST be the number of real register operands;
8327 implicit registers do not count. If there are 3 register
8328 operands, it must be an instruction with VexNDS. For an
8329 instruction with VexNDD, the destination register is encoded
8330 in the VEX prefix. If there are 4 register operands, it must
8331 be an instruction with a VEX prefix and 3 sources. */
8332 if (i.mem_operands == 0
8333 && ((i.reg_operands == 2
8334 && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8335 || (i.reg_operands == 3
8336 && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8337 || (i.reg_operands == 4 && vex_3_sources)))
8338 {
8339 switch (i.operands)
8340 {
8341 case 2:
8342 source = 0;
8343 break;
8344 case 3:
8345 /* When there are 3 operands, one of them may be an immediate,
8346 which may be the first or the last operand. Otherwise,
8347 the first operand must be the shift count register (%cl) or
8348 this is an instruction with VexNDS. */
8349 gas_assert (i.imm_operands == 1
8350 || (i.imm_operands == 0
8351 && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8352 || (i.types[0].bitfield.instance == RegC
8353 && i.types[0].bitfield.byte))));
8354 if (operand_type_check (i.types[0], imm)
8355 || (i.types[0].bitfield.instance == RegC
8356 && i.types[0].bitfield.byte))
8357 source = 1;
8358 else
8359 source = 0;
8360 break;
8361 case 4:
8362 /* When there are 4 operands, the first two must be 8bit
8363 immediate operands. The source operand will be the 3rd
8364 one.
8365
8366 For instructions with VexNDS, if the first operand is
8367 an imm8, the source operand is the 2nd one. If the last
8368 operand is imm8, the source operand is the first one. */
8369 gas_assert ((i.imm_operands == 2
8370 && i.types[0].bitfield.imm8
8371 && i.types[1].bitfield.imm8)
8372 || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8373 && i.imm_operands == 1
8374 && (i.types[0].bitfield.imm8
8375 || i.types[0].bitfield.imm8s
8376 || i.types[i.operands - 1].bitfield.imm8)));
8377 if (i.imm_operands == 2)
8378 source = 2;
8379 else
8380 {
8381 if (i.types[0].bitfield.imm8)
8382 source = 1;
8383 else
8384 source = 0;
8385 }
8386 break;
8387 case 5:
8388 gas_assert (!is_evex_encoding (&i.tm));
8389 gas_assert (i.imm_operands == 1 && vex_3_sources);
8390 break;
8391 default:
8392 abort ();
8393 }
8394
8395 if (!vex_3_sources)
8396 {
8397 dest = source + 1;
8398
8399 if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8400 {
8401 /* For instructions with VexNDS, the register-only source
8402 operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8403 register. It is encoded in the VEX prefix. */
8404
8405 i386_operand_type op;
8406 unsigned int vvvv;
8407
8408 /* Swap two source operands if needed. */
8409 if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES)
8410 {
8411 vvvv = source;
8412 source = dest;
8413 }
8414 else
8415 vvvv = dest;
8416
8417 op = i.tm.operand_types[vvvv];
8418 if ((dest + 1) >= i.operands
8419 || ((op.bitfield.class != Reg
8420 || (!op.bitfield.dword && !op.bitfield.qword))
8421 && op.bitfield.class != RegSIMD
8422 && op.bitfield.class != RegMask))
8423 abort ();
8424 i.vex.register_specifier = i.op[vvvv].regs;
8425 dest++;
8426 }
8427 }
8428
8429 i.rm.mode = 3;
8430 /* One of the register operands will be encoded in the i.rm.reg
8431 field, the other in the combined i.rm.mode and i.rm.regmem
8432 fields. If no form of this instruction supports a memory
8433 destination operand, then we assume the source operand may
8434 sometimes be a memory operand and so we need to store the
8435 destination in the i.rm.reg field. */
8436 if (!i.tm.opcode_modifier.regmem
8437 && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8438 {
8439 i.rm.reg = i.op[dest].regs->reg_num;
8440 i.rm.regmem = i.op[source].regs->reg_num;
8441 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8442 set_rex_vrex (i.op[source].regs, REX_B, false);
8443 }
8444 else
8445 {
8446 i.rm.reg = i.op[source].regs->reg_num;
8447 i.rm.regmem = i.op[dest].regs->reg_num;
8448 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8449 set_rex_vrex (i.op[source].regs, REX_R, false);
8450 }
8451 if (flag_code != CODE_64BIT && (i.rex & REX_R))
8452 {
8453 if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8454 abort ();
8455 i.rex &= ~REX_R;
8456 add_prefix (LOCK_PREFIX_OPCODE);
8457 }
8458 }
8459 else
8460 { /* If it's not 2 reg operands... */
8461 unsigned int mem;
8462
8463 if (i.mem_operands)
8464 {
8465 unsigned int fake_zero_displacement = 0;
8466 unsigned int op;
8467
8468 for (op = 0; op < i.operands; op++)
8469 if (i.flags[op] & Operand_Mem)
8470 break;
8471 gas_assert (op < i.operands);
8472
8473 if (i.tm.opcode_modifier.sib)
8474 {
8475 /* The index register of VSIB shouldn't be RegIZ. */
8476 if (i.tm.opcode_modifier.sib != SIBMEM
8477 && i.index_reg->reg_num == RegIZ)
8478 abort ();
8479
8480 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8481 if (!i.base_reg)
8482 {
8483 i.sib.base = NO_BASE_REGISTER;
8484 i.sib.scale = i.log2_scale_factor;
8485 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8486 i.types[op].bitfield.disp32 = 1;
8487 }
8488
8489 /* Since the mandatory SIB always has an index register, the
8490 code logic remains unchanged here. The non-mandatory SIB
8491 without an index register is allowed and will be handled
8492 later. */
8493 if (i.index_reg)
8494 {
8495 if (i.index_reg->reg_num == RegIZ)
8496 i.sib.index = NO_INDEX_REGISTER;
8497 else
8498 i.sib.index = i.index_reg->reg_num;
8499 set_rex_vrex (i.index_reg, REX_X, false);
8500 }
8501 }
8502
8503 default_seg = reg_ds;
8504
8505 if (i.base_reg == 0)
8506 {
8507 i.rm.mode = 0;
8508 if (!i.disp_operands)
8509 fake_zero_displacement = 1;
8510 if (i.index_reg == 0)
8511 {
8512 /* This checks both for VSIB and for the mandatory non-vector SIB. */
8513 gas_assert (!i.tm.opcode_modifier.sib
8514 || i.tm.opcode_modifier.sib == SIBMEM);
8515 /* Operand is just <disp> */
8516 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8517 if (flag_code == CODE_64BIT)
8518 {
8519 /* 64bit mode replaces 32bit absolute
8520 addressing with RIP relative addressing;
8521 absolute addressing is instead encoded by
8522 one of the redundant SIB forms. */
8523 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8524 i.sib.base = NO_BASE_REGISTER;
8525 i.sib.index = NO_INDEX_REGISTER;
8526 i.types[op].bitfield.disp32 = 1;
8527 }
8528 else if ((flag_code == CODE_16BIT)
8529 ^ (i.prefix[ADDR_PREFIX] != 0))
8530 {
8531 i.rm.regmem = NO_BASE_REGISTER_16;
8532 i.types[op].bitfield.disp16 = 1;
8533 }
8534 else
8535 {
8536 i.rm.regmem = NO_BASE_REGISTER;
8537 i.types[op].bitfield.disp32 = 1;
8538 }
8539 }
8540 else if (!i.tm.opcode_modifier.sib)
8541 {
8542 /* !i.base_reg && i.index_reg */
8543 if (i.index_reg->reg_num == RegIZ)
8544 i.sib.index = NO_INDEX_REGISTER;
8545 else
8546 i.sib.index = i.index_reg->reg_num;
8547 i.sib.base = NO_BASE_REGISTER;
8548 i.sib.scale = i.log2_scale_factor;
8549 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8550 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8551 i.types[op].bitfield.disp32 = 1;
8552 if ((i.index_reg->reg_flags & RegRex) != 0)
8553 i.rex |= REX_X;
8554 }
8555 }
8556 /* RIP addressing for 64bit mode. */
8557 else if (i.base_reg->reg_num == RegIP)
8558 {
8559 gas_assert (!i.tm.opcode_modifier.sib);
8560 i.rm.regmem = NO_BASE_REGISTER;
8561 i.types[op].bitfield.disp8 = 0;
8562 i.types[op].bitfield.disp16 = 0;
8563 i.types[op].bitfield.disp32 = 1;
8564 i.types[op].bitfield.disp64 = 0;
8565 i.flags[op] |= Operand_PCrel;
8566 if (! i.disp_operands)
8567 fake_zero_displacement = 1;
8568 }
8569 else if (i.base_reg->reg_type.bitfield.word)
8570 {
8571 gas_assert (!i.tm.opcode_modifier.sib);
8572 switch (i.base_reg->reg_num)
8573 {
8574 case 3: /* (%bx) */
8575 if (i.index_reg == 0)
8576 i.rm.regmem = 7;
8577 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
8578 i.rm.regmem = i.index_reg->reg_num - 6;
8579 break;
8580 case 5: /* (%bp) */
8581 default_seg = reg_ss;
8582 if (i.index_reg == 0)
8583 {
8584 i.rm.regmem = 6;
8585 if (operand_type_check (i.types[op], disp) == 0)
8586 {
8587 /* fake (%bp) into 0(%bp) */
8588 if (i.disp_encoding == disp_encoding_16bit)
8589 i.types[op].bitfield.disp16 = 1;
8590 else
8591 i.types[op].bitfield.disp8 = 1;
8592 fake_zero_displacement = 1;
8593 }
8594 }
8595 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
8596 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8597 break;
8598 default: /* (%si) -> 4 or (%di) -> 5 */
8599 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8600 }
8601 if (!fake_zero_displacement
8602 && !i.disp_operands
8603 && i.disp_encoding)
8604 {
8605 fake_zero_displacement = 1;
8606 if (i.disp_encoding == disp_encoding_8bit)
8607 i.types[op].bitfield.disp8 = 1;
8608 else
8609 i.types[op].bitfield.disp16 = 1;
8610 }
8611 i.rm.mode = mode_from_disp_size (i.types[op]);
8612 }
8613 else /* i.base_reg and 32/64 bit mode */
8614 {
8615 if (operand_type_check (i.types[op], disp))
8616 {
8617 i.types[op].bitfield.disp16 = 0;
8618 i.types[op].bitfield.disp64 = 0;
8619 i.types[op].bitfield.disp32 = 1;
8620 }
8621
8622 if (!i.tm.opcode_modifier.sib)
8623 i.rm.regmem = i.base_reg->reg_num;
8624 if ((i.base_reg->reg_flags & RegRex) != 0)
8625 i.rex |= REX_B;
8626 i.sib.base = i.base_reg->reg_num;
8627 /* x86-64 ignores REX prefix bit here to avoid decoder
8628 complications. */
8629 if (!(i.base_reg->reg_flags & RegRex)
8630 && (i.base_reg->reg_num == EBP_REG_NUM
8631 || i.base_reg->reg_num == ESP_REG_NUM))
8632 default_seg = reg_ss;
8633 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8634 {
8635 fake_zero_displacement = 1;
8636 if (i.disp_encoding == disp_encoding_32bit)
8637 i.types[op].bitfield.disp32 = 1;
8638 else
8639 i.types[op].bitfield.disp8 = 1;
8640 }
8641 i.sib.scale = i.log2_scale_factor;
8642 if (i.index_reg == 0)
8643 {
8644 /* Only check for VSIB. */
8645 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8646 && i.tm.opcode_modifier.sib != VECSIB256
8647 && i.tm.opcode_modifier.sib != VECSIB512);
8648
8649 /* <disp>(%esp) becomes two byte modrm with no index
8650 register. We've already stored the code for esp
8651 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8652 Any base register besides %esp will not use the
8653 extra modrm byte. */
8654 i.sib.index = NO_INDEX_REGISTER;
8655 }
8656 else if (!i.tm.opcode_modifier.sib)
8657 {
8658 if (i.index_reg->reg_num == RegIZ)
8659 i.sib.index = NO_INDEX_REGISTER;
8660 else
8661 i.sib.index = i.index_reg->reg_num;
8662 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8663 if ((i.index_reg->reg_flags & RegRex) != 0)
8664 i.rex |= REX_X;
8665 }
8666
8667 if (i.disp_operands
8668 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8669 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8670 i.rm.mode = 0;
8671 else
8672 {
8673 if (!fake_zero_displacement
8674 && !i.disp_operands
8675 && i.disp_encoding)
8676 {
8677 fake_zero_displacement = 1;
8678 if (i.disp_encoding == disp_encoding_8bit)
8679 i.types[op].bitfield.disp8 = 1;
8680 else
8681 i.types[op].bitfield.disp32 = 1;
8682 }
8683 i.rm.mode = mode_from_disp_size (i.types[op]);
8684 }
8685 }
8686
8687 if (fake_zero_displacement)
8688 {
8689 /* Fakes a zero displacement assuming that i.types[op]
8690 holds the correct displacement size. */
8691 expressionS *exp;
8692
8693 gas_assert (i.op[op].disps == 0);
8694 exp = &disp_expressions[i.disp_operands++];
8695 i.op[op].disps = exp;
8696 exp->X_op = O_constant;
8697 exp->X_add_number = 0;
8698 exp->X_add_symbol = (symbolS *) 0;
8699 exp->X_op_symbol = (symbolS *) 0;
8700 }
8701
8702 mem = op;
8703 }
8704 else
8705 mem = ~0;
8706
8707 if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8708 {
8709 i.vex.register_specifier = i.op[2].regs;
8710 if (!i.mem_operands)
8711 {
8712 i.rm.mode = 3;
8713 i.rm.regmem = i.op[1].regs->reg_num;
8714 if ((i.op[1].regs->reg_flags & RegRex) != 0)
8715 i.rex |= REX_B;
8716 }
8717 }
8718 /* Fill in i.rm.reg or i.rm.regmem field with register operand
8719 (if any) based on i.tm.extension_opcode. Again, we must be
8720 careful to make sure that segment/control/debug/test/MMX
8721 registers are coded into the i.rm.reg field. */
8722 else if (i.reg_operands)
8723 {
8724 unsigned int op;
8725 unsigned int vex_reg = ~0;
8726
8727 for (op = 0; op < i.operands; op++)
8728 if (i.types[op].bitfield.class == Reg
8729 || i.types[op].bitfield.class == RegBND
8730 || i.types[op].bitfield.class == RegMask
8731 || i.types[op].bitfield.class == SReg
8732 || i.types[op].bitfield.class == RegCR
8733 || i.types[op].bitfield.class == RegDR
8734 || i.types[op].bitfield.class == RegTR
8735 || i.types[op].bitfield.class == RegSIMD
8736 || i.types[op].bitfield.class == RegMMX)
8737 break;
8738
8739 if (vex_3_sources)
8740 op = dest;
8741 else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8742 {
8743 /* For instructions with VexNDS, the register-only
8744 source operand is encoded in the VEX prefix. */
8745 gas_assert (mem != (unsigned int) ~0);
8746
8747 if (op > mem || i.tm.cpu_flags.bitfield.cpucmpccxadd)
8748 {
8749 vex_reg = op++;
8750 gas_assert (op < i.operands);
8751 }
8752 else
8753 {
8754 /* Check register-only source operand when two source
8755 operands are swapped. */
8756 if (!i.tm.operand_types[op].bitfield.baseindex
8757 && i.tm.operand_types[op + 1].bitfield.baseindex)
8758 {
8759 vex_reg = op;
8760 op += 2;
8761 gas_assert (mem == (vex_reg + 1)
8762 && op < i.operands);
8763 }
8764 else
8765 {
8766 vex_reg = op + 1;
8767 gas_assert (vex_reg < i.operands);
8768 }
8769 }
8770 }
8771 else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8772 {
8773 /* For instructions with VexNDD, the register destination
8774 is encoded in the VEX prefix. */
8775 if (i.mem_operands == 0)
8776 {
8777 /* There is no memory operand. */
8778 gas_assert ((op + 2) == i.operands);
8779 vex_reg = op + 1;
8780 }
8781 else
8782 {
8783 /* There are only 2 non-immediate operands. */
8784 gas_assert (op < i.imm_operands + 2
8785 && i.operands == i.imm_operands + 2);
8786 vex_reg = i.imm_operands + 1;
8787 }
8788 }
8789 else
8790 gas_assert (op < i.operands);
8791
8792 if (vex_reg != (unsigned int) ~0)
8793 {
8794 i386_operand_type *type = &i.tm.operand_types[vex_reg];
8795
8796 if ((type->bitfield.class != Reg
8797 || (!type->bitfield.dword && !type->bitfield.qword))
8798 && type->bitfield.class != RegSIMD
8799 && type->bitfield.class != RegMask)
8800 abort ();
8801
8802 i.vex.register_specifier = i.op[vex_reg].regs;
8803 }
8804
8805 /* Don't set OP operand twice. */
8806 if (vex_reg != op)
8807 {
8808 /* If there is an extension opcode to put here, the
8809 register number must be put into the regmem field. */
8810 if (i.tm.extension_opcode != None)
8811 {
8812 i.rm.regmem = i.op[op].regs->reg_num;
8813 set_rex_vrex (i.op[op].regs, REX_B,
8814 i.tm.opcode_modifier.sse2avx);
8815 }
8816 else
8817 {
8818 i.rm.reg = i.op[op].regs->reg_num;
8819 set_rex_vrex (i.op[op].regs, REX_R,
8820 i.tm.opcode_modifier.sse2avx);
8821 }
8822 }
8823
8824 /* Now, if no memory operand has set i.rm.mode = 0, 1, 2 we
8825 must set it to 3 to indicate this is a register operand
8826 in the regmem field. */
8827 if (!i.mem_operands)
8828 i.rm.mode = 3;
8829 }
8830
8831 /* Fill in i.rm.reg field with extension opcode (if any). */
8832 if (i.tm.extension_opcode != None)
8833 i.rm.reg = i.tm.extension_opcode;
8834 }
8835 return default_seg;
8836 }
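/* Worked examples of the ModRM/SIB bytes built above (32-bit code,
   standard encodings):
     addl %ebx, %ecx           -> 01 d9     mode=3, reg=%ebx, regmem=%ecx
     movl %eax, 4(%ebp)        -> 89 45 04  mode=1 (disp8), regmem=%ebp
     movl %eax, (%ebx,%esi,4)  -> 89 04 b3  SIB: scale=4, index=%esi, base=%ebx
     movl %eax, (%ebp)         -> 89 45 00  fake zero disp8, since mode=0 with
                                            regmem=5 would mean bare disp32.  */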
8837
8838 static INLINE void
8839 frag_opcode_byte (unsigned char byte)
8840 {
8841 if (now_seg != absolute_section)
8842 FRAG_APPEND_1_CHAR (byte);
8843 else
8844 ++abs_section_offset;
8845 }
8846
8847 static unsigned int
8848 flip_code16 (unsigned int code16)
8849 {
8850 gas_assert (i.tm.operands == 1);
8851
8852 return !(i.prefix[REX_PREFIX] & REX_W)
8853 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8854 : i.tm.operand_types[0].bitfield.disp16)
8855 ? CODE16 : 0;
8856 }
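/* Example: in 32-bit code a 0x66 data prefix on "jmp" makes flip_code16
   return CODE16, so the branch is emitted with a 16-bit displacement;
   a REX.W prefix suppresses the flip.  */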
8857
8858 static void
8859 output_branch (void)
8860 {
8861 char *p;
8862 int size;
8863 int code16;
8864 int prefix;
8865 relax_substateT subtype;
8866 symbolS *sym;
8867 offsetT off;
8868
8869 if (now_seg == absolute_section)
8870 {
8871 as_bad (_("relaxable branches not supported in absolute section"));
8872 return;
8873 }
8874
8875 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8876 size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8877
8878 prefix = 0;
8879 if (i.prefix[DATA_PREFIX] != 0)
8880 {
8881 prefix = 1;
8882 i.prefixes -= 1;
8883 code16 ^= flip_code16(code16);
8884 }
8885 /* Pentium4 branch hints. */
8886 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8887 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8888 {
8889 prefix++;
8890 i.prefixes--;
8891 }
8892 if (i.prefix[REX_PREFIX] != 0)
8893 {
8894 prefix++;
8895 i.prefixes--;
8896 }
8897
8898 /* BND prefixed jump. */
8899 if (i.prefix[BND_PREFIX] != 0)
8900 {
8901 prefix++;
8902 i.prefixes--;
8903 }
8904
8905 if (i.prefixes != 0)
8906 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
8907
8908 /* It's always a symbol; end the frag & set up for relaxation.
8909 Make sure there is enough room in this frag for the largest
8910 instruction we may generate in md_convert_frag. This is 2
8911 bytes for the opcode and room for the prefix and largest
8912 displacement. */
8913 frag_grow (prefix + 2 + 4);
8914 /* Prefix and 1 opcode byte go in fr_fix. */
8915 p = frag_more (prefix + 1);
8916 if (i.prefix[DATA_PREFIX] != 0)
8917 *p++ = DATA_PREFIX_OPCODE;
8918 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8919 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8920 *p++ = i.prefix[SEG_PREFIX];
8921 if (i.prefix[BND_PREFIX] != 0)
8922 *p++ = BND_PREFIX_OPCODE;
8923 if (i.prefix[REX_PREFIX] != 0)
8924 *p++ = i.prefix[REX_PREFIX];
8925 *p = i.tm.base_opcode;
8926
8927 if ((unsigned char) *p == JUMP_PC_RELATIVE)
8928 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8929 else if (cpu_arch_flags.bitfield.cpui386)
8930 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8931 else
8932 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8933 subtype |= code16;
8934
8935 sym = i.op[0].disps->X_add_symbol;
8936 off = i.op[0].disps->X_add_number;
8937
8938 if (i.op[0].disps->X_op != O_constant
8939 && i.op[0].disps->X_op != O_symbol)
8940 {
8941 /* Handle complex expressions. */
8942 sym = make_expr_symbol (i.op[0].disps);
8943 off = 0;
8944 }
8945
8946 frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8947
8948 /* 1 possible extra opcode + 4 byte displacement go in var part.
8949 Pass reloc in fr_var. */
8950 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8951 }
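/* Example: "jne .Lfar" is first emitted here in its short form (75 plus
   an 8-bit displacement) inside a relaxable frag; if the target turns
   out to be out of byte range, md_convert_frag widens it to 0f 85 with
   a 32-bit displacement.  */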
8952
8953 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8954 /* Return TRUE iff PLT32 relocation should be used for branching to
8955 symbol S. */
8956
8957 static bool
8958 need_plt32_p (symbolS *s)
8959 {
8960 /* PLT32 relocation is ELF only. */
8961 if (!IS_ELF)
8962 return false;
8963
8964 #ifdef TE_SOLARIS
8965 /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8966 krtld support it. */
8967 return false;
8968 #endif
8969
8970 /* Since there is no need to prepare for a PLT branch on x86-64, we
8971 can generate R_X86_64_PLT32 instead of R_X86_64_PC32, and it can
8972 then be used as a marker for 32-bit PC-relative branches. */
8973 if (!object_64bit)
8974 return false;
8975
8976 if (s == NULL)
8977 return false;
8978
8979 /* A weak or undefined symbol needs the PLT32 relocation. */
8980 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8981 return true;
8982
8983 /* A non-global symbol doesn't need the PLT32 relocation. */
8984 if (! S_IS_EXTERNAL (s))
8985 return false;
8986
8987 /* Other global symbols need the PLT32 relocation. NB: Symbols with
8988 non-default visibility are treated as normal global symbols so
8989 that the PLT32 relocation can be used as a marker for 32-bit
8990 PC-relative branches, which is useful for linker relaxation. */
8991 return true;
8992 }
8993 #endif
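/* Example: in 64-bit ELF output, "call foo" against an undefined or
   global foo gets R_X86_64_PLT32 rather than R_X86_64_PC32; a call to a
   defined file-local foo keeps R_X86_64_PC32.  */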
8994
8995 static void
8996 output_jump (void)
8997 {
8998 char *p;
8999 int size;
9000 fixS *fixP;
9001 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
9002
9003 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
9004 {
9005 /* This is a loop or jecxz type instruction. */
9006 size = 1;
9007 if (i.prefix[ADDR_PREFIX] != 0)
9008 {
9009 frag_opcode_byte (ADDR_PREFIX_OPCODE);
9010 i.prefixes -= 1;
9011 }
9012 /* Pentium4 branch hints. */
9013 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
9014 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
9015 {
9016 frag_opcode_byte (i.prefix[SEG_PREFIX]);
9017 i.prefixes--;
9018 }
9019 }
9020 else
9021 {
9022 int code16;
9023
9024 code16 = 0;
9025 if (flag_code == CODE_16BIT)
9026 code16 = CODE16;
9027
9028 if (i.prefix[DATA_PREFIX] != 0)
9029 {
9030 frag_opcode_byte (DATA_PREFIX_OPCODE);
9031 i.prefixes -= 1;
9032 code16 ^= flip_code16(code16);
9033 }
9034
9035 size = 4;
9036 if (code16)
9037 size = 2;
9038 }
9039
9040 /* BND prefixed jump. */
9041 if (i.prefix[BND_PREFIX] != 0)
9042 {
9043 frag_opcode_byte (i.prefix[BND_PREFIX]);
9044 i.prefixes -= 1;
9045 }
9046
9047 if (i.prefix[REX_PREFIX] != 0)
9048 {
9049 frag_opcode_byte (i.prefix[REX_PREFIX]);
9050 i.prefixes -= 1;
9051 }
9052
9053 if (i.prefixes != 0)
9054 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9055
9056 if (now_seg == absolute_section)
9057 {
9058 abs_section_offset += i.opcode_length + size;
9059 return;
9060 }
9061
9062 p = frag_more (i.opcode_length + size);
9063 switch (i.opcode_length)
9064 {
9065 case 2:
9066 *p++ = i.tm.base_opcode >> 8;
9067 /* Fall through. */
9068 case 1:
9069 *p++ = i.tm.base_opcode;
9070 break;
9071 default:
9072 abort ();
9073 }
9074
9075 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9076 if (flag_code == CODE_64BIT && size == 4
9077 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
9078 && need_plt32_p (i.op[0].disps->X_add_symbol))
9079 jump_reloc = BFD_RELOC_X86_64_PLT32;
9080 #endif
9081
9082 jump_reloc = reloc (size, 1, 1, jump_reloc);
9083
9084 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9085 i.op[0].disps, 1, jump_reloc);
9086
9087 /* All jumps handled here are signed, but don't unconditionally use a
9088 signed limit check for 32 and 16 bit jumps as we want to allow wrap
9089 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
9090 respectively. */
9091 switch (size)
9092 {
9093 case 1:
9094 fixP->fx_signed = 1;
9095 break;
9096
9097 case 2:
9098 if (i.tm.mnem_off == MN_xbegin)
9099 fixP->fx_signed = 1;
9100 break;
9101
9102 case 4:
9103 if (flag_code == CODE_64BIT)
9104 fixP->fx_signed = 1;
9105 break;
9106 }
9107 }
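/* Example of the JUMP_BYTE path above: "loop .L1" only ever takes an
   8-bit displacement; in 64-bit code "jecxz .L1" is emitted as 67 e3
   with the 0x67 address prefix selecting %ecx rather than %rcx.  */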
9108
9109 static void
9110 output_interseg_jump (void)
9111 {
9112 char *p;
9113 int size;
9114 int prefix;
9115 int code16;
9116
9117 code16 = 0;
9118 if (flag_code == CODE_16BIT)
9119 code16 = CODE16;
9120
9121 prefix = 0;
9122 if (i.prefix[DATA_PREFIX] != 0)
9123 {
9124 prefix = 1;
9125 i.prefixes -= 1;
9126 code16 ^= CODE16;
9127 }
9128
9129 gas_assert (!i.prefix[REX_PREFIX]);
9130
9131 size = 4;
9132 if (code16)
9133 size = 2;
9134
9135 if (i.prefixes != 0)
9136 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9137
9138 if (now_seg == absolute_section)
9139 {
9140 abs_section_offset += prefix + 1 + 2 + size;
9141 return;
9142 }
9143
9144 /* 1 opcode; 2 segment; offset */
9145 p = frag_more (prefix + 1 + 2 + size);
9146
9147 if (i.prefix[DATA_PREFIX] != 0)
9148 *p++ = DATA_PREFIX_OPCODE;
9149
9150 if (i.prefix[REX_PREFIX] != 0)
9151 *p++ = i.prefix[REX_PREFIX];
9152
9153 *p++ = i.tm.base_opcode;
9154 if (i.op[1].imms->X_op == O_constant)
9155 {
9156 offsetT n = i.op[1].imms->X_add_number;
9157
9158 if (size == 2
9159 && !fits_in_unsigned_word (n)
9160 && !fits_in_signed_word (n))
9161 {
9162 as_bad (_("16-bit jump out of range"));
9163 return;
9164 }
9165 md_number_to_chars (p, n, size);
9166 }
9167 else
9168 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9169 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9170
9171 p += size;
9172 if (i.op[0].imms->X_op == O_constant)
9173 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9174 else
9175 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9176 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9177 }
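/* Example: in 32-bit code "ljmp $0x8, $0x1000" is emitted as
   ea 00 10 00 00 08 00 - the opcode, a 4-byte offset, then the 2-byte
   segment selector.  */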
9178
9179 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9180 void
9181 x86_cleanup (void)
9182 {
9183 char *p;
9184 asection *seg = now_seg;
9185 subsegT subseg = now_subseg;
9186 asection *sec;
9187 unsigned int alignment, align_size_1;
9188 unsigned int isa_1_descsz, feature_2_descsz, descsz;
9189 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9190 unsigned int padding;
9191
9192 if (!IS_ELF || !x86_used_note)
9193 return;
9194
9195 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9196
9197 /* The .note.gnu.property section layout:
9198
9199 Field Length Contents
9200 ---- ---- ----
9201 n_namsz 4 4
9202 n_descsz 4 The note descriptor size
9203 n_type 4 NT_GNU_PROPERTY_TYPE_0
9204 n_name 4 "GNU"
9205 n_desc n_descsz The program property array
9206 .... .... ....
9207 */
9208
9209 /* Create the .note.gnu.property section. */
9210 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9211 bfd_set_section_flags (sec,
9212 (SEC_ALLOC
9213 | SEC_LOAD
9214 | SEC_DATA
9215 | SEC_HAS_CONTENTS
9216 | SEC_READONLY));
9217
9218 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9219 {
9220 align_size_1 = 7;
9221 alignment = 3;
9222 }
9223 else
9224 {
9225 align_size_1 = 3;
9226 alignment = 2;
9227 }
9228
9229 bfd_set_section_alignment (sec, alignment);
9230 elf_section_type (sec) = SHT_NOTE;
9231
9232 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9233 + 4-byte data */
9234 isa_1_descsz_raw = 4 + 4 + 4;
9235 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
9236 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9237
9238 feature_2_descsz_raw = isa_1_descsz;
9239 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9240 + 4-byte data */
9241 feature_2_descsz_raw += 4 + 4 + 4;
9242 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
9243 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9244 & ~align_size_1);
9245
9246 descsz = feature_2_descsz;
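/* For ELFCLASS64 (8-byte alignment) this works out to isa_1_descsz = 16
   and feature_2_descsz = 32, so the frag below is 16 + 32 = 48 bytes.  */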
9247 /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz. */
9248 p = frag_more (4 + 4 + 4 + 4 + descsz);
9249
9250 /* Write n_namsz. */
9251 md_number_to_chars (p, (valueT) 4, 4);
9252
9253 /* Write n_descsz. */
9254 md_number_to_chars (p + 4, (valueT) descsz, 4);
9255
9256 /* Write n_type. */
9257 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9258
9259 /* Write n_name. */
9260 memcpy (p + 4 * 3, "GNU", 4);
9261
9262 /* Write 4-byte type. */
9263 md_number_to_chars (p + 4 * 4,
9264 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9265
9266 /* Write 4-byte data size. */
9267 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9268
9269 /* Write 4-byte data. */
9270 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9271
9272 /* Zero out paddings. */
9273 padding = isa_1_descsz - isa_1_descsz_raw;
9274 if (padding)
9275 memset (p + 4 * 7, 0, padding);
9276
9277 /* Write 4-byte type. */
9278 md_number_to_chars (p + isa_1_descsz + 4 * 4,
9279 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9280
9281 /* Write 4-byte data size. */
9282 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9283
9284 /* Write 4-byte data. */
9285 md_number_to_chars (p + isa_1_descsz + 4 * 6,
9286 (valueT) x86_feature_2_used, 4);
9287
9288 /* Zero out paddings. */
9289 padding = feature_2_descsz - feature_2_descsz_raw;
9290 if (padding)
9291 memset (p + isa_1_descsz + 4 * 7, 0, padding);
9292
9293 /* We probably can't restore the current segment, for there likely
9294 isn't one yet... */
9295 if (seg && subseg)
9296 subseg_set (seg, subseg);
9297 }
9298
9299 bool
9300 x86_support_sframe_p (void)
9301 {
9302 /* At this time, SFrame stack trace info is supported for the AMD64 ABI only. */
9303 return (x86_elf_abi == X86_64_ABI);
9304 }
9305
9306 bool
9307 x86_sframe_ra_tracking_p (void)
9308 {
9309 /* In AMD64, the return address is always stored on the stack at a fixed
9310 offset from the CFA (provided via x86_sframe_cfa_ra_offset ()), so it
9311 is not tracked explicitly via an SFrame Frame Row Entry. */
9312 return false;
9313 }
9314
9315 offsetT
9316 x86_sframe_cfa_ra_offset (void)
9317 {
9318 gas_assert (x86_elf_abi == X86_64_ABI);
9319 return (offsetT) -8;
9320 }
9321
9322 unsigned char
9323 x86_sframe_get_abi_arch (void)
9324 {
9325 unsigned char sframe_abi_arch = 0;
9326
9327 if (x86_support_sframe_p ())
9328 {
9329 gas_assert (!target_big_endian);
9330 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9331 }
9332
9333 return sframe_abi_arch;
9334 }
9335
9336 #endif
9337
9338 static unsigned int
9339 encoding_length (const fragS *start_frag, offsetT start_off,
9340 const char *frag_now_ptr)
9341 {
9342 unsigned int len = 0;
9343
9344 if (start_frag != frag_now)
9345 {
9346 const fragS *fr = start_frag;
9347
9348 do {
9349 len += fr->fr_fix;
9350 fr = fr->fr_next;
9351 } while (fr && fr != frag_now);
9352 }
9353
9354 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9355 }
9356
9357 /* Return 1 for test, and, cmp, add, sub, inc and dec, which may
9358 be macro-fused with conditional jumps.
9359 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP relative address,
9360 or is in one of the following forms:
9361
9362 cmp m, imm
9363 add m, imm
9364 sub m, imm
9365 test m, imm
9366 and m, imm
9367 inc m
9368 dec m
9369
9370 it is unfusible. */
9371
9372 static int
9373 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9374 {
9375 /* No RIP address. */
9376 if (i.base_reg && i.base_reg->reg_num == RegIP)
9377 return 0;
9378
9379 /* No opcodes outside of base encoding space. */
9380 if (i.tm.opcode_space != SPACE_BASE)
9381 return 0;
9382
9383 /* add, sub without add/sub m, imm. */
9384 if (i.tm.base_opcode <= 5
9385 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9386 || ((i.tm.base_opcode | 3) == 0x83
9387 && (i.tm.extension_opcode == 0x5
9388 || i.tm.extension_opcode == 0x0)))
9389 {
9390 *mf_cmp_p = mf_cmp_alu_cmp;
9391 return !(i.mem_operands && i.imm_operands);
9392 }
9393
9394 /* and without and m, imm. */
9395 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9396 || ((i.tm.base_opcode | 3) == 0x83
9397 && i.tm.extension_opcode == 0x4))
9398 {
9399 *mf_cmp_p = mf_cmp_test_and;
9400 return !(i.mem_operands && i.imm_operands);
9401 }
9402
9403 /* test without test m imm. */
9404 if ((i.tm.base_opcode | 1) == 0x85
9405 || (i.tm.base_opcode | 1) == 0xa9
9406 || ((i.tm.base_opcode | 1) == 0xf7
9407 && i.tm.extension_opcode == 0))
9408 {
9409 *mf_cmp_p = mf_cmp_test_and;
9410 return !(i.mem_operands && i.imm_operands);
9411 }
9412
9413 /* cmp without cmp m, imm. */
9414 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9415 || ((i.tm.base_opcode | 3) == 0x83
9416 && (i.tm.extension_opcode == 0x7)))
9417 {
9418 *mf_cmp_p = mf_cmp_alu_cmp;
9419 return !(i.mem_operands && i.imm_operands);
9420 }
9421
9422 /* inc, dec without inc/dec m. */
9423 if ((i.tm.cpu_flags.bitfield.cpuno64
9424 && (i.tm.base_opcode | 0xf) == 0x4f)
9425 || ((i.tm.base_opcode | 1) == 0xff
9426 && i.tm.extension_opcode <= 0x1))
9427 {
9428 *mf_cmp_p = mf_cmp_incdec;
9429 return !i.mem_operands;
9430 }
9431
9432 return 0;
9433 }
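/* Example: "cmp %esi, %edi" followed by "jne .L1" may macro-fuse
   (mf_cmp_alu_cmp), whereas "cmpl $1, (%rdi)" cannot, having both a
   memory and an immediate operand.  */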
9434
9435 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9436
9437 static int
9438 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9439 {
9440 /* NB: This doesn't work with COND_JUMP86 without i386. */
9441 if (!align_branch_power
9442 || now_seg == absolute_section
9443 || !cpu_arch_flags.bitfield.cpui386
9444 || !(align_branch & align_branch_fused_bit))
9445 return 0;
9446
9447 if (maybe_fused_with_jcc_p (mf_cmp_p))
9448 {
9449 if (last_insn.kind == last_insn_other
9450 || last_insn.seg != now_seg)
9451 return 1;
9452 if (flag_debug)
9453 as_warn_where (last_insn.file, last_insn.line,
9454 _("`%s` skips -malign-branch-boundary on `%s`"),
9455 last_insn.name, insn_name (&i.tm));
9456 }
9457
9458 return 0;
9459 }
9460
9461 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9462
9463 static int
9464 add_branch_prefix_frag_p (void)
9465 {
9466 /* NB: This doesn't work with COND_JUMP86 without i386. Don't add a prefix
9467 to PadLock instructions since they include prefixes in their opcodes. */
9468 if (!align_branch_power
9469 || !align_branch_prefix_size
9470 || now_seg == absolute_section
9471 || i.tm.cpu_flags.bitfield.cpupadlock
9472 || !cpu_arch_flags.bitfield.cpui386)
9473 return 0;
9474
9475 /* Don't add a prefix if the insn is itself a prefix, or if it has no
9476 operands, in which case a segment prefix may have special meaning. */
9477 if (!i.operands || i.tm.opcode_modifier.isprefix)
9478 return 0;
9479
9480 if (last_insn.kind == last_insn_other
9481 || last_insn.seg != now_seg)
9482 return 1;
9483
9484 if (flag_debug)
9485 as_warn_where (last_insn.file, last_insn.line,
9486 _("`%s` skips -malign-branch-boundary on `%s`"),
9487 last_insn.name, insn_name (&i.tm));
9488
9489 return 0;
9490 }
9491
9492 /* Return 1 if a BRANCH_PADDING frag should be generated. */
9493
9494 static int
9495 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9496 enum mf_jcc_kind *mf_jcc_p)
9497 {
9498 int add_padding;
9499
9500 /* NB: This doesn't work with COND_JUMP86 without i386. */
9501 if (!align_branch_power
9502 || now_seg == absolute_section
9503 || !cpu_arch_flags.bitfield.cpui386
9504 || i.tm.opcode_space != SPACE_BASE)
9505 return 0;
9506
9507 add_padding = 0;
9508
9509 /* Check for jcc and direct jmp. */
9510 if (i.tm.opcode_modifier.jump == JUMP)
9511 {
9512 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9513 {
9514 *branch_p = align_branch_jmp;
9515 add_padding = align_branch & align_branch_jmp_bit;
9516 }
9517 else
9518 {
9519 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
9520 table, ignore the lowest bit. */
9521 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9522 *branch_p = align_branch_jcc;
9523 if ((align_branch & align_branch_jcc_bit))
9524 add_padding = 1;
9525 }
9526 }
9527 else if ((i.tm.base_opcode | 1) == 0xc3)
9528 {
9529 /* Near ret. */
9530 *branch_p = align_branch_ret;
9531 if ((align_branch & align_branch_ret_bit))
9532 add_padding = 1;
9533 }
9534 else
9535 {
9536 /* Check for indirect jmp, direct and indirect calls. */
9537 if (i.tm.base_opcode == 0xe8)
9538 {
9539 /* Direct call. */
9540 *branch_p = align_branch_call;
9541 if ((align_branch & align_branch_call_bit))
9542 add_padding = 1;
9543 }
9544 else if (i.tm.base_opcode == 0xff
9545 && (i.tm.extension_opcode == 2
9546 || i.tm.extension_opcode == 4))
9547 {
9548 /* Indirect call and jmp. */
9549 *branch_p = align_branch_indirect;
9550 if ((align_branch & align_branch_indirect_bit))
9551 add_padding = 1;
9552 }
9553
9554 if (add_padding
9555 && i.disp_operands
9556 && tls_get_addr
9557 && (i.op[0].disps->X_op == O_symbol
9558 || (i.op[0].disps->X_op == O_subtract
9559 && i.op[0].disps->X_op_symbol == GOT_symbol)))
9560 {
9561 symbolS *s = i.op[0].disps->X_add_symbol;
9562 /* No padding for a call to a global or undefined tls_get_addr. */
9563 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9564 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9565 return 0;
9566 }
9567 }
9568
9569 if (add_padding
9570 && last_insn.kind != last_insn_other
9571 && last_insn.seg == now_seg)
9572 {
9573 if (flag_debug)
9574 as_warn_where (last_insn.file, last_insn.line,
9575 _("`%s` skips -malign-branch-boundary on `%s`"),
9576 last_insn.name, insn_name (&i.tm));
9577 return 0;
9578 }
9579
9580 return add_padding;
9581 }
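/* The three predicates above implement -malign-branch-boundary=NUM,
   -malign-branch=TYPE[+TYPE...] and -malign-branch-prefix-size=NUM;
   e.g. assembling with "-malign-branch-boundary=32
   -malign-branch=fused+jcc" pads fused cmp/jcc pairs and plain
   conditional jumps so they do not cross a 32-byte boundary.  */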
9582
9583 static void
9584 output_insn (void)
9585 {
9586 fragS *insn_start_frag;
9587 offsetT insn_start_off;
9588 fragS *fragP = NULL;
9589 enum align_branch_kind branch = align_branch_none;
9590 /* The initializer is arbitrary, just to avoid an uninitialized error;
9591 it's actually either assigned in add_branch_padding_frag_p
9592 or never used. */
9593 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9594
9595 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9596 if (IS_ELF && x86_used_note && now_seg != absolute_section)
9597 {
9598 if ((i.xstate & xstate_tmm) == xstate_tmm
9599 || i.tm.cpu_flags.bitfield.cpuamx_tile)
9600 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9601
9602 if (i.tm.cpu_flags.bitfield.cpu8087
9603 || i.tm.cpu_flags.bitfield.cpu287
9604 || i.tm.cpu_flags.bitfield.cpu387
9605 || i.tm.cpu_flags.bitfield.cpu687
9606 || i.tm.cpu_flags.bitfield.cpufisttp)
9607 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9608
9609 if ((i.xstate & xstate_mmx)
9610 || i.tm.mnem_off == MN_emms
9611 || i.tm.mnem_off == MN_femms)
9612 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9613
9614 if (i.index_reg)
9615 {
9616 if (i.index_reg->reg_type.bitfield.zmmword)
9617 i.xstate |= xstate_zmm;
9618 else if (i.index_reg->reg_type.bitfield.ymmword)
9619 i.xstate |= xstate_ymm;
9620 else if (i.index_reg->reg_type.bitfield.xmmword)
9621 i.xstate |= xstate_xmm;
9622 }
9623
9624 /* vzeroall / vzeroupper */
9625 if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9626 i.xstate |= xstate_ymm;
9627
9628 if ((i.xstate & xstate_xmm)
9629 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9630 || (i.tm.base_opcode == 0xae
9631 && (i.tm.cpu_flags.bitfield.cpusse
9632 || i.tm.cpu_flags.bitfield.cpuavx))
9633 || i.tm.cpu_flags.bitfield.cpuwidekl
9634 || i.tm.cpu_flags.bitfield.cpukl)
9635 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9636
9637 if ((i.xstate & xstate_ymm) == xstate_ymm)
9638 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9639 if ((i.xstate & xstate_zmm) == xstate_zmm)
9640 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9641 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9642 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9643 if (i.tm.cpu_flags.bitfield.cpufxsr)
9644 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9645 if (i.tm.cpu_flags.bitfield.cpuxsave)
9646 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9647 if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9648 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9649 if (i.tm.cpu_flags.bitfield.cpuxsavec)
9650 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9651
9652 if (x86_feature_2_used
9653 || i.tm.cpu_flags.bitfield.cpucmov
9654 || i.tm.cpu_flags.bitfield.cpusyscall
9655 || i.tm.mnem_off == MN_cmpxchg8b)
9656 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9657 if (i.tm.cpu_flags.bitfield.cpusse3
9658 || i.tm.cpu_flags.bitfield.cpussse3
9659 || i.tm.cpu_flags.bitfield.cpusse4_1
9660 || i.tm.cpu_flags.bitfield.cpusse4_2
9661 || i.tm.cpu_flags.bitfield.cpucx16
9662 || i.tm.cpu_flags.bitfield.cpupopcnt
9663 /* LAHF-SAHF insns in 64-bit mode. */
9664 || (flag_code == CODE_64BIT
9665 && (i.tm.base_opcode | 1) == 0x9f
9666 && i.tm.opcode_space == SPACE_BASE))
9667 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9668 if (i.tm.cpu_flags.bitfield.cpuavx
9669 || i.tm.cpu_flags.bitfield.cpuavx2
9670 /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
9671 XOP, FMA4, LWP, TBM, and AMX. */
9672 || (i.tm.opcode_modifier.vex
9673 && !i.tm.cpu_flags.bitfield.cpuavx512f
9674 && !i.tm.cpu_flags.bitfield.cpuavx512bw
9675 && !i.tm.cpu_flags.bitfield.cpuavx512dq
9676 && !i.tm.cpu_flags.bitfield.cpuxop
9677 && !i.tm.cpu_flags.bitfield.cpufma4
9678 && !i.tm.cpu_flags.bitfield.cpulwp
9679 && !i.tm.cpu_flags.bitfield.cputbm
9680 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9681 || i.tm.cpu_flags.bitfield.cpuf16c
9682 || i.tm.cpu_flags.bitfield.cpufma
9683 || i.tm.cpu_flags.bitfield.cpulzcnt
9684 || i.tm.cpu_flags.bitfield.cpumovbe
9685 || i.tm.cpu_flags.bitfield.cpuxsaves
9686 || (x86_feature_2_used
9687 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9688 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9689 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9690 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9691 if (i.tm.cpu_flags.bitfield.cpuavx512f
9692 || i.tm.cpu_flags.bitfield.cpuavx512bw
9693 || i.tm.cpu_flags.bitfield.cpuavx512dq
9694 || i.tm.cpu_flags.bitfield.cpuavx512vl
9695 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9696 AVX512-4FMAPS, and AVX512-4VNNIW. */
9697 || (i.tm.opcode_modifier.evex
9698 && !i.tm.cpu_flags.bitfield.cpuavx512er
9699 && !i.tm.cpu_flags.bitfield.cpuavx512pf
9700 && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
9701 && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9702 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9703 }
9704 #endif
9705
9706 /* Tie dwarf2 debug info to the address at the start of the insn.
9707 We can't do this after the insn has been output as the current
9708 frag may have been closed off, e.g. by frag_var. */
9709 dwarf2_emit_insn (0);
9710
9711 insn_start_frag = frag_now;
9712 insn_start_off = frag_now_fix ();
9713
9714 if (add_branch_padding_frag_p (&branch, &mf_jcc))
9715 {
9716 char *p;
9717 /* Branch can be 8 bytes. Leave some room for prefixes. */
9718 unsigned int max_branch_padding_size = 14;
9719
9720 /* Align section to boundary. */
9721 record_alignment (now_seg, align_branch_power);
9722
9723 /* Make room for padding. */
9724 frag_grow (max_branch_padding_size);
9725
9726 /* Start of the padding. */
9727 p = frag_more (0);
9728
9729 fragP = frag_now;
9730
9731 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9732 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9733 NULL, 0, p);
9734
9735 fragP->tc_frag_data.mf_type = mf_jcc;
9736 fragP->tc_frag_data.branch_type = branch;
9737 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9738 }
9739
9740 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9741 && !pre_386_16bit_warned)
9742 {
9743 as_warn (_("use .code16 to ensure correct addressing mode"));
9744 pre_386_16bit_warned = true;
9745 }
9746
9747 /* Output jumps. */
9748 if (i.tm.opcode_modifier.jump == JUMP)
9749 output_branch ();
9750 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9751 || i.tm.opcode_modifier.jump == JUMP_DWORD)
9752 output_jump ();
9753 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9754 output_interseg_jump ();
9755 else
9756 {
9757 /* Output normal instructions here. */
9758 char *p;
9759 unsigned char *q;
9760 unsigned int j;
9761 enum mf_cmp_kind mf_cmp;
9762
9763 if (avoid_fence
9764 && (i.tm.base_opcode == 0xaee8
9765 || i.tm.base_opcode == 0xaef0
9766 || i.tm.base_opcode == 0xaef8))
9767 {
9768 /* Encode lfence, mfence, and sfence as
9769 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
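/* (A note for illustration: the 5-byte constant below is stored
little-endian, so the offsetT value 0x240483f0 produces exactly the
f0 83 04 24 byte sequence, followed by the zero immediate byte.) */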
9770 if (flag_code == CODE_16BIT)
9771 as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
9772 else if (omit_lock_prefix)
9773 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9774 insn_name (&i.tm));
9775 else if (now_seg != absolute_section)
9776 {
9777 offsetT val = 0x240483f0ULL;
9778
9779 p = frag_more (5);
9780 md_number_to_chars (p, val, 5);
9781 }
9782 else
9783 abs_section_offset += 5;
9784 return;
9785 }
9786
9787 /* Some processors fail on the LOCK prefix. This option makes the
9788 assembler ignore the LOCK prefix and serves as a workaround. */
9789 if (omit_lock_prefix)
9790 {
9791 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9792 && i.tm.opcode_modifier.isprefix)
9793 return;
9794 i.prefix[LOCK_PREFIX] = 0;
9795 }
9796
9797 if (branch)
9798 /* Skip if this is a branch. */
9799 ;
9800 else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9801 {
9802 /* Make room for padding. */
9803 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9804 p = frag_more (0);
9805
9806 fragP = frag_now;
9807
9808 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9809 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9810 NULL, 0, p);
9811
9812 fragP->tc_frag_data.mf_type = mf_cmp;
9813 fragP->tc_frag_data.branch_type = align_branch_fused;
9814 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9815 }
9816 else if (add_branch_prefix_frag_p ())
9817 {
9818 unsigned int max_prefix_size = align_branch_prefix_size;
9819
9820 /* Make room for padding. */
9821 frag_grow (max_prefix_size);
9822 p = frag_more (0);
9823
9824 fragP = frag_now;
9825
9826 frag_var (rs_machine_dependent, max_prefix_size, 0,
9827 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9828 NULL, 0, p);
9829
9830 fragP->tc_frag_data.max_bytes = max_prefix_size;
9831 }
9832
9833 /* Since the VEX/EVEX prefix contains the implicit prefix, we
9834 don't need the explicit prefix. */
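/* (For illustration: the VEX/EVEX "pp" field encodes what would
otherwise be a separate 0x66, 0xf3, or 0xf2 opcode prefix byte, so
emitting such a byte here as well would be redundant.) */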
9835 if (!is_any_vex_encoding (&i.tm))
9836 {
9837 switch (i.tm.opcode_modifier.opcodeprefix)
9838 {
9839 case PREFIX_0X66:
9840 add_prefix (0x66);
9841 break;
9842 case PREFIX_0XF2:
9843 add_prefix (0xf2);
9844 break;
9845 case PREFIX_0XF3:
9846 if (!i.tm.cpu_flags.bitfield.cpupadlock
9847 || (i.prefix[REP_PREFIX] != 0xf3))
9848 add_prefix (0xf3);
9849 break;
9850 case PREFIX_NONE:
9851 switch (i.opcode_length)
9852 {
9853 case 2:
9854 break;
9855 case 1:
9856 /* Check for pseudo prefixes. */
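/* (e.g. a lone "{vex}" or "{disp32}" with no insn following it.) */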
9857 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9858 break;
9859 as_bad_where (insn_start_frag->fr_file,
9860 insn_start_frag->fr_line,
9861 _("pseudo prefix without instruction"));
9862 return;
9863 default:
9864 abort ();
9865 }
9866 break;
9867 default:
9868 abort ();
9869 }
9870
9871 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9872 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9873 R_X86_64_GOTTPOFF relocation so that the linker can safely
9874 perform IE->LE optimization. A dummy REX_OPCODE prefix
9875 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9876 relocation for GDesc -> IE/LE optimization. */
9877 if (x86_elf_abi == X86_64_X32_ABI
9878 && i.operands == 2
9879 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9880 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9881 && i.prefix[REX_PREFIX] == 0)
9882 add_prefix (REX_OPCODE);
9883 #endif
9884
9885 /* The prefix bytes. */
9886 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9887 if (*q)
9888 frag_opcode_byte (*q);
9889 }
9890 else
9891 {
9892 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9893 if (*q)
9894 switch (j)
9895 {
9896 case SEG_PREFIX:
9897 case ADDR_PREFIX:
9898 frag_opcode_byte (*q);
9899 break;
9900 default:
9901 /* There should be no other prefixes for instructions
9902 with VEX prefix. */
9903 abort ();
9904 }
9905
9906 /* For EVEX instructions i.vrex should become 0 after
9907 build_evex_prefix. For VEX instructions the upper 16
9908 registers aren't available, so VREX should be 0. */
9909 if (i.vrex)
9910 abort ();
9911 /* Now the VEX prefix. */
9912 if (now_seg != absolute_section)
9913 {
9914 p = frag_more (i.vex.length);
9915 for (j = 0; j < i.vex.length; j++)
9916 p[j] = i.vex.bytes[j];
9917 }
9918 else
9919 abs_section_offset += i.vex.length;
9920 }
9921
9922 /* Now the opcode; be careful about word order here! */
9923 j = i.opcode_length;
9924 if (!i.vex.length)
9925 switch (i.tm.opcode_space)
9926 {
9927 case SPACE_BASE:
9928 break;
9929 case SPACE_0F:
9930 ++j;
9931 break;
9932 case SPACE_0F38:
9933 case SPACE_0F3A:
9934 j += 2;
9935 break;
9936 default:
9937 abort ();
9938 }
9939
9940 if (now_seg == absolute_section)
9941 abs_section_offset += j;
9942 else if (j == 1)
9943 {
9944 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9945 }
9946 else
9947 {
9948 p = frag_more (j);
9949 if (!i.vex.length
9950 && i.tm.opcode_space != SPACE_BASE)
9951 {
9952 *p++ = 0x0f;
9953 if (i.tm.opcode_space != SPACE_0F)
9954 *p++ = i.tm.opcode_space == SPACE_0F38
9955 ? 0x38 : 0x3a;
9956 }
9957
9958 switch (i.opcode_length)
9959 {
9960 case 2:
9961 /* Put out high byte first: can't use md_number_to_chars! */
9962 *p++ = (i.tm.base_opcode >> 8) & 0xff;
9963 /* Fall through. */
9964 case 1:
9965 *p = i.tm.base_opcode & 0xff;
9966 break;
9967 default:
9968 abort ();
9969 break;
9970 }
9971
9972 }
9973
9974 /* Now the modrm byte and sib byte (if present). */
9975 if (i.tm.opcode_modifier.modrm)
9976 {
9977 frag_opcode_byte ((i.rm.regmem << 0)
9978 | (i.rm.reg << 3)
9979 | (i.rm.mode << 6));
9980 /* If i.rm.regmem == ESP (4)
9981 && i.rm.mode != (Register mode)
9982 && not 16 bit
9983 ==> need second modrm byte. */
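/* (Example, a sketch for illustration: for "mov %eax,(%esp)" the
ModRM byte is 0x04 (mode 0, reg 0, r/m 4 == SIB escape) and the
SIB byte is 0x24 (scale 0, index 4 == none, base 4 == %esp).) */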
9984 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9985 && i.rm.mode != 3
9986 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9987 frag_opcode_byte ((i.sib.base << 0)
9988 | (i.sib.index << 3)
9989 | (i.sib.scale << 6));
9990 }
9991
9992 if (i.disp_operands)
9993 output_disp (insn_start_frag, insn_start_off);
9994
9995 if (i.imm_operands)
9996 output_imm (insn_start_frag, insn_start_off);
9997
9998 /*
9999 * frag_now_fix () returning plain abs_section_offset when we're in the
10000 * absolute section, and abs_section_offset not getting updated as data
10001 * gets added to the frag breaks the logic below.
10002 */
10003 if (now_seg != absolute_section)
10004 {
10005 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
10006 if (j > 15)
10007 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
10008 j);
10009 else if (fragP)
10010 {
10011 /* NB: Don't add prefix with GOTPC relocation since
10012 output_disp() above depends on the fixed encoding
10013 length. Can't add prefix with TLS relocation since
10014 it breaks TLS linker optimization. */
10015 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
10016 /* Prefix count on the current instruction. */
10017 unsigned int count = i.vex.length;
10018 unsigned int k;
10019 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
10020 /* REX byte is encoded in VEX/EVEX prefix. */
10021 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
10022 count++;
10023
10024 /* Count prefixes for extended opcode maps. */
10025 if (!i.vex.length)
10026 switch (i.tm.opcode_space)
10027 {
10028 case SPACE_BASE:
10029 break;
10030 case SPACE_0F:
10031 count++;
10032 break;
10033 case SPACE_0F38:
10034 case SPACE_0F3A:
10035 count += 2;
10036 break;
10037 default:
10038 abort ();
10039 }
10040
10041 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
10042 == BRANCH_PREFIX)
10043 {
10044 /* Set the maximum prefix size in BRANCH_PREFIX
10045 frag. */
10046 if (fragP->tc_frag_data.max_bytes > max)
10047 fragP->tc_frag_data.max_bytes = max;
10048 if (fragP->tc_frag_data.max_bytes > count)
10049 fragP->tc_frag_data.max_bytes -= count;
10050 else
10051 fragP->tc_frag_data.max_bytes = 0;
10052 }
10053 else
10054 {
10055 /* Remember the maximum prefix size in FUSED_JCC_PADDING
10056 frag. */
10057 unsigned int max_prefix_size;
10058 if (align_branch_prefix_size > max)
10059 max_prefix_size = max;
10060 else
10061 max_prefix_size = align_branch_prefix_size;
10062 if (max_prefix_size > count)
10063 fragP->tc_frag_data.max_prefix_length
10064 = max_prefix_size - count;
10065 }
10066
10067 /* Use existing segment prefix if possible. Use CS
10068 segment prefix in 64-bit mode. In 32-bit mode, use SS
10069 segment prefix with ESP/EBP base register and use DS
10070 segment prefix without ESP/EBP base register. */
10071 if (i.prefix[SEG_PREFIX])
10072 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
10073 else if (flag_code == CODE_64BIT)
10074 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
10075 else if (i.base_reg
10076 && (i.base_reg->reg_num == 4
10077 || i.base_reg->reg_num == 5))
10078 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
10079 else
10080 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
10081 }
10082 }
10083 }
10084
10085 /* NB: This doesn't work with COND_JUMP86 without i386. */
10086 if (align_branch_power
10087 && now_seg != absolute_section
10088 && cpu_arch_flags.bitfield.cpui386)
10089 {
10090 /* Terminate each frag so that we can add prefix and check for
10091 fused jcc. */
10092 frag_wane (frag_now);
10093 frag_new (0);
10094 }
10095
10096 #ifdef DEBUG386
10097 if (flag_debug)
10098 {
10099 pi ("" /*line*/, &i);
10100 }
10101 #endif /* DEBUG386 */
10102 }
10103
10104 /* Return the size of the displacement operand N. */
10105
10106 static int
10107 disp_size (unsigned int n)
10108 {
10109 int size = 4;
10110
10111 if (i.types[n].bitfield.disp64)
10112 size = 8;
10113 else if (i.types[n].bitfield.disp8)
10114 size = 1;
10115 else if (i.types[n].bitfield.disp16)
10116 size = 2;
10117 return size;
10118 }
10119
10120 /* Return the size of the immediate operand N. */
10121
10122 static int
10123 imm_size (unsigned int n)
10124 {
10125 int size = 4;
10126 if (i.types[n].bitfield.imm64)
10127 size = 8;
10128 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
10129 size = 1;
10130 else if (i.types[n].bitfield.imm16)
10131 size = 2;
10132 return size;
10133 }
10134
10135 static void
10136 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
10137 {
10138 char *p;
10139 unsigned int n;
10140
10141 for (n = 0; n < i.operands; n++)
10142 {
10143 if (operand_type_check (i.types[n], disp))
10144 {
10145 int size = disp_size (n);
10146
10147 if (now_seg == absolute_section)
10148 abs_section_offset += size;
10149 else if (i.op[n].disps->X_op == O_constant)
10150 {
10151 offsetT val = i.op[n].disps->X_add_number;
10152
10153 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10154 size);
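/* (A sketch: with EVEX compressed disp8*N, e.g. the 64-byte memory
operand of "vmovaps 0x40(%rax), %zmm0", i.memshift is 6 and the
byte actually encoded is 0x40 >> 6 == 1.) */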
10155 p = frag_more (size);
10156 md_number_to_chars (p, val, size);
10157 }
10158 else
10159 {
10160 enum bfd_reloc_code_real reloc_type;
10161 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10162 bool sign = (flag_code == CODE_64BIT && size == 4
10163 && (!want_disp32 (&i.tm)
10164 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10165 && !i.types[n].bitfield.baseindex)))
10166 || pcrel;
10167 fixS *fixP;
10168
10169 /* We can't have 8 bit displacement here. */
10170 gas_assert (!i.types[n].bitfield.disp8);
10171
10172 /* The PC relative address is computed relative
10173 to the instruction boundary, so if immediate
10174 fields follow, we need to adjust the value. */
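/* (For instance, a sketch: in "cmpl $1234, foo(%rip)" the 4-byte
immediate follows the 4-byte displacement, so the pcrel fixup at
the displacement has to be biased by the immediate's size to stay
relative to the end of the insn.) */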
10175 if (pcrel && i.imm_operands)
10176 {
10177 unsigned int n1;
10178 int sz = 0;
10179
10180 for (n1 = 0; n1 < i.operands; n1++)
10181 if (operand_type_check (i.types[n1], imm))
10182 {
10183 /* Only one immediate is allowed for PC
10184 relative address. */
10185 gas_assert (sz == 0);
10186 sz = imm_size (n1);
10187 i.op[n].disps->X_add_number -= sz;
10188 }
10189 /* We should find the immediate. */
10190 gas_assert (sz != 0);
10191 }
10192
10193 p = frag_more (size);
10194 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10195 if (GOT_symbol
10196 && GOT_symbol == i.op[n].disps->X_add_symbol
10197 && (((reloc_type == BFD_RELOC_32
10198 || reloc_type == BFD_RELOC_X86_64_32S
10199 || (reloc_type == BFD_RELOC_64
10200 && object_64bit))
10201 && (i.op[n].disps->X_op == O_symbol
10202 || (i.op[n].disps->X_op == O_add
10203 && ((symbol_get_value_expression
10204 (i.op[n].disps->X_op_symbol)->X_op)
10205 == O_subtract))))
10206 || reloc_type == BFD_RELOC_32_PCREL))
10207 {
10208 if (!object_64bit)
10209 {
10210 reloc_type = BFD_RELOC_386_GOTPC;
10211 i.has_gotpc_tls_reloc = true;
10212 i.op[n].disps->X_add_number +=
10213 encoding_length (insn_start_frag, insn_start_off, p);
10214 }
10215 else if (reloc_type == BFD_RELOC_64)
10216 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10217 else
10218 /* Don't do the adjustment for x86-64, as there
10219 the pcrel addressing is relative to the _next_
10220 insn, and that is taken care of in other code. */
10221 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10222 }
10223 else if (align_branch_power)
10224 {
10225 switch (reloc_type)
10226 {
10227 case BFD_RELOC_386_TLS_GD:
10228 case BFD_RELOC_386_TLS_LDM:
10229 case BFD_RELOC_386_TLS_IE:
10230 case BFD_RELOC_386_TLS_IE_32:
10231 case BFD_RELOC_386_TLS_GOTIE:
10232 case BFD_RELOC_386_TLS_GOTDESC:
10233 case BFD_RELOC_386_TLS_DESC_CALL:
10234 case BFD_RELOC_X86_64_TLSGD:
10235 case BFD_RELOC_X86_64_TLSLD:
10236 case BFD_RELOC_X86_64_GOTTPOFF:
10237 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10238 case BFD_RELOC_X86_64_TLSDESC_CALL:
10239 i.has_gotpc_tls_reloc = true;
10240 default:
10241 break;
10242 }
10243 }
10244 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10245 size, i.op[n].disps, pcrel,
10246 reloc_type);
10247
10248 if (flag_code == CODE_64BIT && size == 4 && pcrel
10249 && !i.prefix[ADDR_PREFIX])
10250 fixP->fx_signed = 1;
10251
10252 /* Check for "call/jmp *mem", "mov mem, %reg",
10253 "test %reg, mem" and "binop mem, %reg" where binop
10254 is one of adc, add, and, cmp, or, sbb, sub, xor
10255 instructions without data prefix. Always generate
10256 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
10257 if (i.prefix[DATA_PREFIX] == 0
10258 && (generate_relax_relocations
10259 || (!object_64bit
10260 && i.rm.mode == 0
10261 && i.rm.regmem == 5))
10262 && (i.rm.mode == 2
10263 || (i.rm.mode == 0 && i.rm.regmem == 5))
10264 && i.tm.opcode_space == SPACE_BASE
10265 && ((i.operands == 1
10266 && i.tm.base_opcode == 0xff
10267 && (i.rm.reg == 2 || i.rm.reg == 4))
10268 || (i.operands == 2
10269 && (i.tm.base_opcode == 0x8b
10270 || i.tm.base_opcode == 0x85
10271 || (i.tm.base_opcode & ~0x38) == 0x03))))
10272 {
10273 if (object_64bit)
10274 {
10275 fixP->fx_tcbit = i.rex != 0;
10276 if (i.base_reg
10277 && (i.base_reg->reg_num == RegIP))
10278 fixP->fx_tcbit2 = 1;
10279 }
10280 else
10281 fixP->fx_tcbit2 = 1;
10282 }
10283 }
10284 }
10285 }
10286 }
10287
10288 static void
10289 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10290 {
10291 char *p;
10292 unsigned int n;
10293
10294 for (n = 0; n < i.operands; n++)
10295 {
10296 if (operand_type_check (i.types[n], imm))
10297 {
10298 int size = imm_size (n);
10299
10300 if (now_seg == absolute_section)
10301 abs_section_offset += size;
10302 else if (i.op[n].imms->X_op == O_constant)
10303 {
10304 offsetT val;
10305
10306 val = offset_in_range (i.op[n].imms->X_add_number,
10307 size);
10308 p = frag_more (size);
10309 md_number_to_chars (p, val, size);
10310 }
10311 else
10312 {
10313 /* Not absolute_section.
10314 Need a 32-bit fixup (don't support 8bit
10315 non-absolute imms). Try to support other
10316 sizes ... */
10317 enum bfd_reloc_code_real reloc_type;
10318 int sign;
10319
10320 if (i.types[n].bitfield.imm32s
10321 && (i.suffix == QWORD_MNEM_SUFFIX
10322 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10323 sign = 1;
10324 else
10325 sign = 0;
10326
10327 p = frag_more (size);
10328 reloc_type = reloc (size, 0, sign, i.reloc[n]);
10329
10330 /* This is tough to explain. We end up with this one if we
10331 * have operands that look like
10332 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
10333 * obtain the absolute address of the GOT, and it is strongly
10334 * preferable from a performance point of view to avoid using
10335 * a runtime relocation for this. The actual sequence of
10336 * instructions often looks something like:
10337 *
10338 * call .L66
10339 * .L66:
10340 * popl %ebx
10341 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10342 *
10343 * The call and pop essentially return the absolute address
10344 * of the label .L66 and store it in %ebx. The linker itself
10345 * will ultimately change the first operand of the addl so
10346 * that %ebx points to the GOT, but to keep things simple, the
10347 * .o file must have this operand set so that it generates not
10348 * the absolute address of .L66, but the absolute address of
10349 * itself. This allows the linker to simply treat a GOTPC
10350 * relocation as asking for a pcrel offset to the GOT to be
10351 * added in, and the addend of the relocation is stored in the
10352 * operand field for the instruction itself.
10353 *
10354 * Our job here is to fix the operand so that it would add
10355 * the correct offset so that %ebx would point to itself. The
10356 * thing that is tricky is that .-.L66 will point to the
10357 * beginning of the instruction, so we need to further modify
10358 * the operand so that it will point to itself. There are
10359 * other cases where you have something like:
10360 *
10361 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10362 *
10363 * and here no correction would be required. Internally in
10364 * the assembler we treat operands of this form as not being
10365 * pcrel since the '.' is explicitly mentioned, and I wonder
10366 * whether it would simplify matters to do it this way. Who
10367 * knows. In earlier versions of the PIC patches, the
10368 * pcrel_adjust field was used to store the correction, but
10369 * since the expression is not pcrel, I felt it would be
10370 * confusing to do it this way. */
10371
10372 if ((reloc_type == BFD_RELOC_32
10373 || reloc_type == BFD_RELOC_X86_64_32S
10374 || reloc_type == BFD_RELOC_64)
10375 && GOT_symbol
10376 && GOT_symbol == i.op[n].imms->X_add_symbol
10377 && (i.op[n].imms->X_op == O_symbol
10378 || (i.op[n].imms->X_op == O_add
10379 && ((symbol_get_value_expression
10380 (i.op[n].imms->X_op_symbol)->X_op)
10381 == O_subtract))))
10382 {
10383 if (!object_64bit)
10384 reloc_type = BFD_RELOC_386_GOTPC;
10385 else if (size == 4)
10386 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10387 else if (size == 8)
10388 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10389 i.has_gotpc_tls_reloc = true;
10390 i.op[n].imms->X_add_number +=
10391 encoding_length (insn_start_frag, insn_start_off, p);
10392 }
10393 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10394 i.op[n].imms, 0, reloc_type);
10395 }
10396 }
10397 }
10398 }
10399 \f
10400 /* x86_cons_fix_new is called via the expression parsing code when a
10401 reloc is needed. We use this hook to get the correct .got reloc. */
10402 static int cons_sign = -1;
10403
10404 void
10405 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10406 expressionS *exp, bfd_reloc_code_real_type r)
10407 {
10408 r = reloc (len, 0, cons_sign, r);
10409
10410 #ifdef TE_PE
10411 if (exp->X_op == O_secrel)
10412 {
10413 exp->X_op = O_symbol;
10414 r = BFD_RELOC_32_SECREL;
10415 }
10416 else if (exp->X_op == O_secidx)
10417 r = BFD_RELOC_16_SECIDX;
10418 #endif
10419
10420 fix_new_exp (frag, off, len, exp, 0, r);
10421 }
10422
10423 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10424 purpose of the `.dc.a' internal pseudo-op. */
10425
10426 int
10427 x86_address_bytes (void)
10428 {
10429 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10430 return 4;
10431 return stdoutput->arch_info->bits_per_address / 8;
10432 }
10433
10434 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10435 || defined (LEX_AT)) && !defined (TE_PE)
10436 # define lex_got(reloc, adjust, types) NULL
10437 #else
10438 /* Parse operands of the form
10439 <symbol>@GOTOFF+<nnn>
10440 and similar .plt or .got references.
10441
10442 If we find one, set up the correct relocation in RELOC and copy the
10443 input string, minus the `@GOTOFF' into a malloc'd buffer for
10444 parsing by the calling routine. Return this buffer, and if ADJUST
10445 is non-null set it to the length of the string we removed from the
10446 input line. Otherwise return NULL. */
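/* (For instance, a sketch: given "foo@GOTOFF+4" in 32-bit mode, *rel
is set to BFD_RELOC_386_GOTOFF and the returned buffer holds
"foo +4", the "@GOTOFF" token having been replaced by a blank.) */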
10447 static char *
10448 lex_got (enum bfd_reloc_code_real *rel,
10449 int *adjust,
10450 i386_operand_type *types)
10451 {
10452 /* Some of the relocations depend on the size of the field to be
10453 relocated. But in our callers i386_immediate and i386_displacement
10454 we don't yet know the operand size (this will be set by insn
10455 matching). Hence we record the word32 relocation here,
10456 and adjust the reloc according to the real size in reloc(). */
10457 static const struct
10458 {
10459 const char *str;
10460 int len;
10461 const enum bfd_reloc_code_real rel[2];
10462 const i386_operand_type types64;
10463 bool need_GOT_symbol;
10464 }
10465 gotrel[] =
10466 {
10467
10468 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
10469 { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
10470 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
10471 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
10472 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
10473 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
10474 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
10475 { .imm64 = 1, .disp64 = 1 } }
10476
10477 #ifndef TE_PE
10478 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10479 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
10480 BFD_RELOC_SIZE32 },
10481 { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
10482 #endif
10483 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
10484 BFD_RELOC_X86_64_PLTOFF64 },
10485 { .bitfield = { .imm64 = 1 } }, true },
10486 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
10487 BFD_RELOC_X86_64_PLT32 },
10488 OPERAND_TYPE_IMM32_32S_DISP32, false },
10489 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
10490 BFD_RELOC_X86_64_GOTPLT64 },
10491 OPERAND_TYPE_IMM64_DISP64, true },
10492 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
10493 BFD_RELOC_X86_64_GOTOFF64 },
10494 OPERAND_TYPE_IMM64_DISP64, true },
10495 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10496 BFD_RELOC_X86_64_GOTPCREL },
10497 OPERAND_TYPE_IMM32_32S_DISP32, true },
10498 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
10499 BFD_RELOC_X86_64_TLSGD },
10500 OPERAND_TYPE_IMM32_32S_DISP32, true },
10501 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
10502 _dummy_first_bfd_reloc_code_real },
10503 OPERAND_TYPE_NONE, true },
10504 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
10505 BFD_RELOC_X86_64_TLSLD },
10506 OPERAND_TYPE_IMM32_32S_DISP32, true },
10507 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10508 BFD_RELOC_X86_64_GOTTPOFF },
10509 OPERAND_TYPE_IMM32_32S_DISP32, true },
10510 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
10511 BFD_RELOC_X86_64_TPOFF32 },
10512 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10513 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
10514 _dummy_first_bfd_reloc_code_real },
10515 OPERAND_TYPE_NONE, true },
10516 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
10517 BFD_RELOC_X86_64_DTPOFF32 },
10518 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10519 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10520 _dummy_first_bfd_reloc_code_real },
10521 OPERAND_TYPE_NONE, true },
10522 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10523 _dummy_first_bfd_reloc_code_real },
10524 OPERAND_TYPE_NONE, true },
10525 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
10526 BFD_RELOC_X86_64_GOT32 },
10527 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10528 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
10529 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10530 OPERAND_TYPE_IMM32_32S_DISP32, true },
10531 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
10532 BFD_RELOC_X86_64_TLSDESC_CALL },
10533 OPERAND_TYPE_IMM32_32S_DISP32, true },
10534 #else /* TE_PE */
10535 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10536 BFD_RELOC_32_SECREL },
10537 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10538 #endif
10539
10540 #undef OPERAND_TYPE_IMM32_32S_DISP32
10541 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
10542 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
10543 #undef OPERAND_TYPE_IMM64_DISP64
10544
10545 };
10546 char *cp;
10547 unsigned int j;
10548
10549 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10550 if (!IS_ELF)
10551 return NULL;
10552 #endif
10553
10554 for (cp = input_line_pointer; *cp != '@'; cp++)
10555 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10556 return NULL;
10557
10558 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10559 {
10560 int len = gotrel[j].len;
10561 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10562 {
10563 if (gotrel[j].rel[object_64bit] != 0)
10564 {
10565 int first, second;
10566 char *tmpbuf, *past_reloc;
10567
10568 *rel = gotrel[j].rel[object_64bit];
10569
10570 if (types)
10571 {
10572 if (flag_code != CODE_64BIT)
10573 {
10574 types->bitfield.imm32 = 1;
10575 types->bitfield.disp32 = 1;
10576 }
10577 else
10578 *types = gotrel[j].types64;
10579 }
10580
10581 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10582 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10583
10584 /* The length of the first part of our input line. */
10585 first = cp - input_line_pointer;
10586
10587 /* The second part goes from after the reloc token until
10588 (and including) an end_of_line char or comma. */
10589 past_reloc = cp + 1 + len;
10590 cp = past_reloc;
10591 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10592 ++cp;
10593 second = cp + 1 - past_reloc;
10594
10595 /* Allocate and copy string. The trailing NUL shouldn't
10596 be necessary, but be safe. */
10597 tmpbuf = XNEWVEC (char, first + second + 2);
10598 memcpy (tmpbuf, input_line_pointer, first);
10599 if (second != 0 && *past_reloc != ' ')
10600 /* Replace the relocation token with ' ', so that
10601 errors like foo@GOTOFF1 will be detected. */
10602 tmpbuf[first++] = ' ';
10603 else
10604 /* Increment length by 1 if the relocation token is
10605 removed. */
10606 len++;
10607 if (adjust)
10608 *adjust = len;
10609 memcpy (tmpbuf + first, past_reloc, second);
10610 tmpbuf[first + second] = '\0';
10611 return tmpbuf;
10612 }
10613
10614 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10615 gotrel[j].str, 1 << (5 + object_64bit));
10616 return NULL;
10617 }
10618 }
10619
10620 /* Might be a symbol version string. Don't as_bad here. */
10621 return NULL;
10622 }
10623 #endif
10624
10625 bfd_reloc_code_real_type
10626 x86_cons (expressionS *exp, int size)
10627 {
10628 bfd_reloc_code_real_type got_reloc = NO_RELOC;
10629
10630 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10631 && !defined (LEX_AT)) \
10632 || defined (TE_PE)
10633 intel_syntax = -intel_syntax;
10634
10635 exp->X_md = 0;
10636 if (size == 4 || (object_64bit && size == 8))
10637 {
10638 /* Handle @GOTOFF and the like in an expression. */
10639 char *save;
10640 char *gotfree_input_line;
10641 int adjust = 0;
10642
10643 save = input_line_pointer;
10644 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10645 if (gotfree_input_line)
10646 input_line_pointer = gotfree_input_line;
10647
10648 expression (exp);
10649
10650 if (gotfree_input_line)
10651 {
10652 /* expression () has merrily parsed up to the end of line,
10653 or a comma - in the wrong buffer. Transfer how far
10654 input_line_pointer has moved to the right buffer. */
10655 input_line_pointer = (save
10656 + (input_line_pointer - gotfree_input_line)
10657 + adjust);
10658 free (gotfree_input_line);
10659 if (exp->X_op == O_constant
10660 || exp->X_op == O_absent
10661 || exp->X_op == O_illegal
10662 || exp->X_op == O_register
10663 || exp->X_op == O_big)
10664 {
10665 char c = *input_line_pointer;
10666 *input_line_pointer = 0;
10667 as_bad (_("missing or invalid expression `%s'"), save);
10668 *input_line_pointer = c;
10669 }
10670 else if ((got_reloc == BFD_RELOC_386_PLT32
10671 || got_reloc == BFD_RELOC_X86_64_PLT32)
10672 && exp->X_op != O_symbol)
10673 {
10674 char c = *input_line_pointer;
10675 *input_line_pointer = 0;
10676 as_bad (_("invalid PLT expression `%s'"), save);
10677 *input_line_pointer = c;
10678 }
10679 }
10680 }
10681 else
10682 expression (exp);
10683
10684 intel_syntax = -intel_syntax;
10685
10686 if (intel_syntax)
10687 i386_intel_simplify (exp);
10688 #else
10689 expression (exp);
10690 #endif
10691
10692 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
10693 if (size == 4 && exp->X_op == O_constant && !object_64bit)
10694 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10695
10696 return got_reloc;
10697 }
10698
10699 static void
10700 signed_cons (int size)
10701 {
10702 if (object_64bit)
10703 cons_sign = 1;
10704 cons (size);
10705 cons_sign = -1;
10706 }
10707
10708 #ifdef TE_PE
10709 static void
10710 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10711 {
10712 expressionS exp;
10713
10714 do
10715 {
10716 expression (&exp);
10717 if (exp.X_op == O_symbol)
10718 exp.X_op = O_secrel;
10719
10720 emit_expr (&exp, 4);
10721 }
10722 while (*input_line_pointer++ == ',');
10723
10724 input_line_pointer--;
10725 demand_empty_rest_of_line ();
10726 }
10727
10728 static void
10729 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
10730 {
10731 expressionS exp;
10732
10733 do
10734 {
10735 expression (&exp);
10736 if (exp.X_op == O_symbol)
10737 exp.X_op = O_secidx;
10738
10739 emit_expr (&exp, 2);
10740 }
10741 while (*input_line_pointer++ == ',');
10742
10743 input_line_pointer--;
10744 demand_empty_rest_of_line ();
10745 }
10746 #endif
10747
10748 /* Handle Rounding Control / SAE specifiers. */
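/* (For example, a sketch in AT&T syntax: in
"vaddps {rz-sae}, %zmm2, %zmm1, %zmm0"
the "{rz-sae}" token selects round-toward-zero together with
suppress-all-exceptions; it is recorded in i.rounding rather than
being treated as an ordinary operand.) */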
10749
10750 static char *
10751 RC_SAE_specifier (const char *pstr)
10752 {
10753 unsigned int j;
10754
10755 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10756 {
10757 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10758 {
10759 if (i.rounding.type != rc_none)
10760 {
10761 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
10762 return NULL;
10763 }
10764
10765 i.rounding.type = RC_NamesTable[j].type;
10766
10767 return (char *)(pstr + RC_NamesTable[j].len);
10768 }
10769 }
10770
10771 return NULL;
10772 }
10773
10774 /* Handle Vector operations. */
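/* (For example, a sketch in AT&T syntax:
"vaddps (%rax){1to16}, %zmm1, %zmm2{%k1}{z}"
combines a broadcast "{1to16}", a write mask "{%k1}" and the
zeroing flag "{z}"; each "{...}" group is parsed in turn below.) */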
10775
10776 static char *
10777 check_VecOperations (char *op_string)
10778 {
10779 const reg_entry *mask;
10780 const char *saved;
10781 char *end_op;
10782
10783 while (*op_string)
10784 {
10785 saved = op_string;
10786 if (*op_string == '{')
10787 {
10788 op_string++;
10789
10790 /* Check broadcasts. */
10791 if (startswith (op_string, "1to"))
10792 {
10793 unsigned int bcst_type;
10794
10795 if (i.broadcast.type)
10796 goto duplicated_vec_op;
10797
10798 op_string += 3;
10799 if (*op_string == '8')
10800 bcst_type = 8;
10801 else if (*op_string == '4')
10802 bcst_type = 4;
10803 else if (*op_string == '2')
10804 bcst_type = 2;
10805 else if (*op_string == '1'
10806 && *(op_string+1) == '6')
10807 {
10808 bcst_type = 16;
10809 op_string++;
10810 }
10811 else if (*op_string == '3'
10812 && *(op_string+1) == '2')
10813 {
10814 bcst_type = 32;
10815 op_string++;
10816 }
10817 else
10818 {
10819 as_bad (_("Unsupported broadcast: `%s'"), saved);
10820 return NULL;
10821 }
10822 op_string++;
10823
10824 i.broadcast.type = bcst_type;
10825 i.broadcast.operand = this_operand;
10826 }
10827 /* Check masking operation. */
10828 else if ((mask = parse_register (op_string, &end_op)) != NULL)
10829 {
10830 if (mask == &bad_reg)
10831 return NULL;
10832
10833 /* k0 can't be used for write mask. */
10834 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10835 {
10836 as_bad (_("`%s%s' can't be used for write mask"),
10837 register_prefix, mask->reg_name);
10838 return NULL;
10839 }
10840
10841 if (!i.mask.reg)
10842 {
10843 i.mask.reg = mask;
10844 i.mask.operand = this_operand;
10845 }
10846 else if (i.mask.reg->reg_num)
10847 goto duplicated_vec_op;
10848 else
10849 {
10850 i.mask.reg = mask;
10851
10852 /* Only "{z}" is allowed here. No need to check
10853 zeroing mask explicitly. */
10854 if (i.mask.operand != (unsigned int) this_operand)
10855 {
10856 as_bad (_("invalid write mask `%s'"), saved);
10857 return NULL;
10858 }
10859 }
10860
10861 op_string = end_op;
10862 }
10863 /* Check zeroing-flag for masking operation. */
10864 else if (*op_string == 'z')
10865 {
10866 if (!i.mask.reg)
10867 {
10868 i.mask.reg = reg_k0;
10869 i.mask.zeroing = 1;
10870 i.mask.operand = this_operand;
10871 }
10872 else
10873 {
10874 if (i.mask.zeroing)
10875 {
10876 duplicated_vec_op:
10877 as_bad (_("duplicated `%s'"), saved);
10878 return NULL;
10879 }
10880
10881 i.mask.zeroing = 1;
10882
10883 /* Only "{%k}" is allowed here. No need to check mask
10884 register explicitly. */
10885 if (i.mask.operand != (unsigned int) this_operand)
10886 {
10887 as_bad (_("invalid zeroing-masking `%s'"),
10888 saved);
10889 return NULL;
10890 }
10891 }
10892
10893 op_string++;
10894 }
10895 else if (intel_syntax
10896 && (op_string = RC_SAE_specifier (op_string)) != NULL)
10897 i.rounding.modifier = true;
10898 else
10899 goto unknown_vec_op;
10900
10901 if (*op_string != '}')
10902 {
10903 as_bad (_("missing `}' in `%s'"), saved);
10904 return NULL;
10905 }
10906 op_string++;
10907
10908 /* Strip whitespace since the addition of pseudo prefixes
10909 changed how the scrubber treats '{'. */
10910 if (is_space_char (*op_string))
10911 ++op_string;
10912
10913 continue;
10914 }
10915 unknown_vec_op:
10916 /* We don't know this one. */
10917 as_bad (_("unknown vector operation: `%s'"), saved);
10918 return NULL;
10919 }
10920
10921 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
10922 {
10923 as_bad (_("zeroing-masking only allowed with write mask"));
10924 return NULL;
10925 }
10926
10927 return op_string;
10928 }
10929
10930 static int
10931 i386_immediate (char *imm_start)
10932 {
10933 char *save_input_line_pointer;
10934 char *gotfree_input_line;
10935 segT exp_seg = 0;
10936 expressionS *exp;
10937 i386_operand_type types;
10938
10939 operand_type_set (&types, ~0);
10940
10941 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10942 {
10943 as_bad (_("at most %d immediate operands are allowed"),
10944 MAX_IMMEDIATE_OPERANDS);
10945 return 0;
10946 }
10947
10948 exp = &im_expressions[i.imm_operands++];
10949 i.op[this_operand].imms = exp;
10950
10951 if (is_space_char (*imm_start))
10952 ++imm_start;
10953
10954 save_input_line_pointer = input_line_pointer;
10955 input_line_pointer = imm_start;
10956
10957 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10958 if (gotfree_input_line)
10959 input_line_pointer = gotfree_input_line;
10960
10961 exp_seg = expression (exp);
10962
10963 SKIP_WHITESPACE ();
10964 if (*input_line_pointer)
10965 as_bad (_("junk `%s' after expression"), input_line_pointer);
10966
10967 input_line_pointer = save_input_line_pointer;
10968 if (gotfree_input_line)
10969 {
10970 free (gotfree_input_line);
10971
10972 if (exp->X_op == O_constant)
10973 exp->X_op = O_illegal;
10974 }
10975
10976 if (exp_seg == reg_section)
10977 {
10978 as_bad (_("illegal immediate register operand %s"), imm_start);
10979 return 0;
10980 }
10981
10982 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10983 }
10984
10985 static int
10986 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10987 i386_operand_type types, const char *imm_start)
10988 {
10989 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10990 {
10991 if (imm_start)
10992 as_bad (_("missing or invalid immediate expression `%s'"),
10993 imm_start);
10994 return 0;
10995 }
10996 else if (exp->X_op == O_constant)
10997 {
10998 /* Size it properly later. */
10999 i.types[this_operand].bitfield.imm64 = 1;
11000
11001 /* If not 64bit, sign/zero extend val, to account for wraparound
11002 when !BFD64. */
11003 if (flag_code != CODE_64BIT)
11004 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11005 }
11006 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11007 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11008 && exp_seg != absolute_section
11009 && exp_seg != text_section
11010 && exp_seg != data_section
11011 && exp_seg != bss_section
11012 && exp_seg != undefined_section
11013 && !bfd_is_com_section (exp_seg))
11014 {
11015 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11016 return 0;
11017 }
11018 #endif
11019 else
11020 {
11021 /* This is an address. The size of the address will be
11022 determined later, depending on destination register,
11023 suffix, or the default for the section. */
11024 i.types[this_operand].bitfield.imm8 = 1;
11025 i.types[this_operand].bitfield.imm16 = 1;
11026 i.types[this_operand].bitfield.imm32 = 1;
11027 i.types[this_operand].bitfield.imm32s = 1;
11028 i.types[this_operand].bitfield.imm64 = 1;
11029 i.types[this_operand] = operand_type_and (i.types[this_operand],
11030 types);
11031 }
11032
11033 return 1;
11034 }
11035
11036 static char *
11037 i386_scale (char *scale)
11038 {
11039 offsetT val;
11040 char *save = input_line_pointer;
11041
11042 input_line_pointer = scale;
11043 val = get_absolute_expression ();
11044
11045 switch (val)
11046 {
11047 case 1:
11048 i.log2_scale_factor = 0;
11049 break;
11050 case 2:
11051 i.log2_scale_factor = 1;
11052 break;
11053 case 4:
11054 i.log2_scale_factor = 2;
11055 break;
11056 case 8:
11057 i.log2_scale_factor = 3;
11058 break;
11059 default:
11060 {
11061 char sep = *input_line_pointer;
11062
11063 *input_line_pointer = '\0';
11064 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
11065 scale);
11066 *input_line_pointer = sep;
11067 input_line_pointer = save;
11068 return NULL;
11069 }
11070 }
11071 if (i.log2_scale_factor != 0 && i.index_reg == 0)
11072 {
11073 as_warn (_("scale factor of %d without an index register"),
11074 1 << i.log2_scale_factor);
11075 i.log2_scale_factor = 0;
11076 }
11077 scale = input_line_pointer;
11078 input_line_pointer = save;
11079 return scale;
11080 }
11081
11082 static int
11083 i386_displacement (char *disp_start, char *disp_end)
11084 {
11085 expressionS *exp;
11086 segT exp_seg = 0;
11087 char *save_input_line_pointer;
11088 char *gotfree_input_line;
11089 int override;
11090 i386_operand_type bigdisp, types = anydisp;
11091 int ret;
11092
11093 if (i.disp_operands == MAX_MEMORY_OPERANDS)
11094 {
11095 as_bad (_("at most %d displacement operands are allowed"),
11096 MAX_MEMORY_OPERANDS);
11097 return 0;
11098 }
11099
11100 operand_type_set (&bigdisp, 0);
11101 if (i.jumpabsolute
11102 || i.types[this_operand].bitfield.baseindex
11103 || (current_templates->start->opcode_modifier.jump != JUMP
11104 && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
11105 {
11106 i386_addressing_mode ();
11107 override = (i.prefix[ADDR_PREFIX] != 0);
11108 if (flag_code == CODE_64BIT)
11109 {
11110 bigdisp.bitfield.disp32 = 1;
11111 if (!override)
11112 bigdisp.bitfield.disp64 = 1;
11113 }
11114 else if ((flag_code == CODE_16BIT) ^ override)
11115 bigdisp.bitfield.disp16 = 1;
11116 else
11117 bigdisp.bitfield.disp32 = 1;
11118 }
11119 else
11120 {
11121 /* For PC-relative branches, the width of the displacement may be
11122 dependent upon data size, but is never dependent upon address size.
11123 Also make sure to not unintentionally match against a non-PC-relative
11124 branch template. */
11125 static templates aux_templates;
11126 const insn_template *t = current_templates->start;
11127 bool has_intel64 = false;
11128
11129 aux_templates.start = t;
11130 while (++t < current_templates->end)
11131 {
11132 if (t->opcode_modifier.jump
11133 != current_templates->start->opcode_modifier.jump)
11134 break;
11135 if ((t->opcode_modifier.isa64 >= INTEL64))
11136 has_intel64 = true;
11137 }
11138 if (t < current_templates->end)
11139 {
11140 aux_templates.end = t;
11141 current_templates = &aux_templates;
11142 }
11143
11144 override = (i.prefix[DATA_PREFIX] != 0);
11145 if (flag_code == CODE_64BIT)
11146 {
11147 if ((override || i.suffix == WORD_MNEM_SUFFIX)
11148 && (!intel64 || !has_intel64))
11149 bigdisp.bitfield.disp16 = 1;
11150 else
11151 bigdisp.bitfield.disp32 = 1;
11152 }
11153 else
11154 {
11155 if (!override)
11156 override = (i.suffix == (flag_code != CODE_16BIT
11157 ? WORD_MNEM_SUFFIX
11158 : LONG_MNEM_SUFFIX));
11159 bigdisp.bitfield.disp32 = 1;
11160 if ((flag_code == CODE_16BIT) ^ override)
11161 {
11162 bigdisp.bitfield.disp32 = 0;
11163 bigdisp.bitfield.disp16 = 1;
11164 }
11165 }
11166 }
11167 i.types[this_operand] = operand_type_or (i.types[this_operand],
11168 bigdisp);
11169
11170 exp = &disp_expressions[i.disp_operands];
11171 i.op[this_operand].disps = exp;
11172 i.disp_operands++;
11173 save_input_line_pointer = input_line_pointer;
11174 input_line_pointer = disp_start;
11175 END_STRING_AND_SAVE (disp_end);
11176
11177 #ifndef GCC_ASM_O_HACK
11178 #define GCC_ASM_O_HACK 0
11179 #endif
11180 #if GCC_ASM_O_HACK
11181 END_STRING_AND_SAVE (disp_end + 1);
11182 if (i.types[this_operand].bitfield.baseindex
11183 && disp_end[-1] == '+')
11184 {
11185 /* This hack is to avoid a warning when using the "o"
11186 constraint within gcc asm statements.
11187 For instance:
11188
11189 #define _set_tssldt_desc(n,addr,limit,type) \
11190 __asm__ __volatile__ ( \
11191 "movw %w2,%0\n\t" \
11192 "movw %w1,2+%0\n\t" \
11193 "rorl $16,%1\n\t" \
11194 "movb %b1,4+%0\n\t" \
11195 "movb %4,5+%0\n\t" \
11196 "movb $0,6+%0\n\t" \
11197 "movb %h1,7+%0\n\t" \
11198 "rorl $16,%1" \
11199 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
11200
11201 This works great except that the output assembler ends
11202 up looking a bit weird if it turns out that there is
11203 no offset. You end up producing code that looks like:
11204
11205 #APP
11206 movw $235,(%eax)
11207 movw %dx,2+(%eax)
11208 rorl $16,%edx
11209 movb %dl,4+(%eax)
11210 movb $137,5+(%eax)
11211 movb $0,6+(%eax)
11212 movb %dh,7+(%eax)
11213 rorl $16,%edx
11214 #NO_APP
11215
11216 So here we provide the missing zero. */
11217
11218 *disp_end = '0';
11219 }
11220 #endif
11221 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11222 if (gotfree_input_line)
11223 input_line_pointer = gotfree_input_line;
11224
11225 exp_seg = expression (exp);
11226
11227 SKIP_WHITESPACE ();
11228 if (*input_line_pointer)
11229 as_bad (_("junk `%s' after expression"), input_line_pointer);
11230 #if GCC_ASM_O_HACK
11231 RESTORE_END_STRING (disp_end + 1);
11232 #endif
11233 input_line_pointer = save_input_line_pointer;
11234 if (gotfree_input_line)
11235 {
11236 free (gotfree_input_line);
11237
11238 if (exp->X_op == O_constant || exp->X_op == O_register)
11239 exp->X_op = O_illegal;
11240 }
11241
11242 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
11243
11244 RESTORE_END_STRING (disp_end);
11245
11246 return ret;
11247 }
11248
11249 static int
11250 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11251 i386_operand_type types, const char *disp_start)
11252 {
11253 int ret = 1;
11254
11255 /* We do this to make sure that the section symbol is in
11256 the symbol table. We will ultimately change the relocation
11257 to be relative to the beginning of the section. */
11258 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11259 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11260 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11261 {
11262 if (exp->X_op != O_symbol)
11263 goto inv_disp;
11264
11265 if (S_IS_LOCAL (exp->X_add_symbol)
11266 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11267 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11268 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11269 exp->X_op = O_subtract;
11270 exp->X_op_symbol = GOT_symbol;
11271 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11272 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11273 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11274 i.reloc[this_operand] = BFD_RELOC_64;
11275 else
11276 i.reloc[this_operand] = BFD_RELOC_32;
11277 }
11278
11279 else if (exp->X_op == O_absent
11280 || exp->X_op == O_illegal
11281 || exp->X_op == O_big)
11282 {
11283 inv_disp:
11284 as_bad (_("missing or invalid displacement expression `%s'"),
11285 disp_start);
11286 ret = 0;
11287 }
11288
11289 else if (exp->X_op == O_constant)
11290 {
11291 /* Sizing gets taken care of by optimize_disp().
11292
11293 If not 64bit, sign/zero extend val, to account for wraparound
11294 when !BFD64. */
11295 if (flag_code != CODE_64BIT)
11296 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11297 }
11298
11299 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11300 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11301 && exp_seg != absolute_section
11302 && exp_seg != text_section
11303 && exp_seg != data_section
11304 && exp_seg != bss_section
11305 && exp_seg != undefined_section
11306 && !bfd_is_com_section (exp_seg))
11307 {
11308 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11309 ret = 0;
11310 }
11311 #endif
11312
11313 else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
11314 i.types[this_operand].bitfield.disp8 = 1;
11315
11316 /* Check if this is a displacement only operand. */
11317 if (!i.types[this_operand].bitfield.baseindex)
11318 i.types[this_operand] =
11319 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
11320 operand_type_and (i.types[this_operand], types));
11321
11322 return ret;
11323 }
11324
11325 /* Return the active addressing mode, taking address override and
11326 registers forming the address into consideration. Update the
11327 address override prefix if necessary. */
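/* (For instance, a sketch: in 32-bit code "mov (%bx,%si), %ax" uses
16-bit base/index registers, so CODE_16BIT is returned and a 0x67
address size prefix is added implicitly.) */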
11328
11329 static enum flag_code
11330 i386_addressing_mode (void)
11331 {
11332 enum flag_code addr_mode;
11333
11334 if (i.prefix[ADDR_PREFIX])
11335 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11336 else if (flag_code == CODE_16BIT
11337 && current_templates->start->cpu_flags.bitfield.cpumpx
11338 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11339 from md_assemble() by "is not a valid base/index expression"
11340 when there is a base and/or index. */
11341 && !i.types[this_operand].bitfield.baseindex)
11342 {
11343 /* MPX insn memory operands with neither base nor index must be forced
11344 to use 32-bit addressing in 16-bit mode. */
11345 addr_mode = CODE_32BIT;
11346 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11347 ++i.prefixes;
11348 gas_assert (!i.types[this_operand].bitfield.disp16);
11349 gas_assert (!i.types[this_operand].bitfield.disp32);
11350 }
11351 else
11352 {
11353 addr_mode = flag_code;
11354
11355 #if INFER_ADDR_PREFIX
11356 if (i.mem_operands == 0)
11357 {
11358 /* Infer address prefix from the first memory operand. */
11359 const reg_entry *addr_reg = i.base_reg;
11360
11361 if (addr_reg == NULL)
11362 addr_reg = i.index_reg;
11363
11364 if (addr_reg)
11365 {
11366 if (addr_reg->reg_type.bitfield.dword)
11367 addr_mode = CODE_32BIT;
11368 else if (flag_code != CODE_64BIT
11369 && addr_reg->reg_type.bitfield.word)
11370 addr_mode = CODE_16BIT;
11371
11372 if (addr_mode != flag_code)
11373 {
11374 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11375 i.prefixes += 1;
11376 /* Change the size of any displacement too. At most one
11377 of Disp16 or Disp32 is set.
11378 FIXME. There doesn't seem to be any real need for
11379 separate Disp16 and Disp32 flags. The same goes for
11380 Imm16 and Imm32. Removing them would probably clean
11381 up the code quite a lot. */
11382 if (flag_code != CODE_64BIT
11383 && (i.types[this_operand].bitfield.disp16
11384 || i.types[this_operand].bitfield.disp32))
11385 {
11386 static const i386_operand_type disp16_32 = {
11387 .bitfield = { .disp16 = 1, .disp32 = 1 }
11388 };
11389
11390 i.types[this_operand]
11391 = operand_type_xor (i.types[this_operand], disp16_32);
11392 }
11393 }
11394 }
11395 }
11396 #endif
11397 }
11398
11399 return addr_mode;
11400 }
11401
11402 /* Make sure the memory operand we've been dealt is valid.
11403 Return 1 on success, 0 on a failure. */
11404
11405 static int
11406 i386_index_check (const char *operand_string)
11407 {
11408 const char *kind = "base/index";
11409 enum flag_code addr_mode = i386_addressing_mode ();
11410 const insn_template *t = current_templates->end - 1;
11411
11412 if (t->opcode_modifier.isstring)
11413 {
11414 /* Memory operands of string insns are special in that they only allow
11415 a single register (rDI, rSI, or rBX) as their memory address. */
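/* (For instance, a sketch: in 32-bit mode "movsl %ds:(%esi), %es:(%edi)"
is accepted, while any other base register, an index register, or a
displacement makes the operand invalid here.) */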
11416 const reg_entry *expected_reg;
11417 static const char *di_si[][2] =
11418 {
11419 { "esi", "edi" },
11420 { "si", "di" },
11421 { "rsi", "rdi" }
11422 };
11423 static const char *bx[] = { "ebx", "bx", "rbx" };
11424
11425 kind = "string address";
11426
11427 if (t->opcode_modifier.prefixok == PrefixRep)
11428 {
11429 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
11430 int op = 0;
11431
11432 if (!t->operand_types[0].bitfield.baseindex
11433 || ((!i.mem_operands != !intel_syntax)
11434 && t->operand_types[1].bitfield.baseindex))
11435 op = 1;
11436 expected_reg
11437 = (const reg_entry *) str_hash_find (reg_hash,
11438 di_si[addr_mode][op == es_op]);
11439 }
11440 else
11441 expected_reg
11442 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
11443
11444 if (i.base_reg != expected_reg
11445 || i.index_reg
11446 || operand_type_check (i.types[this_operand], disp))
11447 {
11448 /* The second memory operand must have the same size as
11449 the first one. */
11450 if (i.mem_operands
11451 && i.base_reg
11452 && !((addr_mode == CODE_64BIT
11453 && i.base_reg->reg_type.bitfield.qword)
11454 || (addr_mode == CODE_32BIT
11455 ? i.base_reg->reg_type.bitfield.dword
11456 : i.base_reg->reg_type.bitfield.word)))
11457 goto bad_address;
11458
11459 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
11460 operand_string,
11461 intel_syntax ? '[' : '(',
11462 register_prefix,
11463 expected_reg->reg_name,
11464 intel_syntax ? ']' : ')');
11465 return 1;
11466 }
11467 else
11468 return 1;
11469
11470 bad_address:
11471 as_bad (_("`%s' is not a valid %s expression"),
11472 operand_string, kind);
11473 return 0;
11474 }
11475 else
11476 {
11477 t = current_templates->start;
11478
11479 if (addr_mode != CODE_16BIT)
11480 {
11481 /* 32-bit/64-bit checks. */
11482 if (i.disp_encoding == disp_encoding_16bit)
11483 {
11484 bad_disp:
11485 as_bad (_("invalid `%s' prefix"),
11486 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
11487 return 0;
11488 }
11489
11490 if ((i.base_reg
11491 && ((addr_mode == CODE_64BIT
11492 ? !i.base_reg->reg_type.bitfield.qword
11493 : !i.base_reg->reg_type.bitfield.dword)
11494 || (i.index_reg && i.base_reg->reg_num == RegIP)
11495 || i.base_reg->reg_num == RegIZ))
11496 || (i.index_reg
11497 && !i.index_reg->reg_type.bitfield.xmmword
11498 && !i.index_reg->reg_type.bitfield.ymmword
11499 && !i.index_reg->reg_type.bitfield.zmmword
11500 && ((addr_mode == CODE_64BIT
11501 ? !i.index_reg->reg_type.bitfield.qword
11502 : !i.index_reg->reg_type.bitfield.dword)
11503 || !i.index_reg->reg_type.bitfield.baseindex)))
11504 goto bad_address;
11505
11506 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
11507 if (t->mnem_off == MN_bndmk
11508 || t->mnem_off == MN_bndldx
11509 || t->mnem_off == MN_bndstx
11510 || t->opcode_modifier.sib == SIBMEM)
11511 {
11512 /* They cannot use RIP-relative addressing. */
11513 if (i.base_reg && i.base_reg->reg_num == RegIP)
11514 {
11515 as_bad (_("`%s' cannot be used here"), operand_string);
11516 return 0;
11517 }
11518
11519 /* bndldx and bndstx ignore their scale factor. */
11520 if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
11521 && i.log2_scale_factor)
11522 as_warn (_("register scaling is being ignored here"));
11523 }
11524 }
11525 else
11526 {
11527 /* 16-bit checks. */
11528 if (i.disp_encoding == disp_encoding_32bit)
11529 goto bad_disp;
11530
11531 if ((i.base_reg
11532 && (!i.base_reg->reg_type.bitfield.word
11533 || !i.base_reg->reg_type.bitfield.baseindex))
11534 || (i.index_reg
11535 && (!i.index_reg->reg_type.bitfield.word
11536 || !i.index_reg->reg_type.bitfield.baseindex
11537 || !(i.base_reg
11538 && i.base_reg->reg_num < 6
11539 && i.index_reg->reg_num >= 6
11540 && i.log2_scale_factor == 0))))
11541 goto bad_address;
11542 }
11543 }
11544 return 1;
11545 }
11546
11547 /* Handle vector immediates. */
11548
11549 static int
11550 RC_SAE_immediate (const char *imm_start)
11551 {
11552 const char *pstr = imm_start;
11553
11554 if (*pstr != '{')
11555 return 0;
11556
11557 pstr = RC_SAE_specifier (pstr + 1);
11558 if (pstr == NULL)
11559 return 0;
11560
11561 if (*pstr++ != '}')
11562 {
11563 as_bad (_("Missing '}': '%s'"), imm_start);
11564 return 0;
11565 }
11566 /* RC/SAE immediate string should contain nothing more. */
11567 if (*pstr != 0)
11568 {
11569 as_bad (_("Junk after '}': '%s'"), imm_start);
11570 return 0;
11571 }
11572
11573 /* Internally this doesn't count as an operand. */
11574 --i.operands;
11575
11576 return 1;
11577 }
11578
11579 static INLINE bool starts_memory_operand (char c)
11580 {
11581 return ISDIGIT (c)
11582 || is_identifier_char (c)
11583 || strchr ("([\"+-!~", c);
11584 }
11585
11586 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
11587 on error. */
11588
11589 static int
11590 i386_att_operand (char *operand_string)
11591 {
11592 const reg_entry *r;
11593 char *end_op;
11594 char *op_string = operand_string;
11595
11596 if (is_space_char (*op_string))
11597 ++op_string;
11598
11599 /* We check for an absolute prefix (differentiating,
11600 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
11601 if (*op_string == ABSOLUTE_PREFIX)
11602 {
11603 ++op_string;
11604 if (is_space_char (*op_string))
11605 ++op_string;
11606 i.jumpabsolute = true;
11607 }
11608
11609 /* Check if operand is a register. */
11610 if ((r = parse_register (op_string, &end_op)) != NULL)
11611 {
11612 i386_operand_type temp;
11613
11614 if (r == &bad_reg)
11615 return 0;
11616
11617 /* Check for a segment override by searching for ':' after a
11618 segment register. */
11619 op_string = end_op;
11620 if (is_space_char (*op_string))
11621 ++op_string;
11622 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11623 {
11624 i.seg[i.mem_operands] = r;
11625
11626 /* Skip the ':' and whitespace. */
11627 ++op_string;
11628 if (is_space_char (*op_string))
11629 ++op_string;
11630
11631 /* Handle case of %es:*foo. */
11632 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
11633 {
11634 ++op_string;
11635 if (is_space_char (*op_string))
11636 ++op_string;
11637 i.jumpabsolute = true;
11638 }
11639
11640 if (!starts_memory_operand (*op_string))
11641 {
11642 as_bad (_("bad memory operand `%s'"), op_string);
11643 return 0;
11644 }
11645 goto do_memory_reference;
11646 }
11647
11648 /* Handle vector operations. */
11649 if (*op_string == '{')
11650 {
11651 op_string = check_VecOperations (op_string);
11652 if (op_string == NULL)
11653 return 0;
11654 }
11655
11656 if (*op_string)
11657 {
11658 as_bad (_("junk `%s' after register"), op_string);
11659 return 0;
11660 }
11661 temp = r->reg_type;
11662 temp.bitfield.baseindex = 0;
11663 i.types[this_operand] = operand_type_or (i.types[this_operand],
11664 temp);
11665 i.types[this_operand].bitfield.unspecified = 0;
11666 i.op[this_operand].regs = r;
11667 i.reg_operands++;
11668
11669 /* A GPR may follow an RC or SAE immediate only if a (vector) register
11670 operand was also present earlier on. */
11671 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
11672 && i.reg_operands == 1)
11673 {
11674 unsigned int j;
11675
11676 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
11677 if (i.rounding.type == RC_NamesTable[j].type)
11678 break;
11679 as_bad (_("`%s': misplaced `{%s}'"),
11680 insn_name (current_templates->start), RC_NamesTable[j].name);
11681 return 0;
11682 }
11683 }
11684 else if (*op_string == REGISTER_PREFIX)
11685 {
11686 as_bad (_("bad register name `%s'"), op_string);
11687 return 0;
11688 }
11689 else if (*op_string == IMMEDIATE_PREFIX)
11690 {
11691 ++op_string;
11692 if (i.jumpabsolute)
11693 {
11694 as_bad (_("immediate operand illegal with absolute jump"));
11695 return 0;
11696 }
11697 if (!i386_immediate (op_string))
11698 return 0;
11699 if (i.rounding.type != rc_none)
11700 {
11701 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
11702 insn_name (current_templates->start));
11703 return 0;
11704 }
11705 }
11706 else if (RC_SAE_immediate (operand_string))
11707 {
11708 /* If it is a RC or SAE immediate, do the necessary placement check:
11709 Only another immediate or a GPR may precede it. */
11710 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
11711 || (i.reg_operands == 1
11712 && i.op[0].regs->reg_type.bitfield.class != Reg))
11713 {
11714 as_bad (_("`%s': misplaced `%s'"),
11715 insn_name (current_templates->start), operand_string);
11716 return 0;
11717 }
11718 }
11719 else if (starts_memory_operand (*op_string))
11720 {
11721 /* This is a memory reference of some sort. */
11722 char *base_string;
11723
11724 /* Start and end of displacement string expression (if found). */
11725 char *displacement_string_start;
11726 char *displacement_string_end;
11727
11728 do_memory_reference:
11729 /* Check for base index form. We detect the base index form by
11730 looking for an ')' at the end of the operand, searching
11731 for the '(' matching it, and finding a REGISTER_PREFIX or ','
11732 after the '('. */
11733 base_string = op_string + strlen (op_string);
11734
11735 /* Handle vector operations. */
11736 --base_string;
11737 if (is_space_char (*base_string))
11738 --base_string;
11739
11740 if (*base_string == '}')
11741 {
11742 char *vop_start = NULL;
11743
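/* Scan backwards over the trailing chain of {...} vector-operation
groups (e.g. {%k1}{z} masking or {1to8} broadcast) to locate the
first opening brace of the chain. */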
11744 while (base_string-- > op_string)
11745 {
11746 if (*base_string == '"')
11747 break;
11748 if (*base_string != '{')
11749 continue;
11750
11751 vop_start = base_string;
11752
11753 --base_string;
11754 if (is_space_char (*base_string))
11755 --base_string;
11756
11757 if (*base_string != '}')
11758 break;
11759
11760 vop_start = NULL;
11761 }
11762
11763 if (!vop_start)
11764 {
11765 as_bad (_("unbalanced figure braces"));
11766 return 0;
11767 }
11768
11769 if (check_VecOperations (vop_start) == NULL)
11770 return 0;
11771 }
11772
11773 /* If we only have a displacement, set-up for it to be parsed later. */
11774 displacement_string_start = op_string;
11775 displacement_string_end = base_string + 1;
11776
11777 if (*base_string == ')')
11778 {
11779 char *temp_string;
11780 unsigned int parens_not_balanced = 0;
11781 bool in_quotes = false;
11782
11783 /* We've already checked that the numbers of left & right ()'s
11784 are equal, and that there's a matching set of double quotes. */
11785 end_op = base_string;
11786 for (temp_string = op_string; temp_string < end_op; temp_string++)
11787 {
11788 if (*temp_string == '\\' && temp_string[1] == '"')
11789 ++temp_string;
11790 else if (*temp_string == '"')
11791 in_quotes = !in_quotes;
11792 else if (!in_quotes)
11793 {
11794 if (*temp_string == '(' && !parens_not_balanced++)
11795 base_string = temp_string;
11796 if (*temp_string == ')')
11797 --parens_not_balanced;
11798 }
11799 }
11800
11801 temp_string = base_string;
11802
11803 /* Skip past '(' and whitespace. */
11804 gas_assert (*base_string == '(');
11805 ++base_string;
11806 if (is_space_char (*base_string))
11807 ++base_string;
11808
11809 if (*base_string == ','
11810 || ((i.base_reg = parse_register (base_string, &end_op))
11811 != NULL))
11812 {
11813 displacement_string_end = temp_string;
11814
11815 i.types[this_operand].bitfield.baseindex = 1;
11816
11817 if (i.base_reg)
11818 {
11819 if (i.base_reg == &bad_reg)
11820 return 0;
11821 base_string = end_op;
11822 if (is_space_char (*base_string))
11823 ++base_string;
11824 }
11825
11826 /* There may be an index reg or scale factor here. */
11827 if (*base_string == ',')
11828 {
11829 ++base_string;
11830 if (is_space_char (*base_string))
11831 ++base_string;
11832
11833 if ((i.index_reg = parse_register (base_string, &end_op))
11834 != NULL)
11835 {
11836 if (i.index_reg == &bad_reg)
11837 return 0;
11838 base_string = end_op;
11839 if (is_space_char (*base_string))
11840 ++base_string;
11841 if (*base_string == ',')
11842 {
11843 ++base_string;
11844 if (is_space_char (*base_string))
11845 ++base_string;
11846 }
11847 else if (*base_string != ')')
11848 {
11849 as_bad (_("expecting `,' or `)' "
11850 "after index register in `%s'"),
11851 operand_string);
11852 return 0;
11853 }
11854 }
11855 else if (*base_string == REGISTER_PREFIX)
11856 {
11857 end_op = strchr (base_string, ',');
11858 if (end_op)
11859 *end_op = '\0';
11860 as_bad (_("bad register name `%s'"), base_string);
11861 return 0;
11862 }
11863
11864 /* Check for scale factor. */
11865 if (*base_string != ')')
11866 {
11867 char *end_scale = i386_scale (base_string);
11868
11869 if (!end_scale)
11870 return 0;
11871
11872 base_string = end_scale;
11873 if (is_space_char (*base_string))
11874 ++base_string;
11875 if (*base_string != ')')
11876 {
11877 as_bad (_("expecting `)' "
11878 "after scale factor in `%s'"),
11879 operand_string);
11880 return 0;
11881 }
11882 }
11883 else if (!i.index_reg)
11884 {
11885 as_bad (_("expecting index register or scale factor "
11886 "after `,'; got '%c'"),
11887 *base_string);
11888 return 0;
11889 }
11890 }
11891 else if (*base_string != ')')
11892 {
11893 as_bad (_("expecting `,' or `)' "
11894 "after base register in `%s'"),
11895 operand_string);
11896 return 0;
11897 }
11898 }
11899 else if (*base_string == REGISTER_PREFIX)
11900 {
11901 end_op = strchr (base_string, ',');
11902 if (end_op)
11903 *end_op = '\0';
11904 as_bad (_("bad register name `%s'"), base_string);
11905 return 0;
11906 }
11907 }
11908
11909 /* If there's an expression beginning the operand, parse it,
11910 assuming displacement_string_start and
11911 displacement_string_end are meaningful. */
11912 if (displacement_string_start != displacement_string_end)
11913 {
11914 if (!i386_displacement (displacement_string_start,
11915 displacement_string_end))
11916 return 0;
11917 }
11918
11919 /* Special case for (%dx) when used as the port of an input/output instruction. */
11920 if (i.base_reg
11921 && i.base_reg->reg_type.bitfield.instance == RegD
11922 && i.base_reg->reg_type.bitfield.word
11923 && i.index_reg == 0
11924 && i.log2_scale_factor == 0
11925 && i.seg[i.mem_operands] == 0
11926 && !operand_type_check (i.types[this_operand], disp))
11927 {
11928 i.types[this_operand] = i.base_reg->reg_type;
11929 i.input_output_operand = true;
11930 return 1;
11931 }
11932
11933 if (i386_index_check (operand_string) == 0)
11934 return 0;
11935 i.flags[this_operand] |= Operand_Mem;
11936 i.mem_operands++;
11937 }
11938 else
11939 {
11940 /* It's not a memory operand; argh! */
11941 as_bad (_("invalid char %s beginning operand %d `%s'"),
11942 output_invalid (*op_string),
11943 this_operand + 1,
11944 op_string);
11945 return 0;
11946 }
11947 return 1; /* Normal return. */
11948 }
11949 \f
11950 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11951 that an rs_machine_dependent frag may reach. */
11952
11953 unsigned int
11954 i386_frag_max_var (fragS *frag)
11955 {
11956 /* The only relaxable frags are for jumps.
11957 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
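/* (A short jmp grows to 0xe9 plus a 4-byte displacement; a conditional
jump additionally gains the 0x0f two-byte-opcode escape, hence 5.) */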
11958 gas_assert (frag->fr_type == rs_machine_dependent);
11959 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11960 }
11961
11962 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11963 static int
11964 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
11965 {
11966 /* STT_GNU_IFUNC symbol must go through PLT. */
11967 if ((symbol_get_bfdsym (fr_symbol)->flags
11968 & BSF_GNU_INDIRECT_FUNCTION) != 0)
11969 return 0;
11970
11971 if (!S_IS_EXTERNAL (fr_symbol))
11972 /* Symbol may be weak or local. */
11973 return !S_IS_WEAK (fr_symbol);
11974
11975 /* Global symbols with non-default visibility can't be preempted. */
11976 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11977 return 1;
11978
11979 if (fr_var != NO_RELOC)
11980 switch ((enum bfd_reloc_code_real) fr_var)
11981 {
11982 case BFD_RELOC_386_PLT32:
11983 case BFD_RELOC_X86_64_PLT32:
11984 /* Symbol with PLT relocation may be preempted. */
11985 return 0;
11986 default:
11987 abort ();
11988 }
11989
11990 /* Global symbols with default visibility in a shared library may be
11991 preempted by another definition. */
11992 return !shared;
11993 }
11994 #endif
11995
11996 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture.
11997 Note: this also works for Skylake and Cascadelake.
11998 ---------------------------------------------------------------------
11999 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
12000 | ------ | ----------- | ------- | -------- |
12001 | Jo | N | N | Y |
12002 | Jno | N | N | Y |
12003 | Jc/Jb | Y | N | Y |
12004 | Jae/Jnb | Y | N | Y |
12005 | Je/Jz | Y | Y | Y |
12006 | Jne/Jnz | Y | Y | Y |
12007 | Jna/Jbe | Y | N | Y |
12008 | Ja/Jnbe | Y | N | Y |
12009 | Js | N | N | Y |
12010 | Jns | N | N | Y |
12011 | Jp/Jpe | N | N | Y |
12012 | Jnp/Jpo | N | N | Y |
12013 | Jl/Jnge | Y | Y | Y |
12014 | Jge/Jnl | Y | Y | Y |
12015 | Jle/Jng | Y | Y | Y |
12016 | Jg/Jnle | Y | Y | Y |
12017 --------------------------------------------------------------------- */
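/* For example, "cmp %eax, %ecx; je 1f" can fuse (Je with ADD/SUB/CMP
is Y above), while "inc %eax; jc 1f" cannot (Jc with INC/DEC is N). */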
12018 static int
12019 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
12020 {
12021 if (mf_cmp == mf_cmp_alu_cmp)
12022 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
12023 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
12024 if (mf_cmp == mf_cmp_incdec)
12025 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
12026 || mf_jcc == mf_jcc_jle);
12027 if (mf_cmp == mf_cmp_test_and)
12028 return 1;
12029 return 0;
12030 }
12031
12032 /* Return the next non-empty frag. */
12033
12034 static fragS *
12035 i386_next_non_empty_frag (fragS *fragP)
12036 {
12037 /* There may be a frag with a ".fill 0" when there is no room in
12038 the current frag for frag_grow in output_insn. */
12039 for (fragP = fragP->fr_next;
12040 (fragP != NULL
12041 && fragP->fr_type == rs_fill
12042 && fragP->fr_fix == 0);
12043 fragP = fragP->fr_next)
12044 ;
12045 return fragP;
12046 }
12047
12048 /* Return the next jcc frag after BRANCH_PADDING. */
12049
12050 static fragS *
12051 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
12052 {
12053 fragS *branch_fragP;
12054 if (!pad_fragP)
12055 return NULL;
12056
12057 if (pad_fragP->fr_type == rs_machine_dependent
12058 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
12059 == BRANCH_PADDING))
12060 {
12061 branch_fragP = i386_next_non_empty_frag (pad_fragP);
12062 if (branch_fragP->fr_type != rs_machine_dependent)
12063 return NULL;
12064 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
12065 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
12066 pad_fragP->tc_frag_data.mf_type))
12067 return branch_fragP;
12068 }
12069
12070 return NULL;
12071 }
12072
12073 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
12074
12075 static void
12076 i386_classify_machine_dependent_frag (fragS *fragP)
12077 {
12078 fragS *cmp_fragP;
12079 fragS *pad_fragP;
12080 fragS *branch_fragP;
12081 fragS *next_fragP;
12082 unsigned int max_prefix_length;
12083
12084 if (fragP->tc_frag_data.classified)
12085 return;
12086
12087 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
12088 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
12089 for (next_fragP = fragP;
12090 next_fragP != NULL;
12091 next_fragP = next_fragP->fr_next)
12092 {
12093 next_fragP->tc_frag_data.classified = 1;
12094 if (next_fragP->fr_type == rs_machine_dependent)
12095 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
12096 {
12097 case BRANCH_PADDING:
12098 /* The BRANCH_PADDING frag must be followed by a branch
12099 frag. */
12100 branch_fragP = i386_next_non_empty_frag (next_fragP);
12101 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12102 break;
12103 case FUSED_JCC_PADDING:
12104 /* Check if this is a fused jcc:
12105 FUSED_JCC_PADDING
12106 CMP like instruction
12107 BRANCH_PADDING
12108 COND_JUMP
12109 */
12110 cmp_fragP = i386_next_non_empty_frag (next_fragP);
12111 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
12112 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
12113 if (branch_fragP)
12114 {
12115 /* The BRANCH_PADDING frag is merged with the
12116 FUSED_JCC_PADDING frag. */
12117 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12118 /* CMP like instruction size. */
12119 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
12120 frag_wane (pad_fragP);
12121 /* Skip to branch_fragP. */
12122 next_fragP = branch_fragP;
12123 }
12124 else if (next_fragP->tc_frag_data.max_prefix_length)
12125 {
12126 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
12127 a fused jcc. */
12128 next_fragP->fr_subtype
12129 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
12130 next_fragP->tc_frag_data.max_bytes
12131 = next_fragP->tc_frag_data.max_prefix_length;
12132 /* This will be updated in the BRANCH_PREFIX scan. */
12133 next_fragP->tc_frag_data.max_prefix_length = 0;
12134 }
12135 else
12136 frag_wane (next_fragP);
12137 break;
12138 }
12139 }
12140
12141 /* Stop if there is no BRANCH_PREFIX. */
12142 if (!align_branch_prefix_size)
12143 return;
12144
12145 /* Scan for BRANCH_PREFIX. */
12146 for (; fragP != NULL; fragP = fragP->fr_next)
12147 {
12148 if (fragP->fr_type != rs_machine_dependent
12149 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12150 != BRANCH_PREFIX))
12151 continue;
12152
12153 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12154 COND_JUMP_PREFIX. */
12155 max_prefix_length = 0;
12156 for (next_fragP = fragP;
12157 next_fragP != NULL;
12158 next_fragP = next_fragP->fr_next)
12159 {
12160 if (next_fragP->fr_type == rs_fill)
12161 /* Skip rs_fill frags. */
12162 continue;
12163 else if (next_fragP->fr_type != rs_machine_dependent)
12164 /* Stop for all other frags. */
12165 break;
12166
12167 /* rs_machine_dependent frags. */
12168 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12169 == BRANCH_PREFIX)
12170 {
12171 /* Count BRANCH_PREFIX frags. */
12172 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12173 {
12174 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12175 frag_wane (next_fragP);
12176 }
12177 else
12178 max_prefix_length
12179 += next_fragP->tc_frag_data.max_bytes;
12180 }
12181 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12182 == BRANCH_PADDING)
12183 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12184 == FUSED_JCC_PADDING))
12185 {
12186 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
12187 fragP->tc_frag_data.u.padding_fragP = next_fragP;
12188 break;
12189 }
12190 else
12191 /* Stop for other rs_machine_dependent frags. */
12192 break;
12193 }
12194
12195 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12196
12197 /* Skip to the next frag. */
12198 fragP = next_fragP;
12199 }
12200 }
12201
12202 /* Compute padding size for
12203
12204 FUSED_JCC_PADDING
12205 CMP like instruction
12206 BRANCH_PADDING
12207 COND_JUMP/UNCOND_JUMP
12208
12209 or
12210
12211 BRANCH_PADDING
12212 COND_JUMP/UNCOND_JUMP
12213 */
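/* For example, with align_branch_power == 5 (32-byte boundaries), a
branch whose first byte falls at offset 30 within the boundary and
whose total size is 8 would cross it, so 32 - 30 = 2 padding bytes
are needed. */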
12214
12215 static int
12216 i386_branch_padding_size (fragS *fragP, offsetT address)
12217 {
12218 unsigned int offset, size, padding_size;
12219 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12220
12221 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
12222 if (!address)
12223 address = fragP->fr_address;
12224 address += fragP->fr_fix;
12225
12226 /* CMP like instruction size. */
12227 size = fragP->tc_frag_data.cmp_size;
12228
12229 /* The base size of the branch frag. */
12230 size += branch_fragP->fr_fix;
12231
12232 /* Add opcode and displacement bytes for the rs_machine_dependent
12233 branch frag. */
12234 if (branch_fragP->fr_type == rs_machine_dependent)
12235 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12236
12237 /* Check if branch is within boundary and doesn't end at the last
12238 byte. */
12239 offset = address & ((1U << align_branch_power) - 1);
12240 if ((offset + size) >= (1U << align_branch_power))
12241 /* Padding needed to avoid crossing boundary. */
12242 padding_size = (1U << align_branch_power) - offset;
12243 else
12244 /* No padding needed. */
12245 padding_size = 0;
12246
12247 /* The return value may be saved in tc_frag_data.length, which is
12248 an unsigned byte. */
12249 if (!fits_in_unsigned_byte (padding_size))
12250 abort ();
12251
12252 return padding_size;
12253 }
12254
12255 /* i386_generic_table_relax_frag()
12256
12257 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12258 grow/shrink padding to align branch frags. Hand others to
12259 relax_frag(). */
12260
12261 long
12262 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12263 {
12264 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12265 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12266 {
12267 long padding_size = i386_branch_padding_size (fragP, 0);
12268 long grow = padding_size - fragP->tc_frag_data.length;
12269
12270 /* When the BRANCH_PREFIX frag is used, the computed address
12271 must match the actual address and there should be no padding. */
12272 if (fragP->tc_frag_data.padding_address
12273 && (fragP->tc_frag_data.padding_address != fragP->fr_address
12274 || padding_size))
12275 abort ();
12276
12277 /* Update the padding size. */
12278 if (grow)
12279 fragP->tc_frag_data.length = padding_size;
12280
12281 return grow;
12282 }
12283 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12284 {
12285 fragS *padding_fragP, *next_fragP;
12286 long padding_size, left_size, last_size;
12287
12288 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12289 if (!padding_fragP)
12290 /* Use the padding set by the leading BRANCH_PREFIX frag. */
12291 return (fragP->tc_frag_data.length
12292 - fragP->tc_frag_data.last_length);
12293
12294 /* Compute the relative address of the padding frag the very first
12295 time through, when the BRANCH_PREFIX frag sizes are zero. */
12296 if (!fragP->tc_frag_data.padding_address)
12297 fragP->tc_frag_data.padding_address
12298 = padding_fragP->fr_address - (fragP->fr_address - stretch);
12299
12300 /* First update the last length from the previous iteration. */
12301 left_size = fragP->tc_frag_data.prefix_length;
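/* Walk the BRANCH_PREFIX frags, granting each at most its max_bytes
worth of the previous iteration's prefix budget; frags beyond the
budget get a last_length of zero. */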
12302 for (next_fragP = fragP;
12303 next_fragP != padding_fragP;
12304 next_fragP = next_fragP->fr_next)
12305 if (next_fragP->fr_type == rs_machine_dependent
12306 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12307 == BRANCH_PREFIX))
12308 {
12309 if (left_size)
12310 {
12311 int max = next_fragP->tc_frag_data.max_bytes;
12312 if (max)
12313 {
12314 int size;
12315 if (max > left_size)
12316 size = left_size;
12317 else
12318 size = max;
12319 left_size -= size;
12320 next_fragP->tc_frag_data.last_length = size;
12321 }
12322 }
12323 else
12324 next_fragP->tc_frag_data.last_length = 0;
12325 }
12326
12327 /* Check the padding size for the padding frag. */
12328 padding_size = i386_branch_padding_size
12329 (padding_fragP, (fragP->fr_address
12330 + fragP->tc_frag_data.padding_address));
12331
12332 last_size = fragP->tc_frag_data.prefix_length;
12333 /* Check if there is a change from the last iteration. */
12334 if (padding_size == last_size)
12335 {
12336 /* Update the expected address of the padding frag. */
12337 padding_fragP->tc_frag_data.padding_address
12338 = (fragP->fr_address + padding_size
12339 + fragP->tc_frag_data.padding_address);
12340 return 0;
12341 }
12342
12343 if (padding_size > fragP->tc_frag_data.max_prefix_length)
12344 {
12345 /* No padding if there is insufficient room. Clear the
12346 expected address of the padding frag. */
12347 padding_fragP->tc_frag_data.padding_address = 0;
12348 padding_size = 0;
12349 }
12350 else
12351 /* Store the expected address of the padding frag. */
12352 padding_fragP->tc_frag_data.padding_address
12353 = (fragP->fr_address + padding_size
12354 + fragP->tc_frag_data.padding_address);
12355
12356 fragP->tc_frag_data.prefix_length = padding_size;
12357
12358 /* Update the length for the current iteration. */
12359 left_size = padding_size;
12360 for (next_fragP = fragP;
12361 next_fragP != padding_fragP;
12362 next_fragP = next_fragP->fr_next)
12363 if (next_fragP->fr_type == rs_machine_dependent
12364 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12365 == BRANCH_PREFIX))
12366 {
12367 if (left_size)
12368 {
12369 int max = next_fragP->tc_frag_data.max_bytes;
12370 if (max)
12371 {
12372 int size;
12373 if (max > left_size)
12374 size = left_size;
12375 else
12376 size = max;
12377 left_size -= size;
12378 next_fragP->tc_frag_data.length = size;
12379 }
12380 }
12381 else
12382 next_fragP->tc_frag_data.length = 0;
12383 }
12384
12385 return (fragP->tc_frag_data.length
12386 - fragP->tc_frag_data.last_length);
12387 }
12388 return relax_frag (segment, fragP, stretch);
12389 }
12390
12391 /* md_estimate_size_before_relax()
12392
12393 Called just before relax() for rs_machine_dependent frags. The x86
12394 assembler uses these frags to handle variable size jump
12395 instructions.
12396
12397 Any symbol that is now undefined will not become defined.
12398 Return the correct fr_subtype in the frag.
12399 Return the initial "guess for variable size of frag" to caller.
12400 The guess is actually the growth beyond the fixed part. Whatever
12401 we do to grow the fixed or variable part contributes to our
12402 returned value. */
12403
12404 int
12405 md_estimate_size_before_relax (fragS *fragP, segT segment)
12406 {
12407 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12408 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12409 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12410 {
12411 i386_classify_machine_dependent_frag (fragP);
12412 return fragP->tc_frag_data.length;
12413 }
12414
12415 /* We've already got fragP->fr_subtype right; all we have to do is
12416 check for un-relaxable symbols. On an ELF system, we can't relax
12417 an externally visible symbol, because it may be overridden by a
12418 shared library. */
12419 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12420 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12421 || (IS_ELF
12422 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12423 fragP->fr_var))
12424 #endif
12425 #if defined (OBJ_COFF) && defined (TE_PE)
12426 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12427 && S_IS_WEAK (fragP->fr_symbol))
12428 #endif
12429 )
12430 {
12431 /* Symbol is undefined in this segment, or we need to keep a
12432 reloc so that weak symbols can be overridden. */
12433 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12434 enum bfd_reloc_code_real reloc_type;
12435 unsigned char *opcode;
12436 int old_fr_fix;
12437 fixS *fixP = NULL;
12438
12439 if (fragP->fr_var != NO_RELOC)
12440 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12441 else if (size == 2)
12442 reloc_type = BFD_RELOC_16_PCREL;
12443 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12444 else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
12445 && need_plt32_p (fragP->fr_symbol))
12446 reloc_type = BFD_RELOC_X86_64_PLT32;
12447 #endif
12448 else
12449 reloc_type = BFD_RELOC_32_PCREL;
12450
12451 old_fr_fix = fragP->fr_fix;
12452 opcode = (unsigned char *) fragP->fr_opcode;
12453
12454 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12455 {
12456 case UNCOND_JUMP:
12457 /* Make jmp (0xeb) a (d)word displacement jump. */
12458 opcode[0] = 0xe9;
12459 fragP->fr_fix += size;
12460 fixP = fix_new (fragP, old_fr_fix, size,
12461 fragP->fr_symbol,
12462 fragP->fr_offset, 1,
12463 reloc_type);
12464 break;
12465
12466 case COND_JUMP86:
12467 if (size == 2
12468 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12469 {
12470 /* Negate the condition, and branch past an
12471 unconditional jump. */
12472 opcode[0] ^= 1;
12473 opcode[1] = 3;
12474 /* Insert an unconditional jump. */
12475 opcode[2] = 0xe9;
12476 /* We added two extra opcode bytes, and have a two byte
12477 offset. */
12478 fragP->fr_fix += 2 + 2;
12479 fix_new (fragP, old_fr_fix + 2, 2,
12480 fragP->fr_symbol,
12481 fragP->fr_offset, 1,
12482 reloc_type);
12483 break;
12484 }
12485 /* Fall through. */
12486
12487 case COND_JUMP:
12488 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12489 {
12490 fragP->fr_fix += 1;
12491 fixP = fix_new (fragP, old_fr_fix, 1,
12492 fragP->fr_symbol,
12493 fragP->fr_offset, 1,
12494 BFD_RELOC_8_PCREL);
12495 fixP->fx_signed = 1;
12496 break;
12497 }
12498
12499 /* This changes the byte-displacement jump 0x7N
12500 to the (d)word-displacement jump 0x0f,0x8N. */
12501 opcode[1] = opcode[0] + 0x10;
12502 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12503 /* We've added an opcode byte. */
12504 fragP->fr_fix += 1 + size;
12505 fixP = fix_new (fragP, old_fr_fix + 1, size,
12506 fragP->fr_symbol,
12507 fragP->fr_offset, 1,
12508 reloc_type);
12509 break;
12510
12511 default:
12512 BAD_CASE (fragP->fr_subtype);
12513 break;
12514 }
12515
12516 /* All jumps handled here are signed, but don't unconditionally use a
12517 signed limit check for 32 and 16 bit jumps as we want to allow wrap
12518 around at 4G (outside of 64-bit mode) and 64k. */
12519 if (size == 4 && flag_code == CODE_64BIT)
12520 fixP->fx_signed = 1;
12521
12522 frag_wane (fragP);
12523 return fragP->fr_fix - old_fr_fix;
12524 }
12525
12526 /* Guess size depending on current relax state. Initially the relax
12527 state will correspond to a short jump and we return 1, because
12528 the variable part of the frag (the branch offset) is one byte
12529 long. However, we can relax a section more than once and in that
12530 case we must either set fr_subtype back to the unrelaxed state,
12531 or return the value for the appropriate branch. */
12532 return md_relax_table[fragP->fr_subtype].rlx_length;
12533 }
12534
12535 /* Called after relax() is finished.
12536
12537 In: Address of frag.
12538 fr_type == rs_machine_dependent.
12539 fr_subtype is what the address relaxed to.
12540
12541 Out: Any fixSs and constants are set up.
12542 Caller will turn frag into a ".space 0". */
12543
12544 void
12545 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12546 fragS *fragP)
12547 {
12548 unsigned char *opcode;
12549 unsigned char *where_to_put_displacement = NULL;
12550 offsetT target_address;
12551 offsetT opcode_address;
12552 unsigned int extension = 0;
12553 offsetT displacement_from_opcode_start;
12554
12555 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12556 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12557 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12558 {
12559 /* Generate nop padding. */
12560 unsigned int size = fragP->tc_frag_data.length;
12561 if (size)
12562 {
12563 if (size > fragP->tc_frag_data.max_bytes)
12564 abort ();
12565
12566 if (flag_debug)
12567 {
12568 const char *msg;
12569 const char *branch = "branch";
12570 const char *prefix = "";
12571 fragS *padding_fragP;
12572 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12573 == BRANCH_PREFIX)
12574 {
12575 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12576 switch (fragP->tc_frag_data.default_prefix)
12577 {
12578 default:
12579 abort ();
12580 break;
12581 case CS_PREFIX_OPCODE:
12582 prefix = " cs";
12583 break;
12584 case DS_PREFIX_OPCODE:
12585 prefix = " ds";
12586 break;
12587 case ES_PREFIX_OPCODE:
12588 prefix = " es";
12589 break;
12590 case FS_PREFIX_OPCODE:
12591 prefix = " fs";
12592 break;
12593 case GS_PREFIX_OPCODE:
12594 prefix = " gs";
12595 break;
12596 case SS_PREFIX_OPCODE:
12597 prefix = " ss";
12598 break;
12599 }
12600 if (padding_fragP)
12601 msg = _("%s:%u: add %d%s at 0x%llx to align "
12602 "%s within %d-byte boundary\n");
12603 else
12604 msg = _("%s:%u: add additional %d%s at 0x%llx to "
12605 "align %s within %d-byte boundary\n");
12606 }
12607 else
12608 {
12609 padding_fragP = fragP;
12610 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12611 "%s within %d-byte boundary\n");
12612 }
12613
12614 if (padding_fragP)
12615 switch (padding_fragP->tc_frag_data.branch_type)
12616 {
12617 case align_branch_jcc:
12618 branch = "jcc";
12619 break;
12620 case align_branch_fused:
12621 branch = "fused jcc";
12622 break;
12623 case align_branch_jmp:
12624 branch = "jmp";
12625 break;
12626 case align_branch_call:
12627 branch = "call";
12628 break;
12629 case align_branch_indirect:
12630 branch = "indiret branch";
12631 break;
12632 case align_branch_ret:
12633 branch = "ret";
12634 break;
12635 default:
12636 break;
12637 }
12638
12639 fprintf (stdout, msg,
12640 fragP->fr_file, fragP->fr_line, size, prefix,
12641 (long long) fragP->fr_address, branch,
12642 1 << align_branch_power);
12643 }
12644 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12645 memset (fragP->fr_opcode,
12646 fragP->tc_frag_data.default_prefix, size);
12647 else
12648 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12649 size, 0);
12650 fragP->fr_fix += size;
12651 }
12652 return;
12653 }
12654
12655 opcode = (unsigned char *) fragP->fr_opcode;
12656
12657 /* Address we want to reach in file space. */
12658 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12659
12660 /* Address opcode resides at in file space. */
12661 opcode_address = fragP->fr_address + fragP->fr_fix;
12662
12663 /* Displacement from opcode start to fill into instruction. */
12664 displacement_from_opcode_start = target_address - opcode_address;
12665
12666 if ((fragP->fr_subtype & BIG) == 0)
12667 {
12668 /* Don't have to change opcode. */
12669 extension = 1; /* 1 opcode + 1 displacement */
12670 where_to_put_displacement = &opcode[1];
12671 }
12672 else
12673 {
12674 if (no_cond_jump_promotion
12675 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12676 as_warn_where (fragP->fr_file, fragP->fr_line,
12677 _("long jump required"));
12678
12679 switch (fragP->fr_subtype)
12680 {
12681 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12682 extension = 4; /* 1 opcode + 4 displacement */
12683 opcode[0] = 0xe9;
12684 where_to_put_displacement = &opcode[1];
12685 break;
12686
12687 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12688 extension = 2; /* 1 opcode + 2 displacement */
12689 opcode[0] = 0xe9;
12690 where_to_put_displacement = &opcode[1];
12691 break;
12692
12693 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12694 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12695 extension = 5; /* 2 opcode + 4 displacement */
12696 opcode[1] = opcode[0] + 0x10;
12697 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12698 where_to_put_displacement = &opcode[2];
12699 break;
12700
12701 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12702 extension = 3; /* 2 opcode + 2 displacement */
12703 opcode[1] = opcode[0] + 0x10;
12704 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12705 where_to_put_displacement = &opcode[2];
12706 break;
12707
12708 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
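/* Negate the condition and branch past a 3-byte unconditional
jump (0xe9 plus a 2-byte displacement). */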
12709 extension = 4;
12710 opcode[0] ^= 1;
12711 opcode[1] = 3;
12712 opcode[2] = 0xe9;
12713 where_to_put_displacement = &opcode[3];
12714 break;
12715
12716 default:
12717 BAD_CASE (fragP->fr_subtype);
12718 break;
12719 }
12720 }
12721
12722 /* If size is less than four we are sure that the operand fits,
12723 but if it's 4, then it could be that the displacement is larger
12724 than -/+ 2GB. */
12725 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12726 && object_64bit
12727 && ((addressT) (displacement_from_opcode_start - extension
12728 + ((addressT) 1 << 31))
12729 > (((addressT) 2 << 31) - 1)))
12730 {
12731 as_bad_where (fragP->fr_file, fragP->fr_line,
12732 _("jump target out of range"));
12733 /* Make us emit 0. */
12734 displacement_from_opcode_start = extension;
12735 }
12736 /* Now put displacement after opcode. */
12737 md_number_to_chars ((char *) where_to_put_displacement,
12738 (valueT) (displacement_from_opcode_start - extension),
12739 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12740 fragP->fr_fix += extension;
12741 }
12742 \f
12743 /* Apply a fixup (fixP) to segment data, once it has been determined
12744 by our caller that we have all the info we need to fix it up.
12745
12746 Parameter valP is the pointer to the value of the bits.
12747
12748 On the 386, immediates, displacements, and data pointers are all in
12749 the same (little-endian) format, so we don't need to care about which
12750 we are handling. */
12751
12752 void
12753 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12754 {
12755 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12756 valueT value = *valP;
12757
12758 #if !defined (TE_Mach)
12759 if (fixP->fx_pcrel)
12760 {
12761 switch (fixP->fx_r_type)
12762 {
12763 default:
12764 break;
12765
12766 case BFD_RELOC_64:
12767 fixP->fx_r_type = BFD_RELOC_64_PCREL;
12768 break;
12769 case BFD_RELOC_32:
12770 case BFD_RELOC_X86_64_32S:
12771 fixP->fx_r_type = BFD_RELOC_32_PCREL;
12772 break;
12773 case BFD_RELOC_16:
12774 fixP->fx_r_type = BFD_RELOC_16_PCREL;
12775 break;
12776 case BFD_RELOC_8:
12777 fixP->fx_r_type = BFD_RELOC_8_PCREL;
12778 break;
12779 }
12780 }
12781
12782 if (fixP->fx_addsy != NULL
12783 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12784 || fixP->fx_r_type == BFD_RELOC_64_PCREL
12785 || fixP->fx_r_type == BFD_RELOC_16_PCREL
12786 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12787 && !use_rela_relocations)
12788 {
12789 /* This is a hack. There should be a better way to handle this.
12790 This covers for the fact that bfd_install_relocation will
12791 subtract the current location (for partial_inplace, PC relative
12792 relocations); see more below. */
12793 #ifndef OBJ_AOUT
12794 if (IS_ELF
12795 #ifdef TE_PE
12796 || OUTPUT_FLAVOR == bfd_target_coff_flavour
12797 #endif
12798 )
12799 value += fixP->fx_where + fixP->fx_frag->fr_address;
12800 #endif
12801 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12802 if (IS_ELF)
12803 {
12804 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12805
12806 if ((sym_seg == seg
12807 || (symbol_section_p (fixP->fx_addsy)
12808 && sym_seg != absolute_section))
12809 && !generic_force_reloc (fixP))
12810 {
12811 /* Yes, we add the values in twice. This is because
12812 bfd_install_relocation subtracts them out again. I think
12813 bfd_install_relocation is broken, but I don't dare change
12814 it. FIXME. */
12815 value += fixP->fx_where + fixP->fx_frag->fr_address;
12816 }
12817 }
12818 #endif
12819 #if defined (OBJ_COFF) && defined (TE_PE)
12820 /* For some reason, the PE format does not store a
12821 section address offset for a PC relative symbol. */
12822 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12823 || S_IS_WEAK (fixP->fx_addsy))
12824 value += md_pcrel_from (fixP);
12825 #endif
12826 }
12827 #if defined (OBJ_COFF) && defined (TE_PE)
12828 if (fixP->fx_addsy != NULL
12829 && S_IS_WEAK (fixP->fx_addsy)
12830 /* PR 16858: Do not modify weak function references. */
12831 && ! fixP->fx_pcrel)
12832 {
12833 #if !defined (TE_PEP)
12834 /* For x86 PE weak function symbols are neither PC-relative
12835 nor do they set S_IS_FUNCTION. So the only reliable way
12836 to detect them is to check the flags of their containing
12837 section. */
12838 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12839 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12840 ;
12841 else
12842 #endif
12843 value -= S_GET_VALUE (fixP->fx_addsy);
12844 }
12845 #endif
12846
12847 /* Fix a few things - the dynamic linker expects certain values here,
12848 and we must not disappoint it. */
12849 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12850 if (IS_ELF && fixP->fx_addsy)
12851 switch (fixP->fx_r_type)
12852 {
12853 case BFD_RELOC_386_PLT32:
12854 case BFD_RELOC_X86_64_PLT32:
12855 /* Make the jump instruction point to the address of the operand.
12856 At runtime we merely add the offset to the actual PLT entry.
12857 NB: Subtract the offset size only for jump instructions. */
12858 if (fixP->fx_pcrel)
12859 value = -4;
12860 break;
12861
12862 case BFD_RELOC_386_TLS_GD:
12863 case BFD_RELOC_386_TLS_LDM:
12864 case BFD_RELOC_386_TLS_IE_32:
12865 case BFD_RELOC_386_TLS_IE:
12866 case BFD_RELOC_386_TLS_GOTIE:
12867 case BFD_RELOC_386_TLS_GOTDESC:
12868 case BFD_RELOC_X86_64_TLSGD:
12869 case BFD_RELOC_X86_64_TLSLD:
12870 case BFD_RELOC_X86_64_GOTTPOFF:
12871 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12872 value = 0; /* Fully resolved at runtime. No addend. */
12873 /* Fallthrough */
12874 case BFD_RELOC_386_TLS_LE:
12875 case BFD_RELOC_386_TLS_LDO_32:
12876 case BFD_RELOC_386_TLS_LE_32:
12877 case BFD_RELOC_X86_64_DTPOFF32:
12878 case BFD_RELOC_X86_64_DTPOFF64:
12879 case BFD_RELOC_X86_64_TPOFF32:
12880 case BFD_RELOC_X86_64_TPOFF64:
12881 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12882 break;
12883
12884 case BFD_RELOC_386_TLS_DESC_CALL:
12885 case BFD_RELOC_X86_64_TLSDESC_CALL:
12886 value = 0; /* Fully resolved at runtime. No addend. */
12887 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12888 fixP->fx_done = 0;
12889 return;
12890
12891 case BFD_RELOC_VTABLE_INHERIT:
12892 case BFD_RELOC_VTABLE_ENTRY:
12893 fixP->fx_done = 0;
12894 return;
12895
12896 default:
12897 break;
12898 }
12899 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
12900
12901 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
12902 if (!object_64bit)
12903 value = extend_to_32bit_address (value);
12904
12905 *valP = value;
12906 #endif /* !defined (TE_Mach) */
12907
12908 /* Are we finished with this relocation now? */
12909 if (fixP->fx_addsy == NULL)
12910 {
12911 fixP->fx_done = 1;
12912 switch (fixP->fx_r_type)
12913 {
12914 case BFD_RELOC_X86_64_32S:
12915 fixP->fx_signed = 1;
12916 break;
12917
12918 default:
12919 break;
12920 }
12921 }
12922 #if defined (OBJ_COFF) && defined (TE_PE)
12923 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12924 {
12925 fixP->fx_done = 0;
12926 /* Remember value for tc_gen_reloc. */
12927 fixP->fx_addnumber = value;
12928 /* Clear out the frag for now. */
12929 value = 0;
12930 }
12931 #endif
12932 else if (use_rela_relocations)
12933 {
12934 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
12935 fixP->fx_no_overflow = 1;
12936 /* Remember value for tc_gen_reloc. */
12937 fixP->fx_addnumber = value;
12938 value = 0;
12939 }
12940
12941 md_number_to_chars (p, value, fixP->fx_size);
12942 }
12943 \f
12944 const char *
12945 md_atof (int type, char *litP, int *sizeP)
12946 {
12947 /* This outputs the LITTLENUMs in REVERSE order;
12948 in accord with the little-endian 386. */
12949 return ieee_md_atof (type, litP, sizeP, false);
12950 }
12951 \f
12952 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
12953
12954 static char *
12955 output_invalid (int c)
12956 {
12957 if (ISPRINT (c))
12958 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12959 "'%c'", c);
12960 else
12961 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12962 "(0x%x)", (unsigned char) c);
12963 return output_invalid_buf;
12964 }
12965
12966 /* Verify that @r can be used in the current context. */
12967
12968 static bool check_register (const reg_entry *r)
12969 {
12970 if (allow_pseudo_reg)
12971 return true;
12972
12973 if (operand_type_all_zero (&r->reg_type))
12974 return false;
12975
12976 if ((r->reg_type.bitfield.dword
12977 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12978 || r->reg_type.bitfield.class == RegCR
12979 || r->reg_type.bitfield.class == RegDR)
12980 && !cpu_arch_flags.bitfield.cpui386)
12981 return false;
12982
12983 if (r->reg_type.bitfield.class == RegTR
12984 && (flag_code == CODE_64BIT
12985 || !cpu_arch_flags.bitfield.cpui386
12986 || cpu_arch_isa_flags.bitfield.cpui586
12987 || cpu_arch_isa_flags.bitfield.cpui686))
12988 return false;
12989
12990 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12991 return false;
12992
12993 if (!cpu_arch_flags.bitfield.cpuavx512f)
12994 {
12995 if (r->reg_type.bitfield.zmmword
12996 || r->reg_type.bitfield.class == RegMask)
12997 return false;
12998
12999 if (!cpu_arch_flags.bitfield.cpuavx)
13000 {
13001 if (r->reg_type.bitfield.ymmword)
13002 return false;
13003
13004 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
13005 return false;
13006 }
13007 }
13008
13009 if (r->reg_type.bitfield.tmmword
13010 && (!cpu_arch_flags.bitfield.cpuamx_tile
13011 || flag_code != CODE_64BIT))
13012 return false;
13013
13014 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
13015 return false;
13016
13017 /* Don't allow the fake index register unless allow_index_reg is non-zero. */
13018 if (!allow_index_reg && r->reg_num == RegIZ)
13019 return false;
13020
13021 /* Upper 16 vector registers are only available with VREX in 64bit
13022 mode, and require EVEX encoding. */
13023 if (r->reg_flags & RegVRex)
13024 {
13025 if (!cpu_arch_flags.bitfield.cpuavx512f
13026 || flag_code != CODE_64BIT)
13027 return false;
13028
13029 if (i.vec_encoding == vex_encoding_default)
13030 i.vec_encoding = vex_encoding_evex;
13031 else if (i.vec_encoding != vex_encoding_evex)
13032 i.vec_encoding = vex_encoding_error;
13033 }
13034
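/* Outside of 64-bit mode, registers that need a REX prefix and
64-bit (qword) registers are unusable; 64-bit control registers
are the exception when the CPU supports long mode. */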
13035 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
13036 && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
13037 && flag_code != CODE_64BIT)
13038 return false;
13039
13040 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
13041 && !intel_syntax)
13042 return false;
13043
13044 return true;
13045 }
13046
13047 /* REG_STRING starts *before* REGISTER_PREFIX. */
13048
13049 static const reg_entry *
13050 parse_real_register (char *reg_string, char **end_op)
13051 {
13052 char *s = reg_string;
13053 char *p;
13054 char reg_name_given[MAX_REG_NAME_SIZE + 1];
13055 const reg_entry *r;
13056
13057 /* Skip possible REGISTER_PREFIX and possible whitespace. */
13058 if (*s == REGISTER_PREFIX)
13059 ++s;
13060
13061 if (is_space_char (*s))
13062 ++s;
13063
13064 p = reg_name_given;
13065 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
13066 {
13067 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
13068 return (const reg_entry *) NULL;
13069 s++;
13070 }
13071
13072 /* For naked regs, make sure that we are not dealing with an identifier.
13073 This prevents confusing an identifier like `eax_var' with register
13074 `eax'. */
13075 if (allow_naked_reg && identifier_chars[(unsigned char) *s])
13076 return (const reg_entry *) NULL;
13077
13078 *end_op = s;
13079
13080 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
13081
13082 /* Handle floating point regs, allowing spaces in the (i) part. */
13083 if (r == reg_st0)
13084 {
13085 if (!cpu_arch_flags.bitfield.cpu8087
13086 && !cpu_arch_flags.bitfield.cpu287
13087 && !cpu_arch_flags.bitfield.cpu387
13088 && !allow_pseudo_reg)
13089 return (const reg_entry *) NULL;
13090
13091 if (is_space_char (*s))
13092 ++s;
13093 if (*s == '(')
13094 {
13095 ++s;
13096 if (is_space_char (*s))
13097 ++s;
13098 if (*s >= '0' && *s <= '7')
13099 {
13100 int fpr = *s - '0';
13101 ++s;
13102 if (is_space_char (*s))
13103 ++s;
13104 if (*s == ')')
13105 {
13106 *end_op = s + 1;
13107 know (r[fpr].reg_num == fpr);
13108 return r + fpr;
13109 }
13110 }
13111 /* We have "%st(" then garbage. */
13112 return (const reg_entry *) NULL;
13113 }
13114 }
13115
13116 return r && check_register (r) ? r : NULL;
13117 }
13118
13119 /* REG_STRING starts *before* REGISTER_PREFIX. */
13120
13121 static const reg_entry *
13122 parse_register (char *reg_string, char **end_op)
13123 {
13124 const reg_entry *r;
13125
13126 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
13127 r = parse_real_register (reg_string, end_op);
13128 else
13129 r = NULL;
13130 if (!r)
13131 {
13132 char *save = input_line_pointer;
13133 char c;
13134 symbolS *symbolP;
13135
13136 input_line_pointer = reg_string;
13137 c = get_symbol_name (&reg_string);
13138 symbolP = symbol_find (reg_string);
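/* Chase a chain of simple symbol equates (O_symbol with a zero
addend), in case the name was defined as an alias of a register. */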
13139 while (symbolP && S_GET_SEGMENT (symbolP) != reg_section)
13140 {
13141 const expressionS *e = symbol_get_value_expression (symbolP);
13142
13143 if (e->X_op != O_symbol || e->X_add_number)
13144 break;
13145 symbolP = e->X_add_symbol;
13146 }
13147 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
13148 {
13149 const expressionS *e = symbol_get_value_expression (symbolP);
13150
13151 if (e->X_op == O_register)
13152 {
13153 know (e->X_add_number >= 0
13154 && (valueT) e->X_add_number < i386_regtab_size);
13155 r = i386_regtab + e->X_add_number;
13156 *end_op = input_line_pointer;
13157 }
13158 if (r && !check_register (r))
13159 {
13160 as_bad (_("register '%s%s' cannot be used here"),
13161 register_prefix, r->reg_name);
13162 r = &bad_reg;
13163 }
13164 }
13165 *input_line_pointer = c;
13166 input_line_pointer = save;
13167 }
13168 return r;
13169 }
13170
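/* Parse NAME as a register reference in an expression context,
filling in E and returning 1 on success; in Intel syntax,
unrecognized names fall back to i386_intel_parse_name. */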
13171 int
13172 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13173 {
13174 const reg_entry *r = NULL;
13175 char *end = input_line_pointer;
13176
13177 *end = *nextcharP;
13178 if (*name == REGISTER_PREFIX || allow_naked_reg)
13179 r = parse_real_register (name, &input_line_pointer);
13180 if (r && end <= input_line_pointer)
13181 {
13182 *nextcharP = *input_line_pointer;
13183 *input_line_pointer = 0;
13184 if (r != &bad_reg)
13185 {
13186 e->X_op = O_register;
13187 e->X_add_number = r - i386_regtab;
13188 }
13189 else
13190 e->X_op = O_illegal;
13191 return 1;
13192 }
13193 input_line_pointer = end;
13194 *end = 0;
13195 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13196 }
13197
13198 void
13199 md_operand (expressionS *e)
13200 {
13201 char *end;
13202 const reg_entry *r;
13203
13204 switch (*input_line_pointer)
13205 {
13206 case REGISTER_PREFIX:
13207 r = parse_real_register (input_line_pointer, &end);
13208 if (r)
13209 {
13210 e->X_op = O_register;
13211 e->X_add_number = r - i386_regtab;
13212 input_line_pointer = end;
13213 }
13214 break;
13215
13216 case '[':
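/* In Intel syntax, a bracketed sub-expression is wrapped into an
expression symbol and returned as an O_index operand. */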
13217 gas_assert (intel_syntax);
13218 end = input_line_pointer++;
13219 expression (e);
13220 if (*input_line_pointer == ']')
13221 {
13222 ++input_line_pointer;
13223 e->X_op_symbol = make_expr_symbol (e);
13224 e->X_add_symbol = NULL;
13225 e->X_add_number = 0;
13226 e->X_op = O_index;
13227 }
13228 else
13229 {
13230 e->X_op = O_absent;
13231 input_line_pointer = end;
13232 }
13233 break;
13234 }
13235 }
13236
13237 \f
13238 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13239 const char *md_shortopts = "kVQ:sqnO::";
13240 #else
13241 const char *md_shortopts = "qnO::";
13242 #endif
13243
13244 #define OPTION_32 (OPTION_MD_BASE + 0)
13245 #define OPTION_64 (OPTION_MD_BASE + 1)
13246 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
13247 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13248 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
13249 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
13250 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
13251 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
13252 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
13253 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
13254 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
13255 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
13256 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
13257 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
13258 #define OPTION_X32 (OPTION_MD_BASE + 14)
13259 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
13260 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
13261 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
13262 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
13263 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
13264 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
13265 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
13266 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
13267 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
13268 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
13269 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
13270 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
13271 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
13272 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
13273 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
13274 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
13275 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
13276 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
13277 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
13278 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
13279
13280 struct option md_longopts[] =
13281 {
13282 {"32", no_argument, NULL, OPTION_32},
13283 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13284 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13285 {"64", no_argument, NULL, OPTION_64},
13286 #endif
13287 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13288 {"x32", no_argument, NULL, OPTION_X32},
13289 {"mshared", no_argument, NULL, OPTION_MSHARED},
13290 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
13291 #endif
13292 {"divide", no_argument, NULL, OPTION_DIVIDE},
13293 {"march", required_argument, NULL, OPTION_MARCH},
13294 {"mtune", required_argument, NULL, OPTION_MTUNE},
13295 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
13296 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
13297 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
13298 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
13299 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
13300 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
13301 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
13302 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
13303 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
13304 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
13305 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
13306 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
13307 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
13308 #if defined (TE_PE) || defined (TE_PEP)
13309 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
13310 #endif
13311 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
13312 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
13313 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
13314 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
13315 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
13316 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
13317 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
13318 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
13319 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
13320 {"mlfence-before-indirect-branch", required_argument, NULL,
13321 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
13322 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
13323 {"mamd64", no_argument, NULL, OPTION_MAMD64},
13324 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
13325 {NULL, no_argument, NULL, 0}
13326 };
13327 size_t md_longopts_size = sizeof (md_longopts);
13328
13329 int
13330 md_parse_option (int c, const char *arg)
13331 {
13332 unsigned int j;
13333 char *arch, *next, *saved, *type;
13334
13335 switch (c)
13336 {
13337 case 'n':
13338 optimize_align_code = 0;
13339 break;
13340
13341 case 'q':
13342 quiet_warnings = 1;
13343 break;
13344
13345 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13346 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13347 should be emitted or not. FIXME: Not implemented. */
13348 case 'Q':
13349 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13350 return 0;
13351 break;
13352
13353 /* -V: SVR4 argument to print version ID. */
13354 case 'V':
13355 print_version_id ();
13356 break;
13357
13358 /* -k: Ignore for FreeBSD compatibility. */
13359 case 'k':
13360 break;
13361
13362 case 's':
13363 /* -s: On i386 Solaris, this tells the native assembler to use
13364 .stab instead of .stab.excl. We always use .stab anyhow. */
13365 break;
13366
13367 case OPTION_MSHARED:
13368 shared = 1;
13369 break;
13370
13371 case OPTION_X86_USED_NOTE:
13372 if (strcasecmp (arg, "yes") == 0)
13373 x86_used_note = 1;
13374 else if (strcasecmp (arg, "no") == 0)
13375 x86_used_note = 0;
13376 else
13377 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13378 break;
13379
13381 #endif
13382 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13383 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13384 case OPTION_64:
13385 {
13386 const char **list, **l;
13387
13388 list = bfd_target_list ();
13389 for (l = list; *l != NULL; l++)
13390 if (startswith (*l, "elf64-x86-64")
13391 || strcmp (*l, "coff-x86-64") == 0
13392 || strcmp (*l, "pe-x86-64") == 0
13393 || strcmp (*l, "pei-x86-64") == 0
13394 || strcmp (*l, "mach-o-x86-64") == 0)
13395 {
13396 default_arch = "x86_64";
13397 break;
13398 }
13399 if (*l == NULL)
13400 as_fatal (_("no compiled in support for x86_64"));
13401 free (list);
13402 }
13403 break;
13404 #endif
13405
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
    case OPTION_X32:
      if (IS_ELF)
        {
          const char **list, **l;

          list = bfd_target_list ();
          for (l = list; *l != NULL; l++)
            if (startswith (*l, "elf32-x86-64"))
              {
                default_arch = "x86_64:32";
                break;
              }
          if (*l == NULL)
            as_fatal (_("no compiled in support for 32bit x86_64"));
          free (list);
        }
      else
        as_fatal (_("32bit x86_64 is only supported for ELF"));
      break;
#endif

    case OPTION_32:
      default_arch = "i386";
      break;

    case OPTION_DIVIDE:
#ifdef SVR4_COMMENT_CHARS
      {
        char *n, *t;
        const char *s;

        /* Rebuild the comment character list without `/', so that `/'
           can be used for division.  */
        n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
        t = n;
        for (s = i386_comment_chars; *s != '\0'; s++)
          if (*s != '/')
            *t++ = *s;
        *t = '\0';
        i386_comment_chars = n;
      }
#endif
      break;

    case OPTION_MARCH:
      saved = xstrdup (arg);
      arch = saved;
      /* Allow -march=+nosse.  */
      if (*arch == '+')
        arch++;
      do
        {
          if (*arch == '.')
            as_fatal (_("invalid -march= option: `%s'"), arg);
          next = strchr (arch, '+');
          if (next)
            *next++ = '\0';
          for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
            {
              if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
                  && strcmp (arch, cpu_arch[j].name) == 0)
                {
                  /* Processor.  */
                  if (! cpu_arch[j].enable.bitfield.cpui386)
                    continue;

                  cpu_arch_name = cpu_arch[j].name;
                  free (cpu_sub_arch_name);
                  cpu_sub_arch_name = NULL;
                  cpu_arch_flags = cpu_arch[j].enable;
                  cpu_arch_isa = cpu_arch[j].type;
                  cpu_arch_isa_flags = cpu_arch[j].enable;
                  if (!cpu_arch_tune_set)
                    {
                      cpu_arch_tune = cpu_arch_isa;
                      cpu_arch_tune_flags = cpu_arch_isa_flags;
                    }
                  break;
                }
              else if (cpu_arch[j].type == PROCESSOR_NONE
                       && strcmp (arch, cpu_arch[j].name) == 0
                       && !cpu_flags_all_zero (&cpu_arch[j].enable))
                {
                  /* ISA extension.  */
                  i386_cpu_flags flags;

                  flags = cpu_flags_or (cpu_arch_flags,
                                        cpu_arch[j].enable);

                  if (!cpu_flags_equal (&flags, &cpu_arch_flags))
                    {
                      extend_cpu_sub_arch_name (arch);
                      cpu_arch_flags = flags;
                      cpu_arch_isa_flags = flags;
                    }
                  else
                    cpu_arch_isa_flags
                      = cpu_flags_or (cpu_arch_isa_flags,
                                      cpu_arch[j].enable);
                  break;
                }
            }

          if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
            {
              /* Disable an ISA extension.  */
              for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
                if (cpu_arch[j].type == PROCESSOR_NONE
                    && strcmp (arch + 2, cpu_arch[j].name) == 0)
                  {
                    i386_cpu_flags flags;

                    flags = cpu_flags_and_not (cpu_arch_flags,
                                               cpu_arch[j].disable);
                    if (!cpu_flags_equal (&flags, &cpu_arch_flags))
                      {
                        extend_cpu_sub_arch_name (arch);
                        cpu_arch_flags = flags;
                        cpu_arch_isa_flags = flags;
                      }
                    break;
                  }
            }

          if (j >= ARRAY_SIZE (cpu_arch))
            as_fatal (_("invalid -march= option: `%s'"), arg);

          arch = next;
        }
      while (next != NULL);
      free (saved);
      break;

    case OPTION_MTUNE:
      if (*arg == '.')
        as_fatal (_("invalid -mtune= option: `%s'"), arg);
      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
        {
          if (cpu_arch[j].type != PROCESSOR_NONE
              && strcmp (arg, cpu_arch[j].name) == 0)
            {
              cpu_arch_tune_set = 1;
              cpu_arch_tune = cpu_arch[j].type;
              cpu_arch_tune_flags = cpu_arch[j].enable;
              break;
            }
        }
      if (j >= ARRAY_SIZE (cpu_arch))
        as_fatal (_("invalid -mtune= option: `%s'"), arg);
      break;

    case OPTION_MMNEMONIC:
      if (strcasecmp (arg, "att") == 0)
        intel_mnemonic = 0;
      else if (strcasecmp (arg, "intel") == 0)
        intel_mnemonic = 1;
      else
        as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
      break;

    case OPTION_MSYNTAX:
      if (strcasecmp (arg, "att") == 0)
        intel_syntax = 0;
      else if (strcasecmp (arg, "intel") == 0)
        intel_syntax = 1;
      else
        as_fatal (_("invalid -msyntax= option: `%s'"), arg);
      break;

    case OPTION_MINDEX_REG:
      allow_index_reg = 1;
      break;

    case OPTION_MNAKED_REG:
      allow_naked_reg = 1;
      break;

    case OPTION_MSSE2AVX:
      sse2avx = 1;
      break;

    case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
      use_unaligned_vector_move = 1;
      break;

    case OPTION_MSSE_CHECK:
      if (strcasecmp (arg, "error") == 0)
        sse_check = check_error;
      else if (strcasecmp (arg, "warning") == 0)
        sse_check = check_warning;
      else if (strcasecmp (arg, "none") == 0)
        sse_check = check_none;
      else
        as_fatal (_("invalid -msse-check= option: `%s'"), arg);
      break;

    case OPTION_MOPERAND_CHECK:
      if (strcasecmp (arg, "error") == 0)
        operand_check = check_error;
      else if (strcasecmp (arg, "warning") == 0)
        operand_check = check_warning;
      else if (strcasecmp (arg, "none") == 0)
        operand_check = check_none;
      else
        as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
      break;

    case OPTION_MAVXSCALAR:
      if (strcasecmp (arg, "128") == 0)
        avxscalar = vex128;
      else if (strcasecmp (arg, "256") == 0)
        avxscalar = vex256;
      else
        as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
      break;

    case OPTION_MVEXWIG:
      if (strcmp (arg, "0") == 0)
        vexwig = vexw0;
      else if (strcmp (arg, "1") == 0)
        vexwig = vexw1;
      else
        as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
      break;

    case OPTION_MADD_BND_PREFIX:
      add_bnd_prefix = 1;
      break;

    case OPTION_MEVEXLIG:
      if (strcmp (arg, "128") == 0)
        evexlig = evexl128;
      else if (strcmp (arg, "256") == 0)
        evexlig = evexl256;
      else if (strcmp (arg, "512") == 0)
        evexlig = evexl512;
      else
        as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
      break;

    case OPTION_MEVEXRCIG:
      if (strcmp (arg, "rne") == 0)
        evexrcig = rne;
      else if (strcmp (arg, "rd") == 0)
        evexrcig = rd;
      else if (strcmp (arg, "ru") == 0)
        evexrcig = ru;
      else if (strcmp (arg, "rz") == 0)
        evexrcig = rz;
      else
        as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
      break;

    case OPTION_MEVEXWIG:
      if (strcmp (arg, "0") == 0)
        evexwig = evexw0;
      else if (strcmp (arg, "1") == 0)
        evexwig = evexw1;
      else
        as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
      break;

#if defined (TE_PE) || defined (TE_PEP)
    case OPTION_MBIG_OBJ:
      use_big_obj = 1;
      break;
#endif

    case OPTION_MOMIT_LOCK_PREFIX:
      if (strcasecmp (arg, "yes") == 0)
        omit_lock_prefix = 1;
      else if (strcasecmp (arg, "no") == 0)
        omit_lock_prefix = 0;
      else
        as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
      break;

    case OPTION_MFENCE_AS_LOCK_ADD:
      if (strcasecmp (arg, "yes") == 0)
        avoid_fence = 1;
      else if (strcasecmp (arg, "no") == 0)
        avoid_fence = 0;
      else
        as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
      break;

    case OPTION_MLFENCE_AFTER_LOAD:
      if (strcasecmp (arg, "yes") == 0)
        lfence_after_load = 1;
      else if (strcasecmp (arg, "no") == 0)
        lfence_after_load = 0;
      else
        as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
      break;

    case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
      if (strcasecmp (arg, "all") == 0)
        {
          lfence_before_indirect_branch = lfence_branch_all;
          if (lfence_before_ret == lfence_before_ret_none)
            lfence_before_ret = lfence_before_ret_shl;
        }
      else if (strcasecmp (arg, "memory") == 0)
        lfence_before_indirect_branch = lfence_branch_memory;
      else if (strcasecmp (arg, "register") == 0)
        lfence_before_indirect_branch = lfence_branch_register;
      else if (strcasecmp (arg, "none") == 0)
        lfence_before_indirect_branch = lfence_branch_none;
      else
        as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
                  arg);
      break;

    case OPTION_MLFENCE_BEFORE_RET:
      if (strcasecmp (arg, "or") == 0)
        lfence_before_ret = lfence_before_ret_or;
      else if (strcasecmp (arg, "not") == 0)
        lfence_before_ret = lfence_before_ret_not;
      else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
        lfence_before_ret = lfence_before_ret_shl;
      else if (strcasecmp (arg, "none") == 0)
        lfence_before_ret = lfence_before_ret_none;
      else
        as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
                  arg);
      break;

    case OPTION_MRELAX_RELOCATIONS:
      if (strcasecmp (arg, "yes") == 0)
        generate_relax_relocations = 1;
      else if (strcasecmp (arg, "no") == 0)
        generate_relax_relocations = 0;
      else
        as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
      break;

    case OPTION_MALIGN_BRANCH_BOUNDARY:
      {
        char *end;
        long int align = strtoul (arg, &end, 0);

        /* Accept 0 (disable) or a power of two that is at least 16;
           store the boundary as its base-2 logarithm.  */
        if (*end == '\0')
          {
            if (align == 0)
              {
                align_branch_power = 0;
                break;
              }
            else if (align >= 16)
              {
                int align_power;
                for (align_power = 0;
                     (align & 1) == 0;
                     align >>= 1, align_power++)
                  continue;
                /* Limit alignment power to 31.  */
                if (align == 1 && align_power < 32)
                  {
                    align_branch_power = align_power;
                    break;
                  }
              }
          }
        as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
      }
      break;

    case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
      {
        char *end;
        int align = strtoul (arg, &end, 0);
        /* Some processors only support 5 prefixes.  */
        if (*end == '\0' && align >= 0 && align < 6)
          {
            align_branch_prefix_size = align;
            break;
          }
        as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
                  arg);
      }
      break;

    case OPTION_MALIGN_BRANCH:
      align_branch = 0;
      saved = xstrdup (arg);
      type = saved;
      do
        {
          next = strchr (type, '+');
          if (next)
            *next++ = '\0';
          if (strcasecmp (type, "jcc") == 0)
            align_branch |= align_branch_jcc_bit;
          else if (strcasecmp (type, "fused") == 0)
            align_branch |= align_branch_fused_bit;
          else if (strcasecmp (type, "jmp") == 0)
            align_branch |= align_branch_jmp_bit;
          else if (strcasecmp (type, "call") == 0)
            align_branch |= align_branch_call_bit;
          else if (strcasecmp (type, "ret") == 0)
            align_branch |= align_branch_ret_bit;
          else if (strcasecmp (type, "indirect") == 0)
            align_branch |= align_branch_indirect_bit;
          else
            as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
          type = next;
        }
      while (next != NULL);
      free (saved);
      break;

    case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
      align_branch_power = 5;
      align_branch_prefix_size = 5;
      align_branch = (align_branch_jcc_bit
                      | align_branch_fused_bit
                      | align_branch_jmp_bit);
      break;

    case OPTION_MAMD64:
      isa64 = amd64;
      break;

    case OPTION_MINTEL64:
      isa64 = intel64;
      break;

    case 'O':
      if (arg == NULL)
        {
          optimize = 1;
          /* Turn off -Os.  */
          optimize_for_space = 0;
        }
      else if (*arg == 's')
        {
          optimize_for_space = 1;
          /* Turn on all encoding optimizations.  */
          optimize = INT_MAX;
        }
      else
        {
          optimize = atoi (arg);
          /* Turn off -Os.  */
          optimize_for_space = 0;
        }
      break;

    default:
      return 0;
    }
  return 1;
}

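/* Helpers for md_show_usage: MESSAGE_TEMPLATE fixes the width of one
   output line, and output_message () appends NAME to the line being
   built, flushing the finished line to STREAM first whenever NAME would
   no longer fit.  */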
#define MESSAGE_TEMPLATE \
"                                                                                "

static char *
output_message (FILE *stream, char *p, char *message, char *start,
                int *left_p, const char *name, int len)
{
  int size = sizeof (MESSAGE_TEMPLATE);
  int left = *left_p;

  /* Reserve 2 spaces for ", " or ",\0".  */
  left -= len + 2;

  /* Check if there is any room.  */
  if (left >= 0)
    {
      if (p != start)
        {
          *p++ = ',';
          *p++ = ' ';
        }
      p = mempcpy (p, name, len);
    }
  else
    {
      /* Output the current message now and start a new one.  */
      *p++ = ',';
      *p = '\0';
      fprintf (stream, "%s\n", message);
      p = start;
      left = size - (start - message) - len - 2;

      gas_assert (left >= 0);

      p = mempcpy (p, name, len);
    }

  *left_p = left;
  return p;
}

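/* List the known processors (EXT == 0) or ISA extensions (EXT != 0) on
   STREAM.  With CHECK set, additionally list the "default"/"push"/"pop"
   pseudo architectures and omit processors lacking i386 support.  */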
static void
show_arch (FILE *stream, int ext, int check)
{
  static char message[] = MESSAGE_TEMPLATE;
  char *start = message + 27;
  char *p;
  int size = sizeof (MESSAGE_TEMPLATE);
  int left;
  const char *name;
  int len;
  unsigned int j;

  p = start;
  left = size - (start - message);

  if (!ext && check)
    {
      p = output_message (stream, p, message, start, &left,
                          STRING_COMMA_LEN ("default"));
      p = output_message (stream, p, message, start, &left,
                          STRING_COMMA_LEN ("push"));
      p = output_message (stream, p, message, start, &left,
                          STRING_COMMA_LEN ("pop"));
    }

  for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
    {
      /* Should it be skipped?  */
      if (cpu_arch[j].skip)
        continue;

      name = cpu_arch[j].name;
      len = cpu_arch[j].len;
      if (cpu_arch[j].type == PROCESSOR_NONE)
        {
          /* It is an extension.  Skip if we aren't asked to show it.  */
          if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
            continue;
        }
      else if (ext)
        {
          /* It is a processor.  Skip if we only show extensions.  */
          continue;
        }
      else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
        {
          /* It is an impossible processor - skip.  */
          continue;
        }

      p = output_message (stream, p, message, start, &left, name, len);
    }

  /* Display disabled extensions.  */
  if (ext)
    for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
      {
        char *str;

        if (cpu_arch[j].type != PROCESSOR_NONE
            || !cpu_flags_all_zero (&cpu_arch[j].enable))
          continue;
        str = xasprintf ("no%s", cpu_arch[j].name);
        p = output_message (stream, p, message, start, &left, str,
                            strlen (str));
        free (str);
      }

  *p = '\0';
  fprintf (stream, "%s\n", message);
}

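/* Document the target specific command line options for --help.  An
   illustrative invocation exercising a few of them (not taken from this
   file) might be:
     as --64 -march=generic64+avx2 -malign-branch-boundary=32 foo.s  */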
void
md_show_usage (FILE *stream)
{
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -Qy, -Qn                ignored\n\
  -V                      print assembler version number\n\
  -k                      ignored\n"));
#endif
  fprintf (stream, _("\
  -n                      do not optimize code alignment\n\
  -O{012s}                attempt some code optimizations\n\
  -q                      quieten some warnings\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -s                      ignored\n"));
#endif
#ifdef BFD64
# if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  --32/--64/--x32         generate 32bit/64bit/x32 object\n"));
# elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
  fprintf (stream, _("\
  --32/--64               generate 32bit/64bit object\n"));
# endif
#endif
#ifdef SVR4_COMMENT_CHARS
  fprintf (stream, _("\
  --divide                do not treat `/' as a comment character\n"));
#else
  fprintf (stream, _("\
  --divide                ignored\n"));
#endif
  fprintf (stream, _("\
  -march=CPU[,+EXTENSION...]\n\
                          generate code for CPU and EXTENSION, CPU is one of:\n"));
  show_arch (stream, 0, 1);
  fprintf (stream, _("\
                          EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
  show_arch (stream, 1, 0);
  fprintf (stream, _("\
  -mtune=CPU              optimize for CPU, CPU is one of:\n"));
  show_arch (stream, 0, 0);
  fprintf (stream, _("\
  -msse2avx               encode SSE instructions with VEX prefix\n"));
  fprintf (stream, _("\
  -muse-unaligned-vector-move\n\
                          encode aligned vector move as unaligned vector move\n"));
  fprintf (stream, _("\
  -msse-check=[none|error|warning] (default: warning)\n\
                          check SSE instructions\n"));
  fprintf (stream, _("\
  -moperand-check=[none|error|warning] (default: warning)\n\
                          check operand combinations for validity\n"));
  fprintf (stream, _("\
  -mavxscalar=[128|256] (default: 128)\n\
                          encode scalar AVX instructions with specific vector\n\
                          length\n"));
  fprintf (stream, _("\
  -mvexwig=[0|1] (default: 0)\n\
                          encode VEX instructions with specific VEX.W value\n\
                          for VEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexlig=[128|256|512] (default: 128)\n\
                          encode scalar EVEX instructions with specific vector\n\
                          length\n"));
  fprintf (stream, _("\
  -mevexwig=[0|1] (default: 0)\n\
                          encode EVEX instructions with specific EVEX.W value\n\
                          for EVEX.W bit ignored instructions\n"));
  fprintf (stream, _("\
  -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
                          encode EVEX instructions with specific EVEX.RC value\n\
                          for SAE-only ignored instructions\n"));
  fprintf (stream, _("\
  -mmnemonic=[att|intel] "));
  if (SYSV386_COMPAT)
    fprintf (stream, _("(default: att)\n"));
  else
    fprintf (stream, _("(default: intel)\n"));
  fprintf (stream, _("\
                          use AT&T/Intel mnemonic\n"));
  fprintf (stream, _("\
  -msyntax=[att|intel] (default: att)\n\
                          use AT&T/Intel syntax\n"));
  fprintf (stream, _("\
  -mindex-reg             support pseudo index registers\n"));
  fprintf (stream, _("\
  -mnaked-reg             don't require `%%' prefix for registers\n"));
  fprintf (stream, _("\
  -madd-bnd-prefix        add BND prefix for all valid branches\n"));
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  fprintf (stream, _("\
  -mshared                disable branch optimization for shared code\n"));
  fprintf (stream, _("\
  -mx86-used-note=[no|yes] "));
  if (DEFAULT_X86_USED_NOTE)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate x86 used ISA and feature properties\n"));
#endif
#if defined (TE_PE) || defined (TE_PEP)
  fprintf (stream, _("\
  -mbig-obj               generate big object files\n"));
#endif
  fprintf (stream, _("\
  -momit-lock-prefix=[no|yes] (default: no)\n\
                          strip all lock prefixes\n"));
  fprintf (stream, _("\
  -mfence-as-lock-add=[no|yes] (default: no)\n\
                          encode lfence, mfence and sfence as\n\
                          lock addl $0x0, (%%{re}sp)\n"));
  fprintf (stream, _("\
  -mrelax-relocations=[no|yes] "));
  if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
    fprintf (stream, _("(default: yes)\n"));
  else
    fprintf (stream, _("(default: no)\n"));
  fprintf (stream, _("\
                          generate relax relocations\n"));
  fprintf (stream, _("\
  -malign-branch-boundary=NUM (default: 0)\n\
                          align branches within NUM byte boundary\n"));
  fprintf (stream, _("\
  -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
                          TYPE is combination of jcc, fused, jmp, call, ret,\n\
                          indirect\n\
                          specify types of branches to align\n"));
  fprintf (stream, _("\
  -malign-branch-prefix-size=NUM (default: 5)\n\
                          align branches with NUM prefixes per instruction\n"));
  fprintf (stream, _("\
  -mbranches-within-32B-boundaries\n\
                          align branches within 32 byte boundary\n"));
  fprintf (stream, _("\
  -mlfence-after-load=[no|yes] (default: no)\n\
                          generate lfence after load\n"));
  fprintf (stream, _("\
  -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
                          generate lfence before indirect near branch\n"));
  fprintf (stream, _("\
  -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
                          generate lfence before ret\n"));
  fprintf (stream, _("\
  -mamd64                 accept only AMD64 ISA [default]\n"));
  fprintf (stream, _("\
  -mintel64               accept only Intel64 ISA\n"));
}

#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
     || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))

/* Pick the target format to use.  */

const char *
i386_target_format (void)
{
  if (startswith (default_arch, "x86_64"))
    {
      update_code_flag (CODE_64BIT, 1);
      /* A bare "x86_64" selects the 64-bit ABI; "x86_64:32" selects x32.  */
      if (default_arch[6] == '\0')
        x86_elf_abi = X86_64_ABI;
      else
        x86_elf_abi = X86_64_X32_ABI;
    }
  else if (!strcmp (default_arch, "i386"))
    update_code_flag (CODE_32BIT, 1);
  else if (!strcmp (default_arch, "iamcu"))
    {
      update_code_flag (CODE_32BIT, 1);
      if (cpu_arch_isa == PROCESSOR_UNKNOWN)
        {
          static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
          cpu_arch_name = "iamcu";
          free (cpu_sub_arch_name);
          cpu_sub_arch_name = NULL;
          cpu_arch_flags = iamcu_flags;
          cpu_arch_isa = PROCESSOR_IAMCU;
          cpu_arch_isa_flags = iamcu_flags;
          if (!cpu_arch_tune_set)
            {
              cpu_arch_tune = cpu_arch_isa;
              cpu_arch_tune_flags = cpu_arch_isa_flags;
            }
        }
      else if (cpu_arch_isa != PROCESSOR_IAMCU)
        as_fatal (_("Intel MCU doesn't support `%s' architecture"),
                  cpu_arch_name);
    }
  else
    as_fatal (_("unknown architecture"));

  if (cpu_flags_all_zero (&cpu_arch_isa_flags))
    cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
  if (cpu_flags_all_zero (&cpu_arch_tune_flags))
    cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;

  switch (OUTPUT_FLAVOR)
    {
#if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
    case bfd_target_aout_flavour:
      return AOUT_TARGET_FORMAT;
#endif
#if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
# if defined (TE_PE) || defined (TE_PEP)
    case bfd_target_coff_flavour:
      if (flag_code == CODE_64BIT)
        {
          object_64bit = 1;
          return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
        }
      return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
# elif defined (TE_GO32)
    case bfd_target_coff_flavour:
      return "coff-go32";
# else
    case bfd_target_coff_flavour:
      return "coff-i386";
# endif
#endif
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
    case bfd_target_elf_flavour:
      {
        const char *format;

        switch (x86_elf_abi)
          {
          default:
            format = ELF_TARGET_FORMAT;
#ifndef TE_SOLARIS
            tls_get_addr = "___tls_get_addr";
#endif
            break;
          case X86_64_ABI:
            use_rela_relocations = 1;
            object_64bit = 1;
#ifndef TE_SOLARIS
            tls_get_addr = "__tls_get_addr";
#endif
            format = ELF_TARGET_FORMAT64;
            break;
          case X86_64_X32_ABI:
            use_rela_relocations = 1;
            object_64bit = 1;
#ifndef TE_SOLARIS
            tls_get_addr = "__tls_get_addr";
#endif
            disallow_64bit_reloc = 1;
            format = ELF_TARGET_FORMAT32;
            break;
          }
        if (cpu_arch_isa == PROCESSOR_IAMCU)
          {
            if (x86_elf_abi != I386_ABI)
              as_fatal (_("Intel MCU is 32bit only"));
            return ELF_TARGET_IAMCU_FORMAT;
          }
        else
          return format;
      }
#endif
#if defined (OBJ_MACH_O)
    case bfd_target_mach_o_flavour:
      if (flag_code == CODE_64BIT)
        {
          use_rela_relocations = 1;
          object_64bit = 1;
          return "mach-o-x86-64";
        }
      else
        return "mach-o-i386";
#endif
    default:
      abort ();
      return NULL;
    }
}

#endif /* OBJ_MAYBE_ more than one */
\f
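/* Create the GOT symbol lazily: the first time GLOBAL_OFFSET_TABLE_NAME
   is looked up as an otherwise undefined symbol, enter it into the
   symbol table and remember it in GOT_symbol.  */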
symbolS *
md_undefined_symbol (char *name)
{
  if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
      && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
      && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
      && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
    {
      if (!GOT_symbol)
        {
          if (symbol_find (name))
            as_bad (_("GOT already in symbol table"));
          GOT_symbol = symbol_new (name, undefined_section,
                                   &zero_address_frag, 0);
        }
      return GOT_symbol;
    }
  return NULL;
}

/* Round up a section size to the appropriate boundary.  */

valueT
md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
{
#if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
  if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
    {
      /* For a.out, force the section size to be aligned.  If we don't do
         this, BFD will align it for us, but it will not write out the
         final bytes of the section.  This may be a bug in BFD, but it is
         easier to fix it here since that is how the other a.out targets
         work.  */
      int align;

      align = bfd_section_alignment (segment);
      size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
    }
#endif

  return size;
}

/* On the i386, PC-relative offsets are relative to the start of the
   next instruction.  That is, the address of the offset, plus its
   size, since the offset is always the last part of the insn.  */

long
md_pcrel_from (fixS *fixP)
{
  return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
}

#ifndef I386COFF

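/* Handle the .bss pseudo-op: switch to the subsegment of bss_section
   selected by the absolute expression following the directive.  */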
static void
s_bss (int ignore ATTRIBUTE_UNUSED)
{
  int temp;

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (IS_ELF)
    obj_elf_section_change_hook ();
#endif
  temp = get_absolute_expression ();
  subseg_set (bss_section, (subsegT) temp);
  demand_empty_rest_of_line ();
}

#endif

/* Remember constant directive.  */

void
i386_cons_align (int ignore ATTRIBUTE_UNUSED)
{
  if (last_insn.kind != last_insn_directive
      && (bfd_section_flags (now_seg) & SEC_CODE))
    {
      last_insn.seg = now_seg;
      last_insn.kind = last_insn_directive;
      last_insn.name = "constant directive";
      last_insn.file = as_where (&last_insn.line);
      if (lfence_before_ret != lfence_before_ret_none)
        {
          if (lfence_before_indirect_branch != lfence_branch_none)
            as_warn (_("constant directive skips -mlfence-before-ret "
                       "and -mlfence-before-indirect-branch"));
          else
            as_warn (_("constant directive skips -mlfence-before-ret"));
        }
      else if (lfence_before_indirect_branch != lfence_branch_none)
        as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
    }
}

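/* Validate the fixup FIXP before a relocation is generated for it:
   reject relocations against registers, decide whether size relocations
   survive to the output, and canonicalize GOT- and PLT-related
   relocation types.  Return 0 if no relocation should be emitted,
   non-zero otherwise.  */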
int
i386_validate_fix (fixS *fixp)
{
  if (fixp->fx_addsy && S_GET_SEGMENT (fixp->fx_addsy) == reg_section)
    {
      reloc_howto_type *howto;

      howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
      as_bad_where (fixp->fx_file, fixp->fx_line,
                    _("invalid %s relocation against register"),
                    howto ? howto->name : "<unknown>");
      return 0;
    }

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  if (fixp->fx_r_type == BFD_RELOC_SIZE32
      || fixp->fx_r_type == BFD_RELOC_SIZE64)
    return IS_ELF && fixp->fx_addsy
           && (!S_IS_DEFINED (fixp->fx_addsy)
               || S_IS_EXTERNAL (fixp->fx_addsy));
#endif

  if (fixp->fx_subsy)
    {
      if (fixp->fx_subsy == GOT_symbol)
        {
          if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
            {
              if (!object_64bit)
                abort ();
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
              if (fixp->fx_tcbit2)
                fixp->fx_r_type = (fixp->fx_tcbit
                                   ? BFD_RELOC_X86_64_REX_GOTPCRELX
                                   : BFD_RELOC_X86_64_GOTPCRELX);
              else
#endif
                fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
            }
          else
            {
              if (!object_64bit)
                fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
              else
                fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
            }
          fixp->fx_subsy = 0;
        }
    }
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  else
    {
      /* NB: Commit 292676c1 resolved PLT32 relocs against local symbols
         to their sections.  Since a PLT32 relocation must be against a
         symbol, turn such a PLT32 relocation into a PC32 relocation.  */
      if (fixp->fx_addsy
          && (fixp->fx_r_type == BFD_RELOC_386_PLT32
              || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
          && symbol_section_p (fixp->fx_addsy))
        fixp->fx_r_type = BFD_RELOC_32_PCREL;
      if (!object_64bit)
        {
          if (fixp->fx_r_type == BFD_RELOC_386_GOT32
              && fixp->fx_tcbit2)
            fixp->fx_r_type = BFD_RELOC_386_GOT32X;
        }
    }
#endif

  return 1;
}

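/* Translate the internal fixup FIXP into a BFD relocation for SECTION:
   map the fixup's type and size to a bfd_reloc_code_real_type, compute
   the addend, and look up the howto.  Return NULL when the fixup has
   been fully resolved or is in error.  */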
arelent *
tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
{
  arelent *rel;
  bfd_reloc_code_real_type code;

  switch (fixp->fx_r_type)
    {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      symbolS *sym;

    case BFD_RELOC_SIZE32:
    case BFD_RELOC_SIZE64:
      if (fixp->fx_addsy
          && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
          && (!fixp->fx_subsy
              || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
        sym = fixp->fx_addsy;
      else if (fixp->fx_subsy
               && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
               && (!fixp->fx_addsy
                   || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
        sym = fixp->fx_subsy;
      else
        sym = NULL;
      if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
        {
          /* Resolve size relocation against local symbol to size of
             the symbol plus addend.  */
          valueT value = S_GET_SIZE (sym);

          if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
            value = bfd_section_size (S_GET_SEGMENT (sym));
          if (sym == fixp->fx_subsy)
            {
              value = -value;
              if (fixp->fx_addsy)
                value += S_GET_VALUE (fixp->fx_addsy);
            }
          else if (fixp->fx_subsy)
            value -= S_GET_VALUE (fixp->fx_subsy);
          value += fixp->fx_offset;
          if (fixp->fx_r_type == BFD_RELOC_SIZE32
              && object_64bit
              && !fits_in_unsigned_long (value))
            as_bad_where (fixp->fx_file, fixp->fx_line,
                          _("symbol size computation overflow"));
          fixp->fx_addsy = NULL;
          fixp->fx_subsy = NULL;
          md_apply_fix (fixp, (valueT *) &value, NULL);
          return NULL;
        }
      if (!fixp->fx_addsy || fixp->fx_subsy)
        {
          as_bad_where (fixp->fx_file, fixp->fx_line,
                        _("unsupported expression involving @size"));
          return NULL;
        }
#endif
      /* Fall through.  */

    case BFD_RELOC_X86_64_PLT32:
    case BFD_RELOC_X86_64_GOT32:
    case BFD_RELOC_X86_64_GOTPCREL:
    case BFD_RELOC_X86_64_GOTPCRELX:
    case BFD_RELOC_X86_64_REX_GOTPCRELX:
    case BFD_RELOC_386_PLT32:
    case BFD_RELOC_386_GOT32:
    case BFD_RELOC_386_GOT32X:
    case BFD_RELOC_386_GOTOFF:
    case BFD_RELOC_386_GOTPC:
    case BFD_RELOC_386_TLS_GD:
    case BFD_RELOC_386_TLS_LDM:
    case BFD_RELOC_386_TLS_LDO_32:
    case BFD_RELOC_386_TLS_IE_32:
    case BFD_RELOC_386_TLS_IE:
    case BFD_RELOC_386_TLS_GOTIE:
    case BFD_RELOC_386_TLS_LE_32:
    case BFD_RELOC_386_TLS_LE:
    case BFD_RELOC_386_TLS_GOTDESC:
    case BFD_RELOC_386_TLS_DESC_CALL:
    case BFD_RELOC_X86_64_TLSGD:
    case BFD_RELOC_X86_64_TLSLD:
    case BFD_RELOC_X86_64_DTPOFF32:
    case BFD_RELOC_X86_64_DTPOFF64:
    case BFD_RELOC_X86_64_GOTTPOFF:
    case BFD_RELOC_X86_64_TPOFF32:
    case BFD_RELOC_X86_64_TPOFF64:
    case BFD_RELOC_X86_64_GOTOFF64:
    case BFD_RELOC_X86_64_GOTPC32:
    case BFD_RELOC_X86_64_GOT64:
    case BFD_RELOC_X86_64_GOTPCREL64:
    case BFD_RELOC_X86_64_GOTPC64:
    case BFD_RELOC_X86_64_GOTPLT64:
    case BFD_RELOC_X86_64_PLTOFF64:
    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
    case BFD_RELOC_X86_64_TLSDESC_CALL:
    case BFD_RELOC_RVA:
    case BFD_RELOC_VTABLE_ENTRY:
    case BFD_RELOC_VTABLE_INHERIT:
#ifdef TE_PE
    case BFD_RELOC_32_SECREL:
    case BFD_RELOC_16_SECIDX:
#endif
      code = fixp->fx_r_type;
      break;
    case BFD_RELOC_X86_64_32S:
      if (!fixp->fx_pcrel)
        {
          /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
          code = fixp->fx_r_type;
          break;
        }
      /* Fall through.  */
    default:
      if (fixp->fx_pcrel)
        {
          switch (fixp->fx_size)
            {
            default:
              as_bad_where (fixp->fx_file, fixp->fx_line,
                            _("can not do %d byte pc-relative relocation"),
                            fixp->fx_size);
              code = BFD_RELOC_32_PCREL;
              break;
            case 1: code = BFD_RELOC_8_PCREL;  break;
            case 2: code = BFD_RELOC_16_PCREL; break;
            case 4: code = BFD_RELOC_32_PCREL; break;
#ifdef BFD64
            case 8: code = BFD_RELOC_64_PCREL; break;
#endif
            }
        }
      else
        {
          switch (fixp->fx_size)
            {
            default:
              as_bad_where (fixp->fx_file, fixp->fx_line,
                            _("can not do %d byte relocation"),
                            fixp->fx_size);
              code = BFD_RELOC_32;
              break;
            case 1: code = BFD_RELOC_8;  break;
            case 2: code = BFD_RELOC_16; break;
            case 4: code = BFD_RELOC_32; break;
#ifdef BFD64
            case 8: code = BFD_RELOC_64; break;
#endif
            }
        }
      break;
    }

  if ((code == BFD_RELOC_32
       || code == BFD_RELOC_32_PCREL
       || code == BFD_RELOC_X86_64_32S)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      if (!object_64bit)
        code = BFD_RELOC_386_GOTPC;
      else
        code = BFD_RELOC_X86_64_GOTPC32;
    }
  if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      code = BFD_RELOC_X86_64_GOTPC64;
    }

  rel = XNEW (arelent);
  rel->sym_ptr_ptr = XNEW (asymbol *);
  *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);

  rel->address = fixp->fx_frag->fr_address + fixp->fx_where;

  if (!use_rela_relocations)
    {
      /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
         vtable entry to be used in the relocation's section offset.  */
      if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
        rel->address = fixp->fx_offset;
#if defined (OBJ_COFF) && defined (TE_PE)
      else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
        rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
      else
#endif
        rel->addend = 0;
    }
  /* Use Rela relocations in 64-bit mode.  */
  else
    {
      if (disallow_64bit_reloc)
        switch (code)
          {
          case BFD_RELOC_X86_64_DTPOFF64:
          case BFD_RELOC_X86_64_TPOFF64:
          case BFD_RELOC_64_PCREL:
          case BFD_RELOC_X86_64_GOTOFF64:
          case BFD_RELOC_X86_64_GOT64:
          case BFD_RELOC_X86_64_GOTPCREL64:
          case BFD_RELOC_X86_64_GOTPC64:
          case BFD_RELOC_X86_64_GOTPLT64:
          case BFD_RELOC_X86_64_PLTOFF64:
            as_bad_where (fixp->fx_file, fixp->fx_line,
                          _("cannot represent relocation type %s in x32 mode"),
                          bfd_get_reloc_code_name (code));
            break;
          default:
            break;
          }

      if (!fixp->fx_pcrel)
        rel->addend = fixp->fx_offset;
      else
        switch (code)
          {
          case BFD_RELOC_X86_64_PLT32:
          case BFD_RELOC_X86_64_GOT32:
          case BFD_RELOC_X86_64_GOTPCREL:
          case BFD_RELOC_X86_64_GOTPCRELX:
          case BFD_RELOC_X86_64_REX_GOTPCRELX:
          case BFD_RELOC_X86_64_TLSGD:
          case BFD_RELOC_X86_64_TLSLD:
          case BFD_RELOC_X86_64_GOTTPOFF:
          case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
          case BFD_RELOC_X86_64_TLSDESC_CALL:
            rel->addend = fixp->fx_offset - fixp->fx_size;
            break;
          default:
            rel->addend = (section->vma
                           - fixp->fx_size
                           + fixp->fx_addnumber
                           + md_pcrel_from (fixp));
            break;
          }
    }

  rel->howto = bfd_reloc_type_lookup (stdoutput, code);
  if (rel->howto == NULL)
    {
      as_bad_where (fixp->fx_file, fixp->fx_line,
                    _("cannot represent relocation type %s"),
                    bfd_get_reloc_code_name (code));
      /* Set howto to a garbage value so that we can keep going.  */
      rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
      gas_assert (rel->howto != NULL);
    }

  return rel;
}

#include "tc-i386-intel.c"

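/* Parse a register name for a CFI directive and turn EXP into the
   corresponding DWARF register number, temporarily allowing bare and
   pseudo register names while parsing.  */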
void
tc_x86_parse_to_dw2regnum (expressionS *exp)
{
  int saved_naked_reg;
  char saved_register_dot;

  saved_naked_reg = allow_naked_reg;
  allow_naked_reg = 1;
  saved_register_dot = register_chars['.'];
  register_chars['.'] = '.';
  allow_pseudo_reg = 1;
  expression_and_evaluate (exp);
  allow_pseudo_reg = 0;
  register_chars['.'] = saved_register_dot;
  allow_naked_reg = saved_naked_reg;

  if (exp->X_op == O_register && exp->X_add_number >= 0)
    {
      if ((addressT) exp->X_add_number < i386_regtab_size)
        {
          exp->X_op = O_constant;
          exp->X_add_number = i386_regtab[exp->X_add_number]
                              .dw2_regnum[flag_code >> 1];
        }
      else
        exp->X_op = O_illegal;
    }
}

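/* Emit the initial CFI instructions of a frame: define the CFA relative
   to the stack pointer (esp/rsp, resolved once per code size through
   tc_x86_parse_to_dw2regnum) and record where the return address is
   saved relative to the CFA.  */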
void
tc_x86_frame_initial_instructions (void)
{
  static unsigned int sp_regno[2];

  if (!sp_regno[flag_code >> 1])
    {
      char *saved_input = input_line_pointer;
      char sp[][4] = {"esp", "rsp"};
      expressionS exp;

      input_line_pointer = sp[flag_code >> 1];
      tc_x86_parse_to_dw2regnum (&exp);
      gas_assert (exp.X_op == O_constant);
      sp_regno[flag_code >> 1] = exp.X_add_number;
      input_line_pointer = saved_input;
    }

  cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
  cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
}

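/* Return the size in bytes of a DWARF address: 4 for the x32 ABI even
   though the word size is 64 bits, otherwise derived from the output
   BFD.  */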
int
x86_dwarf2_addr_size (void)
{
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
  if (x86_elf_abi == X86_64_X32_ABI)
    return 4;
#endif
  return bfd_arch_bits_per_address (stdoutput) / 8;
}

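/* Recognize target specific section types in .section directives; only
   "unwind" (SHT_X86_64_UNWIND) is accepted, and only in 64-bit mode.
   Return -1 when STR is not recognized.  */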
int
i386_elf_section_type (const char *str, size_t len)
{
  if (flag_code == CODE_64BIT
      && len == sizeof ("unwind") - 1
      && startswith (str, "unwind"))
    return SHT_X86_64_UNWIND;

  return -1;
}

#ifdef TE_SOLARIS
void
i386_solaris_fix_up_eh_frame (segT sec)
{
  if (flag_code == CODE_64BIT)
    elf_section_type (sec) = SHT_X86_64_UNWIND;
}
#endif

#ifdef TE_PE
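/* Emit a SIZE-byte section-relative (secrel) offset for SYMBOL, used
   when emitting DWARF debug information on PE targets.  */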
void
tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
{
  expressionS exp;

  exp.X_op = O_secrel;
  exp.X_add_symbol = symbol;
  exp.X_add_number = 0;
  emit_expr (&exp, size);
}
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */

bfd_vma
x86_64_section_letter (int letter, const char **ptr_msg)
{
  if (flag_code == CODE_64BIT)
    {
      if (letter == 'l')
        return SHF_X86_64_LARGE;

      *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
    }
  else
    *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
  return -1;
}

bfd_vma
x86_64_section_word (char *str, size_t len)
{
  if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
    return SHF_X86_64_LARGE;

  return -1;
}

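/* Handle the .largecomm directive: in 64-bit mode place common symbols
   in the large common section (locals in .lbss); otherwise fall back to
   a plain .comm with a warning.  */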
static void
handle_large_common (int small ATTRIBUTE_UNUSED)
{
  if (flag_code != CODE_64BIT)
    {
      s_comm_internal (0, elf_common_parse);
      as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
    }
  else
    {
      static segT lbss_section;
      asection *saved_com_section_ptr = elf_com_section_ptr;
      asection *saved_bss_section = bss_section;

      if (lbss_section == NULL)
        {
          flagword applicable;
          segT seg = now_seg;
          subsegT subseg = now_subseg;

          /* The .lbss section is for local .largecomm symbols.  */
          lbss_section = subseg_new (".lbss", 0);
          applicable = bfd_applicable_section_flags (stdoutput);
          bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
          seg_info (lbss_section)->bss = 1;

          subseg_set (seg, subseg);
        }

      elf_com_section_ptr = &_bfd_elf_large_com_section;
      bss_section = lbss_section;

      s_comm_internal (0, elf_common_parse);

      elf_com_section_ptr = saved_com_section_ptr;
      bss_section = saved_bss_section;
    }
}
#endif /* OBJ_ELF || OBJ_MAYBE_ELF */