x86: respect {nooptimize} for LEA
binutils-gdb.git/gas/config/tc-i386.c
1 /* tc-i386.c -- Assemble code for the Intel 80386
2 Copyright (C) 1989-2023 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Intel 80386 machine specific gas.
22 Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
23 x86_64 support by Jan Hubicka (jh@suse.cz)
24 VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
25 Bugs & suggestions are completely welcome. This is free software.
26 Please help us make it better. */
27
28 #include "as.h"
29 #include "safe-ctype.h"
30 #include "subsegs.h"
31 #include "dwarf2dbg.h"
32 #include "dw2gencfi.h"
33 #include "gen-sframe.h"
34 #include "sframe.h"
35 #include "elf/x86-64.h"
36 #include "opcodes/i386-init.h"
37 #include "opcodes/i386-mnem.h"
38 #include <limits.h>
39
40 #ifndef INFER_ADDR_PREFIX
41 #define INFER_ADDR_PREFIX 1
42 #endif
43
44 #ifndef DEFAULT_ARCH
45 #define DEFAULT_ARCH "i386"
46 #endif
47
48 #ifndef INLINE
49 #if __GNUC__ >= 2
50 #define INLINE __inline__
51 #else
52 #define INLINE
53 #endif
54 #endif
55
56 /* Prefixes will be emitted in the order defined below.
57 WAIT_PREFIX must be the first prefix since FWAIT really is an
58 instruction, and so must come before any prefixes.
59 The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
60 REP_PREFIX/HLE_PREFIX, LOCK_PREFIX. */
61 #define WAIT_PREFIX 0
62 #define SEG_PREFIX 1
63 #define ADDR_PREFIX 2
64 #define DATA_PREFIX 3
65 #define REP_PREFIX 4
66 #define HLE_PREFIX REP_PREFIX
67 #define BND_PREFIX REP_PREFIX
68 #define LOCK_PREFIX 5
69 #define REX_PREFIX 6 /* must come last. */
70 #define MAX_PREFIXES 7 /* max prefixes per opcode */
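/* Illustrative note (an editorial sketch, not upstream code): for
   "lock addl %eax, %fs:(%ecx)" the 0x64 FS override would sit in
   i.prefix[SEG_PREFIX] and 0xf0 in i.prefix[LOCK_PREFIX], so the
   segment override byte is emitted before the lock byte.  */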
71
72 /* we define the syntax here (modulo base,index,scale syntax) */
73 #define REGISTER_PREFIX '%'
74 #define IMMEDIATE_PREFIX '$'
75 #define ABSOLUTE_PREFIX '*'
76
77 /* these are the instruction mnemonic suffixes in AT&T syntax or
78 memory operand size in Intel syntax. */
79 #define WORD_MNEM_SUFFIX 'w'
80 #define BYTE_MNEM_SUFFIX 'b'
81 #define SHORT_MNEM_SUFFIX 's'
82 #define LONG_MNEM_SUFFIX 'l'
83 #define QWORD_MNEM_SUFFIX 'q'
84
85 #define END_OF_INSN '\0'
86
87 #define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }
88
89 /* This matches the C -> StaticRounding alias in the opcode table. */
90 #define commutative staticrounding
91
92 /*
93 'templates' is for grouping together 'template' structures for opcodes
94 of the same name. This is only used for storing the insns in the grand
95 ole hash table of insns.
96 The templates themselves start at START and range up to (but not including)
97 END.
98 */
99 typedef struct
100 {
101 const insn_template *start;
102 const insn_template *end;
103 }
104 templates;
105
106 /* 386 operand encoding bytes: see 386 book for details of this. */
107 typedef struct
108 {
109 unsigned int regmem; /* codes register or memory operand */
110 unsigned int reg; /* codes register operand (or extended opcode) */
111 unsigned int mode; /* how to interpret regmem & reg */
112 }
113 modrm_byte;
114
115 /* x86-64 extension prefix. */
116 typedef int rex_byte;
117
118 /* 386 opcode byte to code indirect addressing. */
119 typedef struct
120 {
121 unsigned base;
122 unsigned index;
123 unsigned scale;
124 }
125 sib_byte;
126
127 /* x86 arch names, types and features */
128 typedef struct
129 {
130 const char *name; /* arch name */
131 unsigned int len:8; /* arch string length */
132 bool skip:1; /* show_arch should skip this. */
133 enum processor_type type; /* arch type */
134 i386_cpu_flags enable; /* cpu feature enable flags */
135 i386_cpu_flags disable; /* cpu feature disable flags */
136 }
137 arch_entry;
138
139 static void update_code_flag (int, int);
140 static void set_code_flag (int);
141 static void set_16bit_gcc_code_flag (int);
142 static void set_intel_syntax (int);
143 static void set_intel_mnemonic (int);
144 static void set_allow_index_reg (int);
145 static void set_check (int);
146 static void set_cpu_arch (int);
147 #ifdef TE_PE
148 static void pe_directive_secrel (int);
149 static void pe_directive_secidx (int);
150 #endif
151 static void signed_cons (int);
152 static char *output_invalid (int c);
153 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
154 const char *);
155 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
156 const char *);
157 static int i386_att_operand (char *);
158 static int i386_intel_operand (char *, int);
159 static int i386_intel_simplify (expressionS *);
160 static int i386_intel_parse_name (const char *, expressionS *);
161 static const reg_entry *parse_register (char *, char **);
162 static const char *parse_insn (const char *, char *);
163 static char *parse_operands (char *, const char *);
164 static void swap_operands (void);
165 static void swap_2_operands (unsigned int, unsigned int);
166 static enum flag_code i386_addressing_mode (void);
167 static void optimize_imm (void);
168 static void optimize_disp (void);
169 static const insn_template *match_template (char);
170 static int check_string (void);
171 static int process_suffix (void);
172 static int check_byte_reg (void);
173 static int check_long_reg (void);
174 static int check_qword_reg (void);
175 static int check_word_reg (void);
176 static int finalize_imm (void);
177 static int process_operands (void);
178 static const reg_entry *build_modrm_byte (void);
179 static void output_insn (void);
180 static void output_imm (fragS *, offsetT);
181 static void output_disp (fragS *, offsetT);
182 #ifndef I386COFF
183 static void s_bss (int);
184 #endif
185 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
186 static void handle_large_common (int small ATTRIBUTE_UNUSED);
187
188 /* GNU_PROPERTY_X86_ISA_1_USED. */
189 static unsigned int x86_isa_1_used;
190 /* GNU_PROPERTY_X86_FEATURE_2_USED. */
191 static unsigned int x86_feature_2_used;
192 /* Generate x86 used ISA and feature properties. */
193 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
194 #endif
195
196 static const char *default_arch = DEFAULT_ARCH;
197
198 /* parse_register() returns this when a register alias cannot be used. */
199 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
200 { Dw2Inval, Dw2Inval } };
201
202 static const reg_entry *reg_eax;
203 static const reg_entry *reg_ds;
204 static const reg_entry *reg_es;
205 static const reg_entry *reg_ss;
206 static const reg_entry *reg_st0;
207 static const reg_entry *reg_k0;
208
209 /* VEX prefix. */
210 typedef struct
211 {
212 /* VEX prefix is either 2 or 3 bytes. EVEX is 4 bytes. */
213 unsigned char bytes[4];
214 unsigned int length;
215 /* Destination or source register specifier. */
216 const reg_entry *register_specifier;
217 } vex_prefix;
218
219 /* 'md_assemble ()' gathers together information and puts it into a
220 i386_insn. */
221
222 union i386_op
223 {
224 expressionS *disps;
225 expressionS *imms;
226 const reg_entry *regs;
227 };
228
229 enum i386_error
230 {
231 no_error, /* Must be first. */
232 operand_size_mismatch,
233 operand_type_mismatch,
234 register_type_mismatch,
235 number_of_operands_mismatch,
236 invalid_instruction_suffix,
237 bad_imm4,
238 unsupported_with_intel_mnemonic,
239 unsupported_syntax,
240 unsupported,
241 unsupported_on_arch,
242 unsupported_64bit,
243 invalid_sib_address,
244 invalid_vsib_address,
245 invalid_vector_register_set,
246 invalid_tmm_register_set,
247 invalid_dest_and_src_register_set,
248 unsupported_vector_index_register,
249 unsupported_broadcast,
250 broadcast_needed,
251 unsupported_masking,
252 mask_not_on_destination,
253 no_default_mask,
254 unsupported_rc_sae,
255 invalid_register_operand,
256 };
257
258 struct _i386_insn
259 {
260 /* TM holds the template for the insn we're currently assembling. */
261 insn_template tm;
262
263 /* SUFFIX holds the instruction size suffix for byte, word, dword
264 or qword, if given. */
265 char suffix;
266
267 /* OPCODE_LENGTH holds the number of base opcode bytes. */
268 unsigned char opcode_length;
269
270 /* OPERANDS gives the number of given operands. */
271 unsigned int operands;
272
273 /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
274 of given register, displacement, memory operands and immediate
275 operands. */
276 unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
277
278 /* TYPES [i] is the type (see above #defines) which tells us how to
279 use OP[i] for the corresponding operand. */
280 i386_operand_type types[MAX_OPERANDS];
281
282 /* Displacement expression, immediate expression, or register for each
283 operand. */
284 union i386_op op[MAX_OPERANDS];
285
286 /* Flags for operands. */
287 unsigned int flags[MAX_OPERANDS];
288 #define Operand_PCrel 1
289 #define Operand_Mem 2
290
291 /* Relocation type for operand */
292 enum bfd_reloc_code_real reloc[MAX_OPERANDS];
293
294 /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
295 the base index byte below. */
296 const reg_entry *base_reg;
297 const reg_entry *index_reg;
298 unsigned int log2_scale_factor;
299
300 /* SEG gives the seg_entries of this insn. They are zero unless
301 explicit segment overrides are given. */
302 const reg_entry *seg[2];
303
304 /* PREFIX holds all the given prefix opcodes (usually null).
305 PREFIXES is the number of prefix opcodes. */
306 unsigned int prefixes;
307 unsigned char prefix[MAX_PREFIXES];
308
309 /* Register is in low 3 bits of opcode. */
310 bool short_form;
311
312 /* The operand to a branch insn indicates an absolute branch. */
313 bool jumpabsolute;
314
315 /* The operand to a branch insn indicates a far branch. */
316 bool far_branch;
317
318 /* There is a memory operand of (%dx) which should be only used
319 with input/output instructions. */
320 bool input_output_operand;
321
322 /* Extended states. */
323 enum
324 {
325 /* Use MMX state. */
326 xstate_mmx = 1 << 0,
327 /* Use XMM state. */
328 xstate_xmm = 1 << 1,
329 /* Use YMM state. */
330 xstate_ymm = 1 << 2 | xstate_xmm,
331 /* Use ZMM state. */
332 xstate_zmm = 1 << 3 | xstate_ymm,
333 /* Use TMM state. */
334 xstate_tmm = 1 << 4,
335 /* Use MASK state. */
336 xstate_mask = 1 << 5
337 } xstate;
338
339 /* Has GOTPC or TLS relocation. */
340 bool has_gotpc_tls_reloc;
341
342 /* RM and SIB are the modrm byte and the sib byte where the
343 addressing modes of this insn are encoded. */
344 modrm_byte rm;
345 rex_byte rex;
346 rex_byte vrex;
347 sib_byte sib;
348 vex_prefix vex;
349
350 /* Masking attributes.
351
352 The struct describes masking, applied to OPERAND in the instruction.
353 REG is a pointer to the corresponding mask register. ZEROING tells
354 whether merging or zeroing mask is used. */
355 struct Mask_Operation
356 {
357 const reg_entry *reg;
358 unsigned int zeroing;
359 /* The operand where this operation is associated. */
360 unsigned int operand;
361 } mask;
362
363 /* Rounding control and SAE attributes. */
364 struct RC_Operation
365 {
366 enum rc_type
367 {
368 rc_none = -1,
369 rne,
370 rd,
371 ru,
372 rz,
373 saeonly
374 } type;
375 /* In Intel syntax the operand modifier form is supposed to be used, but
376 we continue to accept the immediate forms as well. */
377 bool modifier;
378 } rounding;
379
380 /* Broadcasting attributes.
381
382 The struct describes broadcasting, applied to OPERAND. TYPE
383 expresses the broadcast factor. */
384 struct Broadcast_Operation
385 {
386 /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}. */
387 unsigned int type;
388
389 /* Index of broadcasted operand. */
390 unsigned int operand;
391
392 /* Number of bytes to broadcast. */
393 unsigned int bytes;
394 } broadcast;
395
396 /* Compressed disp8*N attribute. */
397 unsigned int memshift;
398
399 /* Prefer load or store in encoding. */
400 enum
401 {
402 dir_encoding_default = 0,
403 dir_encoding_load,
404 dir_encoding_store,
405 dir_encoding_swap
406 } dir_encoding;
407
408 /* Prefer 8bit, 16bit, 32bit displacement in encoding. */
409 enum
410 {
411 disp_encoding_default = 0,
412 disp_encoding_8bit,
413 disp_encoding_16bit,
414 disp_encoding_32bit
415 } disp_encoding;
416
417 /* Prefer the REX byte in encoding. */
418 bool rex_encoding;
419
420 /* Disable instruction size optimization. */
421 bool no_optimize;
422
423 /* How to encode vector instructions. */
424 enum
425 {
426 vex_encoding_default = 0,
427 vex_encoding_vex,
428 vex_encoding_vex3,
429 vex_encoding_evex,
430 vex_encoding_error
431 } vec_encoding;
432
433 /* REP prefix. */
434 const char *rep_prefix;
435
436 /* HLE prefix. */
437 const char *hle_prefix;
438
439 /* Have BND prefix. */
440 const char *bnd_prefix;
441
442 /* Have NOTRACK prefix. */
443 const char *notrack_prefix;
444
445 /* Error message. */
446 enum i386_error error;
447 };
448
449 typedef struct _i386_insn i386_insn;
450
451 /* Link each RC type with its corresponding string, which is looked for
452 in the asm source. */
453 struct RC_name
454 {
455 enum rc_type type;
456 const char *name;
457 unsigned int len;
458 };
459
460 static const struct RC_name RC_NamesTable[] =
461 {
462 { rne, STRING_COMMA_LEN ("rn-sae") },
463 { rd, STRING_COMMA_LEN ("rd-sae") },
464 { ru, STRING_COMMA_LEN ("ru-sae") },
465 { rz, STRING_COMMA_LEN ("rz-sae") },
466 { saeonly, STRING_COMMA_LEN ("sae") },
467 };
468
469 /* To be indexed by segment register number. */
470 static const unsigned char i386_seg_prefixes[] = {
471 ES_PREFIX_OPCODE,
472 CS_PREFIX_OPCODE,
473 SS_PREFIX_OPCODE,
474 DS_PREFIX_OPCODE,
475 FS_PREFIX_OPCODE,
476 GS_PREFIX_OPCODE
477 };
478
479 /* List of chars besides those in app.c:symbol_chars that can start an
480 operand. Used to prevent the scrubber from eating vital white-space. */
481 const char extra_symbol_chars[] = "*%-([{}"
482 #ifdef LEX_AT
483 "@"
484 #endif
485 #ifdef LEX_QM
486 "?"
487 #endif
488 ;
489
490 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
491 && !defined (TE_GNU) \
492 && !defined (TE_LINUX) \
493 && !defined (TE_Haiku) \
494 && !defined (TE_FreeBSD) \
495 && !defined (TE_DragonFly) \
496 && !defined (TE_NetBSD))
497 /* This array holds the chars that always start a comment. If the
498 pre-processor is disabled, these aren't very useful. The option
499 --divide will remove '/' from this list. */
500 const char *i386_comment_chars = "#/";
501 #define SVR4_COMMENT_CHARS 1
502 #define PREFIX_SEPARATOR '\\'
503
504 #else
505 const char *i386_comment_chars = "#";
506 #define PREFIX_SEPARATOR '/'
507 #endif
508
509 /* This array holds the chars that only start a comment at the beginning of
510 a line. If the line seems to have the form '# 123 filename'
511 .line and .file directives will appear in the pre-processed output.
512 Note that input_file.c hand checks for '#' at the beginning of the
513 first line of the input file. This is because the compiler outputs
514 #NO_APP at the beginning of its output.
515 Also note that comments started like this one will always work if
516 '/' isn't otherwise defined. */
517 const char line_comment_chars[] = "#/";
518
519 const char line_separator_chars[] = ";";
520
521 /* Chars that can be used to separate the mantissa from the exponent in
522 floating point numbers. */
523 const char EXP_CHARS[] = "eE";
524
525 /* Chars that mean this number is a floating point constant
526 As in 0f12.456
527 or 0d1.2345e12. */
528 const char FLT_CHARS[] = "fFdDxXhHbB";
529
530 /* Tables for lexical analysis. */
531 static char mnemonic_chars[256];
532 static char register_chars[256];
533 static char operand_chars[256];
534 static char identifier_chars[256];
535
536 /* Lexical macros. */
537 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
538 #define is_operand_char(x) (operand_chars[(unsigned char) x])
539 #define is_register_char(x) (register_chars[(unsigned char) x])
540 #define is_space_char(x) ((x) == ' ')
541 #define is_identifier_char(x) (identifier_chars[(unsigned char) x])
542
543 /* All non-digit non-letter characters that may occur in an operand. */
544 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
545
546 /* md_assemble() always leaves the strings it's passed unaltered. To
547 effect this we maintain a stack of saved characters that we've smashed
548 with '\0's (indicating end of strings for various sub-fields of the
549 assembler instruction). */
550 static char save_stack[32];
551 static char *save_stack_p;
552 #define END_STRING_AND_SAVE(s) \
553 do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
554 #define RESTORE_END_STRING(s) \
555 do { *(s) = *--save_stack_p; } while (0)
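/* A minimal usage sketch (editorial illustration, not upstream code;
   token_start and process_token are hypothetical):  */
#if 0
  char *token_end = strchr (token_start, ',');	/* End of a sub-field.  */
  END_STRING_AND_SAVE (token_end);	/* Smash in '\0', push the old char.  */
  process_token (token_start);		/* Consumer sees a normal C string.  */
  RESTORE_END_STRING (token_end);	/* Pop and restore, strictly LIFO.  */
#endif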
556
557 /* The instruction we're assembling. */
558 static i386_insn i;
559
560 /* Possible templates for current insn. */
561 static const templates *current_templates;
562
563 /* Per instruction expressionS buffers: max displacements & immediates. */
564 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
565 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
566
567 /* Current operand we are working on. */
568 static int this_operand = -1;
569
570 /* We support four different modes (.code16, .code16gcc, .code32 and
571 .code64). The FLAG_CODE variable distinguishes the three encodings. */
572
573 enum flag_code {
574 CODE_32BIT,
575 CODE_16BIT,
576 CODE_64BIT };
577
578 static enum flag_code flag_code;
579 static unsigned int object_64bit;
580 static unsigned int disallow_64bit_reloc;
581 static int use_rela_relocations = 0;
582 /* __tls_get_addr/___tls_get_addr symbol for TLS. */
583 static const char *tls_get_addr;
584
585 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
586 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
587 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
588
589 /* The ELF ABI to use. */
590 enum x86_elf_abi
591 {
592 I386_ABI,
593 X86_64_ABI,
594 X86_64_X32_ABI
595 };
596
597 static enum x86_elf_abi x86_elf_abi = I386_ABI;
598 #endif
599
600 #if defined (TE_PE) || defined (TE_PEP)
601 /* Use big object file format. */
602 static int use_big_obj = 0;
603 #endif
604
605 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
606 /* 1 if generating code for a shared library. */
607 static int shared = 0;
608
609 unsigned int x86_sframe_cfa_sp_reg;
610 /* The other CFA base register for SFrame stack trace info. */
611 unsigned int x86_sframe_cfa_fp_reg;
612 unsigned int x86_sframe_cfa_ra_reg;
613
614 #endif
615
616 /* 1 for intel syntax,
617 0 if att syntax. */
618 static int intel_syntax = 0;
619
620 static enum x86_64_isa
621 {
622 amd64 = 1, /* AMD64 ISA. */
623 intel64 /* Intel64 ISA. */
624 } isa64;
625
626 /* 1 for intel mnemonic,
627 0 if att mnemonic. */
628 static int intel_mnemonic = !SYSV386_COMPAT;
629
630 /* 1 if pseudo registers are permitted. */
631 static int allow_pseudo_reg = 0;
632
633 /* 1 if register prefix % not required. */
634 static int allow_naked_reg = 0;
635
636 /* 1 if the assembler should add BND prefix for all control-transferring
637 instructions supporting it, even if this prefix wasn't specified
638 explicitly. */
639 static int add_bnd_prefix = 0;
640
641 /* 1 if pseudo index register, eiz/riz, is allowed. */
642 static int allow_index_reg = 0;
643
644 /* 1 if the assembler should ignore LOCK prefix, even if it was
645 specified explicitly. */
646 static int omit_lock_prefix = 0;
647
648 /* 1 if the assembler should encode lfence, mfence, and sfence as
649 "lock addl $0, (%{re}sp)". */
650 static int avoid_fence = 0;
651
652 /* 1 if lfence should be inserted after every load. */
653 static int lfence_after_load = 0;
654
655 /* Non-zero if lfence should be inserted before indirect branch. */
656 static enum lfence_before_indirect_branch_kind
657 {
658 lfence_branch_none = 0,
659 lfence_branch_register,
660 lfence_branch_memory,
661 lfence_branch_all
662 }
663 lfence_before_indirect_branch;
664
665 /* Non-zero if lfence should be inserted before ret. */
666 static enum lfence_before_ret_kind
667 {
668 lfence_before_ret_none = 0,
669 lfence_before_ret_not,
670 lfence_before_ret_or,
671 lfence_before_ret_shl
672 }
673 lfence_before_ret;
674
675 /* Kind of the previous instruction: ordinary, a directive such as .byte, or a prefix. */
676 static struct
677 {
678 segT seg;
679 const char *file;
680 const char *name;
681 unsigned int line;
682 enum last_insn_kind
683 {
684 last_insn_other = 0,
685 last_insn_directive,
686 last_insn_prefix
687 } kind;
688 } last_insn;
689
690 /* 1 if the assembler should generate relax relocations. */
691
692 static int generate_relax_relocations
693 = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
694
695 static enum check_kind
696 {
697 check_none = 0,
698 check_warning,
699 check_error
700 }
701 sse_check, operand_check = check_warning;
702
703 /* Non-zero if branches should be aligned within power of 2 boundary. */
704 static int align_branch_power = 0;
705
706 /* Types of branches to align. */
707 enum align_branch_kind
708 {
709 align_branch_none = 0,
710 align_branch_jcc = 1,
711 align_branch_fused = 2,
712 align_branch_jmp = 3,
713 align_branch_call = 4,
714 align_branch_indirect = 5,
715 align_branch_ret = 6
716 };
717
718 /* Type bits of branches to align. */
719 enum align_branch_bit
720 {
721 align_branch_jcc_bit = 1 << align_branch_jcc,
722 align_branch_fused_bit = 1 << align_branch_fused,
723 align_branch_jmp_bit = 1 << align_branch_jmp,
724 align_branch_call_bit = 1 << align_branch_call,
725 align_branch_indirect_bit = 1 << align_branch_indirect,
726 align_branch_ret_bit = 1 << align_branch_ret
727 };
728
729 static unsigned int align_branch = (align_branch_jcc_bit
730 | align_branch_fused_bit
731 | align_branch_jmp_bit);
732
733 /* Types of condition jump used by macro-fusion. */
734 enum mf_jcc_kind
735 {
736 mf_jcc_jo = 0, /* base opcode 0x70 */
737 mf_jcc_jc, /* base opcode 0x72 */
738 mf_jcc_je, /* base opcode 0x74 */
739 mf_jcc_jna, /* base opcode 0x76 */
740 mf_jcc_js, /* base opcode 0x78 */
741 mf_jcc_jp, /* base opcode 0x7a */
742 mf_jcc_jl, /* base opcode 0x7c */
743 mf_jcc_jle, /* base opcode 0x7e */
744 };
745
746 /* Types of compare flag-modifying instructions used by macro-fusion. */
747 enum mf_cmp_kind
748 {
749 mf_cmp_test_and, /* test/cmp */
750 mf_cmp_alu_cmp, /* add/sub/cmp */
751 mf_cmp_incdec /* inc/dec */
752 };
753
754 /* The maximum padding size for fused jcc. A CMP-like instruction can
755 be 9 bytes and a jcc can be 6 bytes. Leave room just in case for
756 prefixes. */
757 #define MAX_FUSED_JCC_PADDING_SIZE 20
758
759 /* The maximum number of prefixes added for an instruction. */
760 static unsigned int align_branch_prefix_size = 5;
761
762 /* Optimization:
763 1. Clear the REX_W bit with register operand if possible.
764 2. Above plus use 128bit vector instruction to clear the full vector
765 register.
766 */
767 static int optimize = 0;
768
769 /* Optimization:
770 1. Clear the REX_W bit with register operand if possible.
771 2. Above plus use 128bit vector instruction to clear the full vector
772 register.
773 3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
774 "testb $imm7,%r8".
775 */
776 static int optimize_for_space = 0;
777
778 /* Register prefix used for error message. */
779 static const char *register_prefix = "%";
780
781 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
782 leave, push, and pop instructions so that gcc has the same stack
783 frame as in 32 bit mode. */
784 static char stackop_size = '\0';
785
786 /* Non-zero to optimize code alignment. */
787 int optimize_align_code = 1;
788
789 /* Non-zero to quieten some warnings. */
790 static int quiet_warnings = 0;
791
792 /* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs. */
793 static bool pre_386_16bit_warned;
794
795 /* CPU name. */
796 static const char *cpu_arch_name = NULL;
797 static char *cpu_sub_arch_name = NULL;
798
799 /* CPU feature flags. */
800 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
801
802 /* If we have selected a cpu we are generating instructions for. */
803 static int cpu_arch_tune_set = 0;
804
805 /* Cpu we are generating instructions for. */
806 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
807
808 /* CPU feature flags of cpu we are generating instructions for. */
809 static i386_cpu_flags cpu_arch_tune_flags;
810
811 /* CPU instruction set architecture used. */
812 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
813
814 /* CPU feature flags of instruction set architecture used. */
815 i386_cpu_flags cpu_arch_isa_flags;
816
817 /* If set, conditional jumps are not automatically promoted to handle
818 larger than a byte offset. */
819 static bool no_cond_jump_promotion = false;
820
821 /* Encode SSE instructions with VEX prefix. */
822 static unsigned int sse2avx;
823
824 /* Encode aligned vector move as unaligned vector move. */
825 static unsigned int use_unaligned_vector_move;
826
827 /* Encode scalar AVX instructions with specific vector length. */
828 static enum
829 {
830 vex128 = 0,
831 vex256
832 } avxscalar;
833
834 /* Encode VEX WIG instructions with specific vex.w. */
835 static enum
836 {
837 vexw0 = 0,
838 vexw1
839 } vexwig;
840
841 /* Encode scalar EVEX LIG instructions with specific vector length. */
842 static enum
843 {
844 evexl128 = 0,
845 evexl256,
846 evexl512
847 } evexlig;
848
849 /* Encode EVEX WIG instructions with specific evex.w. */
850 static enum
851 {
852 evexw0 = 0,
853 evexw1
854 } evexwig;
855
856 /* Value to encode in EVEX RC bits, for SAE-only instructions. */
857 static enum rc_type evexrcig = rne;
858
859 /* Pre-defined "_GLOBAL_OFFSET_TABLE_". */
860 static symbolS *GOT_symbol;
861
862 /* The dwarf2 return column, adjusted for 32 or 64 bit. */
863 unsigned int x86_dwarf2_return_column;
864
865 /* The dwarf2 data alignment, adjusted for 32 or 64 bit. */
866 int x86_cie_data_alignment;
867
868 /* Interface to relax_segment.
869 There are 3 major relax states for 386 jump insns because the
870 different types of jumps add different sizes to frags when we're
871 figuring out what sort of jump to choose to reach a given label.
872
873 BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
874 branches which are handled by md_estimate_size_before_relax() and
875 i386_generic_table_relax_frag(). */
876
877 /* Types. */
878 #define UNCOND_JUMP 0
879 #define COND_JUMP 1
880 #define COND_JUMP86 2
881 #define BRANCH_PADDING 3
882 #define BRANCH_PREFIX 4
883 #define FUSED_JCC_PADDING 5
884
885 /* Sizes. */
886 #define CODE16 1
887 #define SMALL 0
888 #define SMALL16 (SMALL | CODE16)
889 #define BIG 2
890 #define BIG16 (BIG | CODE16)
891
892 #ifndef INLINE
893 #ifdef __GNUC__
894 #define INLINE __inline__
895 #else
896 #define INLINE
897 #endif
898 #endif
899
900 #define ENCODE_RELAX_STATE(type, size) \
901 ((relax_substateT) (((type) << 2) | (size)))
902 #define TYPE_FROM_RELAX_STATE(s) \
903 ((s) >> 2)
904 #define DISP_SIZE_FROM_RELAX_STATE(s) \
905 ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
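/* A round-trip sketch of the state encoding (editorial illustration):  */
#if 0
  relax_substateT s = ENCODE_RELAX_STATE (COND_JUMP, SMALL);
  gas_assert (TYPE_FROM_RELAX_STATE (s) == COND_JUMP);
  gas_assert (DISP_SIZE_FROM_RELAX_STATE (s) == 1);	/* SMALL -> 1 byte.  */
#endif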
906
907 /* This table is used by relax_frag to promote short jumps to long
908 ones where necessary. SMALL (short) jumps may be promoted to BIG
909 (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long). We
910 don't allow a short jump in a 32 bit code segment to be promoted to
911 a 16 bit offset jump because it's slower (requires data size
912 prefix), and doesn't work unless the destination is in the bottom
913 64k of the code segment (The top 16 bits of eip are zeroed). */
914
915 const relax_typeS md_relax_table[] =
916 {
917 /* The fields are:
918 1) most positive reach of this state,
919 2) most negative reach of this state,
920 3) how many bytes this mode will have in the variable part of the frag
921 4) which index into the table to try if we can't fit into this one. */
922
923 /* UNCOND_JUMP states. */
924 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
925 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
926 /* dword jmp adds 4 bytes to frag:
927 0 extra opcode bytes, 4 displacement bytes. */
928 {0, 0, 4, 0},
929 /* word jmp adds 2 bytes to frag:
930 0 extra opcode bytes, 2 displacement bytes. */
931 {0, 0, 2, 0},
932
933 /* COND_JUMP states. */
934 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
935 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
936 /* dword conditionals add 5 bytes to frag:
937 1 extra opcode byte, 4 displacement bytes. */
938 {0, 0, 5, 0},
939 /* word conditionals add 3 bytes to frag:
940 1 extra opcode byte, 2 displacement bytes. */
941 {0, 0, 3, 0},
942
943 /* COND_JUMP86 states. */
944 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
945 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
946 /* dword conditionals add 5 bytes to frag:
947 1 extra opcode byte, 4 displacement bytes. */
948 {0, 0, 5, 0},
949 /* word conditionals add 4 bytes to frag:
950 1 displacement byte and a 3 byte long branch insn. */
951 {0, 0, 4, 0}
952 };
953
954 #define ARCH(n, t, f, s) \
955 { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, CPU_ ## f ## _FLAGS, \
956 CPU_NONE_FLAGS }
957 #define SUBARCH(n, e, d, s) \
958 { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, CPU_ ## e ## _FLAGS, \
959 CPU_ ## d ## _FLAGS }
960
961 static const arch_entry cpu_arch[] =
962 {
963 /* Do not replace the first two entries - i386_target_format() and
964 set_cpu_arch() rely on them being there in this order. */
965 ARCH (generic32, GENERIC32, GENERIC32, false),
966 ARCH (generic64, GENERIC64, GENERIC64, false),
967 ARCH (i8086, UNKNOWN, NONE, false),
968 ARCH (i186, UNKNOWN, 186, false),
969 ARCH (i286, UNKNOWN, 286, false),
970 ARCH (i386, I386, 386, false),
971 ARCH (i486, I486, 486, false),
972 ARCH (i586, PENTIUM, 586, false),
973 ARCH (i686, PENTIUMPRO, 686, false),
974 ARCH (pentium, PENTIUM, 586, false),
975 ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
976 ARCH (pentiumii, PENTIUMPRO, P2, false),
977 ARCH (pentiumiii, PENTIUMPRO, P3, false),
978 ARCH (pentium4, PENTIUM4, P4, false),
979 ARCH (prescott, NOCONA, CORE, false),
980 ARCH (nocona, NOCONA, NOCONA, false),
981 ARCH (yonah, CORE, CORE, true),
982 ARCH (core, CORE, CORE, false),
983 ARCH (merom, CORE2, CORE2, true),
984 ARCH (core2, CORE2, CORE2, false),
985 ARCH (corei7, COREI7, COREI7, false),
986 ARCH (iamcu, IAMCU, IAMCU, false),
987 ARCH (k6, K6, K6, false),
988 ARCH (k6_2, K6, K6_2, false),
989 ARCH (athlon, ATHLON, ATHLON, false),
990 ARCH (sledgehammer, K8, K8, true),
991 ARCH (opteron, K8, K8, false),
992 ARCH (k8, K8, K8, false),
993 ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
994 ARCH (bdver1, BD, BDVER1, false),
995 ARCH (bdver2, BD, BDVER2, false),
996 ARCH (bdver3, BD, BDVER3, false),
997 ARCH (bdver4, BD, BDVER4, false),
998 ARCH (znver1, ZNVER, ZNVER1, false),
999 ARCH (znver2, ZNVER, ZNVER2, false),
1000 ARCH (znver3, ZNVER, ZNVER3, false),
1001 ARCH (znver4, ZNVER, ZNVER4, false),
1002 ARCH (btver1, BT, BTVER1, false),
1003 ARCH (btver2, BT, BTVER2, false),
1004
1005 SUBARCH (8087, 8087, ANY_8087, false),
1006 SUBARCH (87, NONE, ANY_8087, false), /* Disable only! */
1007 SUBARCH (287, 287, ANY_287, false),
1008 SUBARCH (387, 387, ANY_387, false),
1009 SUBARCH (687, 687, ANY_687, false),
1010 SUBARCH (cmov, CMOV, CMOV, false),
1011 SUBARCH (fxsr, FXSR, ANY_FXSR, false),
1012 SUBARCH (mmx, MMX, ANY_MMX, false),
1013 SUBARCH (sse, SSE, ANY_SSE, false),
1014 SUBARCH (sse2, SSE2, ANY_SSE2, false),
1015 SUBARCH (sse3, SSE3, ANY_SSE3, false),
1016 SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
1017 SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
1018 SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
1019 SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
1020 SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
1021 SUBARCH (avx, AVX, ANY_AVX, false),
1022 SUBARCH (avx2, AVX2, ANY_AVX2, false),
1023 SUBARCH (avx512f, AVX512F, ANY_AVX512F, false),
1024 SUBARCH (avx512cd, AVX512CD, ANY_AVX512CD, false),
1025 SUBARCH (avx512er, AVX512ER, ANY_AVX512ER, false),
1026 SUBARCH (avx512pf, AVX512PF, ANY_AVX512PF, false),
1027 SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
1028 SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
1029 SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
1030 SUBARCH (vmx, VMX, ANY_VMX, false),
1031 SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
1032 SUBARCH (smx, SMX, SMX, false),
1033 SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
1034 SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
1035 SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
1036 SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
1037 SUBARCH (aes, AES, ANY_AES, false),
1038 SUBARCH (pclmul, PCLMUL, ANY_PCLMUL, false),
1039 SUBARCH (clmul, PCLMUL, ANY_PCLMUL, true),
1040 SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
1041 SUBARCH (rdrnd, RDRND, RDRND, false),
1042 SUBARCH (f16c, F16C, ANY_F16C, false),
1043 SUBARCH (bmi2, BMI2, BMI2, false),
1044 SUBARCH (fma, FMA, ANY_FMA, false),
1045 SUBARCH (fma4, FMA4, ANY_FMA4, false),
1046 SUBARCH (xop, XOP, ANY_XOP, false),
1047 SUBARCH (lwp, LWP, ANY_LWP, false),
1048 SUBARCH (movbe, MOVBE, MOVBE, false),
1049 SUBARCH (cx16, CX16, CX16, false),
1050 SUBARCH (ept, EPT, ANY_EPT, false),
1051 SUBARCH (lzcnt, LZCNT, LZCNT, false),
1052 SUBARCH (popcnt, POPCNT, POPCNT, false),
1053 SUBARCH (hle, HLE, HLE, false),
1054 SUBARCH (rtm, RTM, ANY_RTM, false),
1055 SUBARCH (tsx, TSX, TSX, false),
1056 SUBARCH (invpcid, INVPCID, INVPCID, false),
1057 SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
1058 SUBARCH (nop, NOP, NOP, false),
1059 SUBARCH (syscall, SYSCALL, SYSCALL, false),
1060 SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
1061 SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
1062 SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
1063 SUBARCH (padlock, PADLOCK, PADLOCK, false),
1064 SUBARCH (pacifica, SVME, ANY_SVME, true),
1065 SUBARCH (svme, SVME, ANY_SVME, false),
1066 SUBARCH (abm, ABM, ABM, false),
1067 SUBARCH (bmi, BMI, BMI, false),
1068 SUBARCH (tbm, TBM, TBM, false),
1069 SUBARCH (adx, ADX, ADX, false),
1070 SUBARCH (rdseed, RDSEED, RDSEED, false),
1071 SUBARCH (prfchw, PRFCHW, PRFCHW, false),
1072 SUBARCH (smap, SMAP, SMAP, false),
1073 SUBARCH (mpx, MPX, ANY_MPX, false),
1074 SUBARCH (sha, SHA, ANY_SHA, false),
1075 SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
1076 SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
1077 SUBARCH (se1, SE1, SE1, false),
1078 SUBARCH (clwb, CLWB, CLWB, false),
1079 SUBARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, false),
1080 SUBARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, false),
1081 SUBARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, false),
1082 SUBARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, false),
1083 SUBARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, false),
1084 SUBARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, false),
1085 SUBARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, false),
1086 SUBARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, false),
1087 SUBARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, false),
1088 SUBARCH (clzero, CLZERO, CLZERO, false),
1089 SUBARCH (mwaitx, MWAITX, MWAITX, false),
1090 SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
1091 SUBARCH (rdpid, RDPID, RDPID, false),
1092 SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
1093 SUBARCH (ibt, IBT, IBT, false),
1094 SUBARCH (shstk, SHSTK, SHSTK, false),
1095 SUBARCH (gfni, GFNI, ANY_GFNI, false),
1096 SUBARCH (vaes, VAES, ANY_VAES, false),
1097 SUBARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, false),
1098 SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
1099 SUBARCH (pconfig, PCONFIG, PCONFIG, false),
1100 SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
1101 SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
1102 SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
1103 SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
1104 SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
1105 SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
1106 SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
1107 SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
1108 SUBARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, false),
1109 SUBARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
1110 ANY_AVX512_VP2INTERSECT, false),
1111 SUBARCH (tdx, TDX, TDX, false),
1112 SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
1113 SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
1114 SUBARCH (rdpru, RDPRU, RDPRU, false),
1115 SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
1116 SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
1117 SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
1118 SUBARCH (kl, KL, ANY_KL, false),
1119 SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
1120 SUBARCH (uintr, UINTR, UINTR, false),
1121 SUBARCH (hreset, HRESET, HRESET, false),
1122 SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
1123 SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
1124 SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
1125 SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
1126 SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
1127 SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
1128 SUBARCH (msrlist, MSRLIST, MSRLIST, false),
1129 SUBARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, false),
1130 SUBARCH (rao_int, RAO_INT, RAO_INT, false),
1131 SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
1132 };
1133
1134 #undef SUBARCH
1135 #undef ARCH
1136
1137 #ifdef I386COFF
1138 /* Like s_lcomm_internal in gas/read.c but the alignment string
1139 is allowed to be optional. */
1140
1141 static symbolS *
1142 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1143 {
1144 addressT align = 0;
1145
1146 SKIP_WHITESPACE ();
1147
1148 if (needs_align
1149 && *input_line_pointer == ',')
1150 {
1151 align = parse_align (needs_align - 1);
1152
1153 if (align == (addressT) -1)
1154 return NULL;
1155 }
1156 else
1157 {
1158 if (size >= 8)
1159 align = 3;
1160 else if (size >= 4)
1161 align = 2;
1162 else if (size >= 2)
1163 align = 1;
1164 else
1165 align = 0;
1166 }
1167
1168 bss_alloc (symbolP, size, align);
1169 return symbolP;
1170 }
1171
1172 static void
1173 pe_lcomm (int needs_align)
1174 {
1175 s_comm_internal (needs_align * 2, pe_lcomm_internal);
1176 }
1177 #endif
1178
1179 const pseudo_typeS md_pseudo_table[] =
1180 {
1181 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1182 {"align", s_align_bytes, 0},
1183 #else
1184 {"align", s_align_ptwo, 0},
1185 #endif
1186 {"arch", set_cpu_arch, 0},
1187 #ifndef I386COFF
1188 {"bss", s_bss, 0},
1189 #else
1190 {"lcomm", pe_lcomm, 1},
1191 #endif
1192 {"ffloat", float_cons, 'f'},
1193 {"dfloat", float_cons, 'd'},
1194 {"tfloat", float_cons, 'x'},
1195 {"hfloat", float_cons, 'h'},
1196 {"bfloat16", float_cons, 'b'},
1197 {"value", cons, 2},
1198 {"slong", signed_cons, 4},
1199 {"noopt", s_ignore, 0},
1200 {"optim", s_ignore, 0},
1201 {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1202 {"code16", set_code_flag, CODE_16BIT},
1203 {"code32", set_code_flag, CODE_32BIT},
1204 #ifdef BFD64
1205 {"code64", set_code_flag, CODE_64BIT},
1206 #endif
1207 {"intel_syntax", set_intel_syntax, 1},
1208 {"att_syntax", set_intel_syntax, 0},
1209 {"intel_mnemonic", set_intel_mnemonic, 1},
1210 {"att_mnemonic", set_intel_mnemonic, 0},
1211 {"allow_index_reg", set_allow_index_reg, 1},
1212 {"disallow_index_reg", set_allow_index_reg, 0},
1213 {"sse_check", set_check, 0},
1214 {"operand_check", set_check, 1},
1215 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1216 {"largecomm", handle_large_common, 0},
1217 #else
1218 {"file", dwarf2_directive_file, 0},
1219 {"loc", dwarf2_directive_loc, 0},
1220 {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1221 #endif
1222 #ifdef TE_PE
1223 {"secrel32", pe_directive_secrel, 0},
1224 {"secidx", pe_directive_secidx, 0},
1225 #endif
1226 {0, 0, 0}
1227 };
1228
1229 /* For interface with expression (). */
1230 extern char *input_line_pointer;
1231
1232 /* Hash table for instruction mnemonic lookup. */
1233 static htab_t op_hash;
1234
1235 /* Hash table for register lookup. */
1236 static htab_t reg_hash;
1237 \f
1238 /* Various efficient no-op patterns for aligning code labels.
1239 Note: Don't try to assemble the instructions in the comments.
1240 0L and 0w are not legal. */
1241 static const unsigned char f32_1[] =
1242 {0x90}; /* nop */
1243 static const unsigned char f32_2[] =
1244 {0x66,0x90}; /* xchg %ax,%ax */
1245 static const unsigned char f32_3[] =
1246 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
1247 static const unsigned char f32_4[] =
1248 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
1249 static const unsigned char f32_6[] =
1250 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
1251 static const unsigned char f32_7[] =
1252 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1253 static const unsigned char f16_3[] =
1254 {0x8d,0x74,0x00}; /* lea 0(%si),%si */
1255 static const unsigned char f16_4[] =
1256 {0x8d,0xb4,0x00,0x00}; /* lea 0W(%si),%si */
1257 static const unsigned char jump_disp8[] =
1258 {0xeb}; /* jmp disp8 */
1259 static const unsigned char jump32_disp32[] =
1260 {0xe9}; /* jmp disp32 */
1261 static const unsigned char jump16_disp32[] =
1262 {0x66,0xe9}; /* jmp disp32 */
1263 /* 32-bit NOP patterns. */
1264 static const unsigned char *const f32_patt[] = {
1265 f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1266 };
1267 /* 16-bit NOP patterns. */
1268 static const unsigned char *const f16_patt[] = {
1269 f32_1, f32_2, f16_3, f16_4
1270 };
1271 /* nopl (%[re]ax) */
1272 static const unsigned char alt_3[] =
1273 {0x0f,0x1f,0x00};
1274 /* nopl 0(%[re]ax) */
1275 static const unsigned char alt_4[] =
1276 {0x0f,0x1f,0x40,0x00};
1277 /* nopl 0(%[re]ax,%[re]ax,1) */
1278 static const unsigned char alt_5[] =
1279 {0x0f,0x1f,0x44,0x00,0x00};
1280 /* nopw 0(%[re]ax,%[re]ax,1) */
1281 static const unsigned char alt_6[] =
1282 {0x66,0x0f,0x1f,0x44,0x00,0x00};
1283 /* nopl 0L(%[re]ax) */
1284 static const unsigned char alt_7[] =
1285 {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1286 /* nopl 0L(%[re]ax,%[re]ax,1) */
1287 static const unsigned char alt_8[] =
1288 {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1289 /* nopw 0L(%[re]ax,%[re]ax,1) */
1290 static const unsigned char alt_9[] =
1291 {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1292 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1293 static const unsigned char alt_10[] =
1294 {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1295 /* data16 nopw %cs:0L(%eax,%eax,1) */
1296 static const unsigned char alt_11[] =
1297 {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1298 /* 32-bit and 64-bit NOP patterns. */
1299 static const unsigned char *const alt_patt[] = {
1300 f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1301 alt_9, alt_10, alt_11
1302 };
1303
1304 /* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
1305 size of a single NOP instruction MAX_SINGLE_NOP_SIZE. */
1306
1307 static void
1308 i386_output_nops (char *where, const unsigned char *const *patt,
1309 int count, int max_single_nop_size)
1310
1311 {
1312 /* Place the longer NOP first. */
1313 int last;
1314 int offset;
1315 const unsigned char *nops;
1316
1317 if (max_single_nop_size < 1)
1318 {
1319 as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
1320 max_single_nop_size);
1321 return;
1322 }
1323
1324 nops = patt[max_single_nop_size - 1];
1325
1326 /* Use the smaller one if the requested one isn't available. */
1327 if (nops == NULL)
1328 {
1329 max_single_nop_size--;
1330 nops = patt[max_single_nop_size - 1];
1331 }
1332
1333 last = count % max_single_nop_size;
1334
1335 count -= last;
1336 for (offset = 0; offset < count; offset += max_single_nop_size)
1337 memcpy (where + offset, nops, max_single_nop_size);
1338
1339 if (last)
1340 {
1341 nops = patt[last - 1];
1342 if (nops == NULL)
1343 {
1344 /* Use the smaller one plus one-byte NOP if the needed one
1345 isn't available. */
1346 last--;
1347 nops = patt[last - 1];
1348 memcpy (where + offset, nops, last);
1349 where[offset + last] = *patt[0];
1350 }
1351 else
1352 memcpy (where + offset, nops, last);
1353 }
1354 }
1355
1356 static INLINE int
1357 fits_in_imm7 (offsetT num)
1358 {
1359 return (num & 0x7f) == num;
1360 }
1361
1362 static INLINE int
1363 fits_in_imm31 (offsetT num)
1364 {
1365 return (num & 0x7fffffff) == num;
1366 }
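/* For example, fits_in_imm7 (0x7f) is true while fits_in_imm7 (0x80) and
   fits_in_imm7 (-1) are false: the masked value must equal NUM itself, so
   only non-negative values fitting in the low 7 bits pass.  */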
1367
1368 /* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
1369 single NOP instruction LIMIT. */
1370
1371 void
1372 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1373 {
1374 const unsigned char *const *patt = NULL;
1375 int max_single_nop_size;
1376 /* Maximum number of NOPs before switching to jump over NOPs. */
1377 int max_number_of_nops;
1378
1379 switch (fragP->fr_type)
1380 {
1381 case rs_fill_nop:
1382 case rs_align_code:
1383 break;
1384 case rs_machine_dependent:
1385 /* Allow NOP padding for jumps and calls. */
1386 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1387 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1388 break;
1389 /* Fall through. */
1390 default:
1391 return;
1392 }
1393
1394 /* We need to decide which NOP sequence to use for 32bit and
1395 64bit. When -mtune= is used:
1396
1397 1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1398 PROCESSOR_GENERIC32, f32_patt will be used.
1399 2. For the rest, alt_patt will be used.
1400
1401 When -mtune= isn't used, alt_patt will be used if
1402 cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will
1403 be used.
1404
1405 When -march= or .arch is used, we can't use anything beyond
1406 cpu_arch_isa_flags. */
1407
1408 if (flag_code == CODE_16BIT)
1409 {
1410 patt = f16_patt;
1411 max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1412 /* Limit number of NOPs to 2 in 16-bit mode. */
1413 max_number_of_nops = 2;
1414 }
1415 else
1416 {
1417 if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1418 {
1419 /* PROCESSOR_UNKNOWN means that all ISAs may be used. */
1420 switch (cpu_arch_tune)
1421 {
1422 case PROCESSOR_UNKNOWN:
1423 /* We use cpu_arch_isa_flags to check if we SHOULD
1424 optimize with nops. */
1425 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1426 patt = alt_patt;
1427 else
1428 patt = f32_patt;
1429 break;
1430 case PROCESSOR_PENTIUM4:
1431 case PROCESSOR_NOCONA:
1432 case PROCESSOR_CORE:
1433 case PROCESSOR_CORE2:
1434 case PROCESSOR_COREI7:
1435 case PROCESSOR_GENERIC64:
1436 case PROCESSOR_K6:
1437 case PROCESSOR_ATHLON:
1438 case PROCESSOR_K8:
1439 case PROCESSOR_AMDFAM10:
1440 case PROCESSOR_BD:
1441 case PROCESSOR_ZNVER:
1442 case PROCESSOR_BT:
1443 patt = alt_patt;
1444 break;
1445 case PROCESSOR_I386:
1446 case PROCESSOR_I486:
1447 case PROCESSOR_PENTIUM:
1448 case PROCESSOR_PENTIUMPRO:
1449 case PROCESSOR_IAMCU:
1450 case PROCESSOR_GENERIC32:
1451 patt = f32_patt;
1452 break;
1453 case PROCESSOR_NONE:
1454 abort ();
1455 }
1456 }
1457 else
1458 {
1459 switch (fragP->tc_frag_data.tune)
1460 {
1461 case PROCESSOR_UNKNOWN:
1462 /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1463 PROCESSOR_UNKNOWN. */
1464 abort ();
1465 break;
1466
1467 case PROCESSOR_I386:
1468 case PROCESSOR_I486:
1469 case PROCESSOR_PENTIUM:
1470 case PROCESSOR_IAMCU:
1471 case PROCESSOR_K6:
1472 case PROCESSOR_ATHLON:
1473 case PROCESSOR_K8:
1474 case PROCESSOR_AMDFAM10:
1475 case PROCESSOR_BD:
1476 case PROCESSOR_ZNVER:
1477 case PROCESSOR_BT:
1478 case PROCESSOR_GENERIC32:
1479 /* We use cpu_arch_isa_flags to check if we CAN optimize
1480 with nops. */
1481 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1482 patt = alt_patt;
1483 else
1484 patt = f32_patt;
1485 break;
1486 case PROCESSOR_PENTIUMPRO:
1487 case PROCESSOR_PENTIUM4:
1488 case PROCESSOR_NOCONA:
1489 case PROCESSOR_CORE:
1490 case PROCESSOR_CORE2:
1491 case PROCESSOR_COREI7:
1492 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1493 patt = alt_patt;
1494 else
1495 patt = f32_patt;
1496 break;
1497 case PROCESSOR_GENERIC64:
1498 patt = alt_patt;
1499 break;
1500 case PROCESSOR_NONE:
1501 abort ();
1502 }
1503 }
1504
1505 if (patt == f32_patt)
1506 {
1507 max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1508 /* Limit number of NOPs to 2 for older processors. */
1509 max_number_of_nops = 2;
1510 }
1511 else
1512 {
1513 max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1514 /* Limit number of NOPs to 7 for newer processors. */
1515 max_number_of_nops = 7;
1516 }
1517 }
1518
1519 if (limit == 0)
1520 limit = max_single_nop_size;
1521
1522 if (fragP->fr_type == rs_fill_nop)
1523 {
1524 /* Output NOPs for .nop directive. */
1525 if (limit > max_single_nop_size)
1526 {
1527 as_bad_where (fragP->fr_file, fragP->fr_line,
1528 _("invalid single nop size: %d "
1529 "(expect within [0, %d])"),
1530 limit, max_single_nop_size);
1531 return;
1532 }
1533 }
1534 else if (fragP->fr_type != rs_machine_dependent)
1535 fragP->fr_var = count;
1536
1537 if ((count / max_single_nop_size) > max_number_of_nops)
1538 {
1539 /* Generate jump over NOPs. */
1540 offsetT disp = count - 2;
1541 if (fits_in_imm7 (disp))
1542 {
1543 /* Use "jmp disp8" if possible. */
1544 count = disp;
1545 where[0] = jump_disp8[0];
1546 where[1] = count;
1547 where += 2;
1548 }
1549 else
1550 {
1551 unsigned int size_of_jump;
1552
1553 if (flag_code == CODE_16BIT)
1554 {
1555 where[0] = jump16_disp32[0];
1556 where[1] = jump16_disp32[1];
1557 size_of_jump = 2;
1558 }
1559 else
1560 {
1561 where[0] = jump32_disp32[0];
1562 size_of_jump = 1;
1563 }
1564
1565 count -= size_of_jump + 4;
1566 if (!fits_in_imm31 (count))
1567 {
1568 as_bad_where (fragP->fr_file, fragP->fr_line,
1569 _("jump over nop padding out of range"));
1570 return;
1571 }
1572
1573 md_number_to_chars (where + size_of_jump, count, 4);
1574 where += size_of_jump + 4;
1575 }
1576 }
1577
1578 /* Generate multiple NOPs. */
1579 i386_output_nops (where, patt, count, limit);
1580 }
1581
1582 static INLINE int
1583 operand_type_all_zero (const union i386_operand_type *x)
1584 {
1585 switch (ARRAY_SIZE(x->array))
1586 {
1587 case 3:
1588 if (x->array[2])
1589 return 0;
1590 /* Fall through. */
1591 case 2:
1592 if (x->array[1])
1593 return 0;
1594 /* Fall through. */
1595 case 1:
1596 return !x->array[0];
1597 default:
1598 abort ();
1599 }
1600 }
1601
1602 static INLINE void
1603 operand_type_set (union i386_operand_type *x, unsigned int v)
1604 {
1605 switch (ARRAY_SIZE(x->array))
1606 {
1607 case 3:
1608 x->array[2] = v;
1609 /* Fall through. */
1610 case 2:
1611 x->array[1] = v;
1612 /* Fall through. */
1613 case 1:
1614 x->array[0] = v;
1615 /* Fall through. */
1616 break;
1617 default:
1618 abort ();
1619 }
1620
1621 x->bitfield.class = ClassNone;
1622 x->bitfield.instance = InstanceNone;
1623 }
1624
1625 static INLINE int
1626 operand_type_equal (const union i386_operand_type *x,
1627 const union i386_operand_type *y)
1628 {
1629 switch (ARRAY_SIZE(x->array))
1630 {
1631 case 3:
1632 if (x->array[2] != y->array[2])
1633 return 0;
1634 /* Fall through. */
1635 case 2:
1636 if (x->array[1] != y->array[1])
1637 return 0;
1638 /* Fall through. */
1639 case 1:
1640 return x->array[0] == y->array[0];
1641 break;
1642 default:
1643 abort ();
1644 }
1645 }
1646
1647 static INLINE int
1648 cpu_flags_all_zero (const union i386_cpu_flags *x)
1649 {
1650 switch (ARRAY_SIZE(x->array))
1651 {
1652 case 5:
1653 if (x->array[4])
1654 return 0;
1655 /* Fall through. */
1656 case 4:
1657 if (x->array[3])
1658 return 0;
1659 /* Fall through. */
1660 case 3:
1661 if (x->array[2])
1662 return 0;
1663 /* Fall through. */
1664 case 2:
1665 if (x->array[1])
1666 return 0;
1667 /* Fall through. */
1668 case 1:
1669 return !x->array[0];
1670 default:
1671 abort ();
1672 }
1673 }
1674
1675 static INLINE int
1676 cpu_flags_equal (const union i386_cpu_flags *x,
1677 const union i386_cpu_flags *y)
1678 {
1679 switch (ARRAY_SIZE(x->array))
1680 {
1681 case 5:
1682 if (x->array[4] != y->array[4])
1683 return 0;
1684 /* Fall through. */
1685 case 4:
1686 if (x->array[3] != y->array[3])
1687 return 0;
1688 /* Fall through. */
1689 case 3:
1690 if (x->array[2] != y->array[2])
1691 return 0;
1692 /* Fall through. */
1693 case 2:
1694 if (x->array[1] != y->array[1])
1695 return 0;
1696 /* Fall through. */
1697 case 1:
1698 return x->array[0] == y->array[0];
1699 break;
1700 default:
1701 abort ();
1702 }
1703 }
1704
1705 static INLINE int
1706 cpu_flags_check_cpu64 (i386_cpu_flags f)
1707 {
1708 return !((flag_code == CODE_64BIT && f.bitfield.cpuno64)
1709 || (flag_code != CODE_64BIT && f.bitfield.cpu64));
1710 }
1711
1712 static INLINE i386_cpu_flags
1713 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1714 {
1715 switch (ARRAY_SIZE (x.array))
1716 {
1717 case 5:
1718 x.array [4] &= y.array [4];
1719 /* Fall through. */
1720 case 4:
1721 x.array [3] &= y.array [3];
1722 /* Fall through. */
1723 case 3:
1724 x.array [2] &= y.array [2];
1725 /* Fall through. */
1726 case 2:
1727 x.array [1] &= y.array [1];
1728 /* Fall through. */
1729 case 1:
1730 x.array [0] &= y.array [0];
1731 break;
1732 default:
1733 abort ();
1734 }
1735 return x;
1736 }
1737
1738 static INLINE i386_cpu_flags
1739 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1740 {
1741 switch (ARRAY_SIZE (x.array))
1742 {
1743 case 5:
1744 x.array [4] |= y.array [4];
1745 /* Fall through. */
1746 case 4:
1747 x.array [3] |= y.array [3];
1748 /* Fall through. */
1749 case 3:
1750 x.array [2] |= y.array [2];
1751 /* Fall through. */
1752 case 2:
1753 x.array [1] |= y.array [1];
1754 /* Fall through. */
1755 case 1:
1756 x.array [0] |= y.array [0];
1757 break;
1758 default:
1759 abort ();
1760 }
1761 return x;
1762 }
1763
1764 static INLINE i386_cpu_flags
1765 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1766 {
1767 switch (ARRAY_SIZE (x.array))
1768 {
1769 case 5:
1770 x.array [4] &= ~y.array [4];
1771 /* Fall through. */
1772 case 4:
1773 x.array [3] &= ~y.array [3];
1774 /* Fall through. */
1775 case 3:
1776 x.array [2] &= ~y.array [2];
1777 /* Fall through. */
1778 case 2:
1779 x.array [1] &= ~y.array [1];
1780 /* Fall through. */
1781 case 1:
1782 x.array [0] &= ~y.array [0];
1783 break;
1784 default:
1785 abort ();
1786 }
1787 return x;
1788 }
1789
1790 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1791
1792 #define CPU_FLAGS_ARCH_MATCH 0x1
1793 #define CPU_FLAGS_64BIT_MATCH 0x2
1794
1795 #define CPU_FLAGS_PERFECT_MATCH \
1796 (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
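/* cpu_flags_match() below yields CPU_FLAGS_PERFECT_MATCH (0x3) only when
   both the architecture features and the 64-bit constraint are satisfied;
   template matching accepts a candidate only on a perfect match.  */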
1797
1798 /* Return CPU flags match bits. */
1799
1800 static int
1801 cpu_flags_match (const insn_template *t)
1802 {
1803 i386_cpu_flags x = t->cpu_flags;
1804 int match = cpu_flags_check_cpu64 (x) ? CPU_FLAGS_64BIT_MATCH : 0;
1805
1806 x.bitfield.cpu64 = 0;
1807 x.bitfield.cpuno64 = 0;
1808
1809 if (cpu_flags_all_zero (&x))
1810 {
1811 /* This instruction is available on all archs. */
1812 match |= CPU_FLAGS_ARCH_MATCH;
1813 }
1814 else
1815 {
1816 /* This instruction is available only on some archs. */
1817 i386_cpu_flags cpu = cpu_arch_flags;
1818
1819 /* AVX512VL is not a standalone feature - match it and then strip it. */
1820 if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1821 return match;
1822 x.bitfield.cpuavx512vl = 0;
1823
1824 /* AVX and AVX2 present at the same time express an operand size
1825 dependency - strip AVX2 for the purposes here. The operand size
1826 dependent check occurs in check_vecOperands(). */
1827 if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1828 x.bitfield.cpuavx2 = 0;
1829
1830 cpu = cpu_flags_and (x, cpu);
1831 if (!cpu_flags_all_zero (&cpu))
1832 {
1833 if (x.bitfield.cpuavx)
1834 {
1835 /* We need to check a few extra flags with AVX. */
1836 if (cpu.bitfield.cpuavx
1837 && (!t->opcode_modifier.sse2avx
1838 || (sse2avx && !i.prefix[DATA_PREFIX]))
1839 && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1840 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1841 && (!x.bitfield.cpupclmul || cpu.bitfield.cpupclmul))
1842 match |= CPU_FLAGS_ARCH_MATCH;
1843 }
1844 else if (x.bitfield.cpuavx512f)
1845 {
1846 /* We need to check a few extra flags with AVX512F. */
1847 if (cpu.bitfield.cpuavx512f
1848 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1849 && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1850 && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1851 match |= CPU_FLAGS_ARCH_MATCH;
1852 }
1853 else
1854 match |= CPU_FLAGS_ARCH_MATCH;
1855 }
1856 }
1857 return match;
1858 }
1859
1860 static INLINE i386_operand_type
1861 operand_type_and (i386_operand_type x, i386_operand_type y)
1862 {
1863 if (x.bitfield.class != y.bitfield.class)
1864 x.bitfield.class = ClassNone;
1865 if (x.bitfield.instance != y.bitfield.instance)
1866 x.bitfield.instance = InstanceNone;
1867
1868 switch (ARRAY_SIZE (x.array))
1869 {
1870 case 3:
1871 x.array [2] &= y.array [2];
1872 /* Fall through. */
1873 case 2:
1874 x.array [1] &= y.array [1];
1875 /* Fall through. */
1876 case 1:
1877 x.array [0] &= y.array [0];
1878 break;
1879 default:
1880 abort ();
1881 }
1882 return x;
1883 }
1884
1885 static INLINE i386_operand_type
1886 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1887 {
1888 gas_assert (y.bitfield.class == ClassNone);
1889 gas_assert (y.bitfield.instance == InstanceNone);
1890
1891 switch (ARRAY_SIZE (x.array))
1892 {
1893 case 3:
1894 x.array [2] &= ~y.array [2];
1895 /* Fall through. */
1896 case 2:
1897 x.array [1] &= ~y.array [1];
1898 /* Fall through. */
1899 case 1:
1900 x.array [0] &= ~y.array [0];
1901 break;
1902 default:
1903 abort ();
1904 }
1905 return x;
1906 }
1907
1908 static INLINE i386_operand_type
1909 operand_type_or (i386_operand_type x, i386_operand_type y)
1910 {
1911 gas_assert (x.bitfield.class == ClassNone ||
1912 y.bitfield.class == ClassNone ||
1913 x.bitfield.class == y.bitfield.class);
1914 gas_assert (x.bitfield.instance == InstanceNone ||
1915 y.bitfield.instance == InstanceNone ||
1916 x.bitfield.instance == y.bitfield.instance);
1917
1918 switch (ARRAY_SIZE (x.array))
1919 {
1920 case 3:
1921 x.array [2] |= y.array [2];
1922 /* Fall through. */
1923 case 2:
1924 x.array [1] |= y.array [1];
1925 /* Fall through. */
1926 case 1:
1927 x.array [0] |= y.array [0];
1928 break;
1929 default:
1930 abort ();
1931 }
1932 return x;
1933 }
1934
1935 static INLINE i386_operand_type
1936 operand_type_xor (i386_operand_type x, i386_operand_type y)
1937 {
1938 gas_assert (y.bitfield.class == ClassNone);
1939 gas_assert (y.bitfield.instance == InstanceNone);
1940
1941 switch (ARRAY_SIZE (x.array))
1942 {
1943 case 3:
1944 x.array [2] ^= y.array [2];
1945 /* Fall through. */
1946 case 2:
1947 x.array [1] ^= y.array [1];
1948 /* Fall through. */
1949 case 1:
1950 x.array [0] ^= y.array [0];
1951 break;
1952 default:
1953 abort ();
1954 }
1955 return x;
1956 }
1957
1958 static const i386_operand_type anydisp = {
1959 .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
1960 };
1961
1962 enum operand_type
1963 {
1964 reg,
1965 imm,
1966 disp,
1967 anymem
1968 };
1969
1970 static INLINE int
1971 operand_type_check (i386_operand_type t, enum operand_type c)
1972 {
1973 switch (c)
1974 {
1975 case reg:
1976 return t.bitfield.class == Reg;
1977
1978 case imm:
1979 return (t.bitfield.imm8
1980 || t.bitfield.imm8s
1981 || t.bitfield.imm16
1982 || t.bitfield.imm32
1983 || t.bitfield.imm32s
1984 || t.bitfield.imm64);
1985
1986 case disp:
1987 return (t.bitfield.disp8
1988 || t.bitfield.disp16
1989 || t.bitfield.disp32
1990 || t.bitfield.disp64);
1991
1992 case anymem:
1993 return (t.bitfield.disp8
1994 || t.bitfield.disp16
1995 || t.bitfield.disp32
1996 || t.bitfield.disp64
1997 || t.bitfield.baseindex);
1998
1999 default:
2000 abort ();
2001 }
2002
2003 return 0;
2004 }
2005
2006 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2007    between operand GIVEN and operand WANTED for instruction template T.  */
2008
2009 static INLINE int
2010 match_operand_size (const insn_template *t, unsigned int wanted,
2011 unsigned int given)
2012 {
2013 return !((i.types[given].bitfield.byte
2014 && !t->operand_types[wanted].bitfield.byte)
2015 || (i.types[given].bitfield.word
2016 && !t->operand_types[wanted].bitfield.word)
2017 || (i.types[given].bitfield.dword
2018 && !t->operand_types[wanted].bitfield.dword)
2019 || (i.types[given].bitfield.qword
2020 && (!t->operand_types[wanted].bitfield.qword
2021 /* Don't allow 64-bit (memory) operands outside of 64-bit
2022 mode, when they're used where a 64-bit GPR could also
2023 be used. Checking is needed for Intel Syntax only. */
2024 || (intel_syntax
2025 && flag_code != CODE_64BIT
2026 && (t->operand_types[wanted].bitfield.class == Reg
2027 || t->operand_types[wanted].bitfield.class == Accum
2028 || t->opcode_modifier.isstring))))
2029 || (i.types[given].bitfield.tbyte
2030 && !t->operand_types[wanted].bitfield.tbyte));
2031 }
2032
2033 /* Return 1 if there is no conflict in SIMD register between operand
2034    GIVEN and operand WANTED for instruction template T.  */
2035
2036 static INLINE int
2037 match_simd_size (const insn_template *t, unsigned int wanted,
2038 unsigned int given)
2039 {
2040 return !((i.types[given].bitfield.xmmword
2041 && !t->operand_types[wanted].bitfield.xmmword)
2042 || (i.types[given].bitfield.ymmword
2043 && !t->operand_types[wanted].bitfield.ymmword)
2044 || (i.types[given].bitfield.zmmword
2045 && !t->operand_types[wanted].bitfield.zmmword)
2046 || (i.types[given].bitfield.tmmword
2047 && !t->operand_types[wanted].bitfield.tmmword));
2048 }
2049
2050 /* Return 1 if there is no conflict in any size between operand GIVEN
2051    and operand WANTED for instruction template T.  */
2052
2053 static INLINE int
2054 match_mem_size (const insn_template *t, unsigned int wanted,
2055 unsigned int given)
2056 {
2057 return (match_operand_size (t, wanted, given)
2058 && !((i.types[given].bitfield.unspecified
2059 && !i.broadcast.type
2060 && !i.broadcast.bytes
2061 && !t->operand_types[wanted].bitfield.unspecified)
2062 || (i.types[given].bitfield.fword
2063 && !t->operand_types[wanted].bitfield.fword)
2064 /* For scalar opcode templates to allow register and memory
2065 operands at the same time, some special casing is needed
2066 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2067 down-conversion vpmov*. */
2068 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2069 && t->operand_types[wanted].bitfield.byte
2070 + t->operand_types[wanted].bitfield.word
2071 + t->operand_types[wanted].bitfield.dword
2072 + t->operand_types[wanted].bitfield.qword
2073 > !!t->opcode_modifier.broadcast)
2074 ? (i.types[given].bitfield.xmmword
2075 || i.types[given].bitfield.ymmword
2076 || i.types[given].bitfield.zmmword)
2077 : !match_simd_size(t, wanted, given))));
2078 }
2079
2080 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2081 operands for instruction template T, and it has MATCH_REVERSE set if there
2082 is no size conflict on any operands for the template with operands reversed
2083 (and the template allows for reversing in the first place). */
2084
2085 #define MATCH_STRAIGHT 1
2086 #define MATCH_REVERSE 2
2087
2088 static INLINE unsigned int
2089 operand_size_match (const insn_template *t)
2090 {
2091 unsigned int j, match = MATCH_STRAIGHT;
2092
2093 /* Don't check non-absolute jump instructions. */
2094 if (t->opcode_modifier.jump
2095 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2096 return match;
2097
2098 /* Check memory and accumulator operand size. */
2099 for (j = 0; j < i.operands; j++)
2100 {
2101 if (i.types[j].bitfield.class != Reg
2102 && i.types[j].bitfield.class != RegSIMD
2103 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2104 continue;
2105
2106 if (t->operand_types[j].bitfield.class == Reg
2107 && !match_operand_size (t, j, j))
2108 {
2109 match = 0;
2110 break;
2111 }
2112
2113 if (t->operand_types[j].bitfield.class == RegSIMD
2114 && !match_simd_size (t, j, j))
2115 {
2116 match = 0;
2117 break;
2118 }
2119
2120 if (t->operand_types[j].bitfield.instance == Accum
2121 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2122 {
2123 match = 0;
2124 break;
2125 }
2126
2127 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2128 {
2129 match = 0;
2130 break;
2131 }
2132 }
2133
2134 if (!t->opcode_modifier.d)
2135 return match;
2136
2137 /* Check reverse. */
2138 gas_assert ((i.operands >= 2 && i.operands <= 3)
2139 || t->opcode_modifier.vexsources);
2140
2141 for (j = 0; j < i.operands; j++)
2142 {
2143 unsigned int given = i.operands - j - 1;
2144
2145 /* For 4- and 5-operand insns VEX.W controls just the first two
2146 register operands. */
2147 if (t->opcode_modifier.vexsources)
2148 given = j < 2 ? 1 - j : j;
2149
2150 if (t->operand_types[j].bitfield.class == Reg
2151 && !match_operand_size (t, j, given))
2152 return match;
2153
2154 if (t->operand_types[j].bitfield.class == RegSIMD
2155 && !match_simd_size (t, j, given))
2156 return match;
2157
2158 if (t->operand_types[j].bitfield.instance == Accum
2159 && (!match_operand_size (t, j, given)
2160 || !match_simd_size (t, j, given)))
2161 return match;
2162
2163 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2164 return match;
2165 }
2166
2167 return match | MATCH_REVERSE;
2168 }
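/* Example: for a template carrying the D (reversible) modifier whose
   operand sizes match in both directions, the return value is
   MATCH_STRAIGHT | MATCH_REVERSE; a size conflict found only in the
   reverse loop returns just MATCH_STRAIGHT.  */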
2169
2170 static INLINE int
2171 operand_type_match (i386_operand_type overlap,
2172 i386_operand_type given)
2173 {
2174 i386_operand_type temp = overlap;
2175
2176 temp.bitfield.unspecified = 0;
2177 temp.bitfield.byte = 0;
2178 temp.bitfield.word = 0;
2179 temp.bitfield.dword = 0;
2180 temp.bitfield.fword = 0;
2181 temp.bitfield.qword = 0;
2182 temp.bitfield.tbyte = 0;
2183 temp.bitfield.xmmword = 0;
2184 temp.bitfield.ymmword = 0;
2185 temp.bitfield.zmmword = 0;
2186 temp.bitfield.tmmword = 0;
2187 if (operand_type_all_zero (&temp))
2188 goto mismatch;
2189
2190 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2191 return 1;
2192
2193 mismatch:
2194 i.error = operand_type_mismatch;
2195 return 0;
2196 }
2197
2198 /* If the given types g0 and g1 are registers, they must be of the same type
2199 unless the expected operand type register overlap is null.
2200 Intel syntax sized memory operands are also checked here. */
2201
2202 static INLINE int
2203 operand_type_register_match (i386_operand_type g0,
2204 i386_operand_type t0,
2205 i386_operand_type g1,
2206 i386_operand_type t1)
2207 {
2208 if (g0.bitfield.class != Reg
2209 && g0.bitfield.class != RegSIMD
2210 && (g0.bitfield.unspecified
2211 || !operand_type_check (g0, anymem)))
2212 return 1;
2213
2214 if (g1.bitfield.class != Reg
2215 && g1.bitfield.class != RegSIMD
2216 && (g1.bitfield.unspecified
2217 || !operand_type_check (g1, anymem)))
2218 return 1;
2219
2220 if (g0.bitfield.byte == g1.bitfield.byte
2221 && g0.bitfield.word == g1.bitfield.word
2222 && g0.bitfield.dword == g1.bitfield.dword
2223 && g0.bitfield.qword == g1.bitfield.qword
2224 && g0.bitfield.xmmword == g1.bitfield.xmmword
2225 && g0.bitfield.ymmword == g1.bitfield.ymmword
2226 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2227 return 1;
2228
2229 /* If expectations overlap in no more than a single size, all is fine. */
2230 g0 = operand_type_and (t0, t1);
2231 if (g0.bitfield.byte
2232 + g0.bitfield.word
2233 + g0.bitfield.dword
2234 + g0.bitfield.qword
2235 + g0.bitfield.xmmword
2236 + g0.bitfield.ymmword
2237 + g0.bitfield.zmmword <= 1)
2238 return 1;
2239
2240 i.error = register_type_mismatch;
2241
2242 return 0;
2243 }
2244
2245 static INLINE unsigned int
2246 register_number (const reg_entry *r)
2247 {
2248 unsigned int nr = r->reg_num;
2249
2250 if (r->reg_flags & RegRex)
2251 nr += 8;
2252
2253 if (r->reg_flags & RegVRex)
2254 nr += 16;
2255
2256 return nr;
2257 }
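/* Example (hypothetical register-table entries): %r12 has reg_num 4 with
   RegRex set, giving 12; an upper SIMD register such as %xmm17 has
   reg_num 1 with RegVRex set, giving 17.  */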
2258
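/* The value returned below mirrors the ModR/M "mod" field encoding:
   1 selects a disp8, 2 a disp16/disp32, and 0 means no displacement
   (an assumption drawn from the function name and its callers).  */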
2259 static INLINE unsigned int
2260 mode_from_disp_size (i386_operand_type t)
2261 {
2262 if (t.bitfield.disp8)
2263 return 1;
2264 else if (t.bitfield.disp16
2265 || t.bitfield.disp32)
2266 return 2;
2267 else
2268 return 0;
2269 }
2270
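/* The fits_in_* helpers below rely on unsigned wraparound: e.g.
   "num + 0x80 <= 0xff" shifts the signed byte range [-0x80, 0x7f]
   onto [0, 0xff], so a single unsigned compare tests both bounds.  */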
2271 static INLINE int
2272 fits_in_signed_byte (addressT num)
2273 {
2274 return num + 0x80 <= 0xff;
2275 }
2276
2277 static INLINE int
2278 fits_in_unsigned_byte (addressT num)
2279 {
2280 return num <= 0xff;
2281 }
2282
2283 static INLINE int
2284 fits_in_unsigned_word (addressT num)
2285 {
2286 return num <= 0xffff;
2287 }
2288
2289 static INLINE int
2290 fits_in_signed_word (addressT num)
2291 {
2292 return num + 0x8000 <= 0xffff;
2293 }
2294
2295 static INLINE int
2296 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2297 {
2298 #ifndef BFD64
2299 return 1;
2300 #else
2301 return num + 0x80000000 <= 0xffffffff;
2302 #endif
2303 } /* fits_in_signed_long() */
2304
2305 static INLINE int
2306 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2307 {
2308 #ifndef BFD64
2309 return 1;
2310 #else
2311 return num <= 0xffffffff;
2312 #endif
2313 } /* fits_in_unsigned_long() */
2314
2315 static INLINE valueT extend_to_32bit_address (addressT num)
2316 {
2317 #ifdef BFD64
2318 if (fits_in_unsigned_long(num))
2319 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2320
2321 if (!fits_in_signed_long (num))
2322 return num & 0xffffffff;
2323 #endif
2324
2325 return num;
2326 }
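/* Example: on BFD64 targets, extend_to_32bit_address (0x80000000) yields
   0xffffffff80000000 (bit 31 sign-extended); inputs already representable
   as a sign-extended 32-bit value are returned unchanged, and anything
   else is truncated to its low 32 bits.  */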
2327
2328 static INLINE int
2329 fits_in_disp8 (offsetT num)
2330 {
2331 int shift = i.memshift;
2332 unsigned int mask;
2333
2334 if (shift == -1)
2335 abort ();
2336
2337 mask = (1 << shift) - 1;
2338
2339 /* Return 0 if NUM isn't properly aligned. */
2340 if ((num & mask))
2341 return 0;
2342
2343 /* Check if NUM will fit in 8bit after shift. */
2344 return fits_in_signed_byte (num >> shift);
2345 }
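/* Example of the EVEX compressed displacement this implements: with
   i.memshift == 6 (a 64-byte memory operand), a displacement of 0x80 is
   64-byte aligned and 0x80 >> 6 == 2 fits in a signed byte, so it can be
   encoded as a disp8; 0x44 is rejected as misaligned.  */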
2346
2347 static INLINE int
2348 fits_in_imm4 (offsetT num)
2349 {
2350 return (num & 0xf) == num;
2351 }
2352
2353 static i386_operand_type
2354 smallest_imm_type (offsetT num)
2355 {
2356 i386_operand_type t;
2357
2358 operand_type_set (&t, 0);
2359 t.bitfield.imm64 = 1;
2360
2361 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2362 {
2363 /* This code is disabled on the 486 because all the Imm1 forms
2364 in the opcode table are slower on the i486. They're the
2365 versions with the implicitly specified single-position
2366 displacement, which has another syntax if you really want to
2367 use that form. */
2368 t.bitfield.imm1 = 1;
2369 t.bitfield.imm8 = 1;
2370 t.bitfield.imm8s = 1;
2371 t.bitfield.imm16 = 1;
2372 t.bitfield.imm32 = 1;
2373 t.bitfield.imm32s = 1;
2374 }
2375 else if (fits_in_signed_byte (num))
2376 {
2377 t.bitfield.imm8 = 1;
2378 t.bitfield.imm8s = 1;
2379 t.bitfield.imm16 = 1;
2380 t.bitfield.imm32 = 1;
2381 t.bitfield.imm32s = 1;
2382 }
2383 else if (fits_in_unsigned_byte (num))
2384 {
2385 t.bitfield.imm8 = 1;
2386 t.bitfield.imm16 = 1;
2387 t.bitfield.imm32 = 1;
2388 t.bitfield.imm32s = 1;
2389 }
2390 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2391 {
2392 t.bitfield.imm16 = 1;
2393 t.bitfield.imm32 = 1;
2394 t.bitfield.imm32s = 1;
2395 }
2396 else if (fits_in_signed_long (num))
2397 {
2398 t.bitfield.imm32 = 1;
2399 t.bitfield.imm32s = 1;
2400 }
2401 else if (fits_in_unsigned_long (num))
2402 t.bitfield.imm32 = 1;
2403
2404 return t;
2405 }
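/* Example: smallest_imm_type (-1) marks imm8/imm8s/imm16/imm32/imm32s
   (plus imm64, which is always set), while smallest_imm_type (0x90)
   omits imm8s since 0x90 does not fit in a signed byte.  */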
2406
2407 static offsetT
2408 offset_in_range (offsetT val, int size)
2409 {
2410 addressT mask;
2411
2412 switch (size)
2413 {
2414 case 1: mask = ((addressT) 1 << 8) - 1; break;
2415 case 2: mask = ((addressT) 1 << 16) - 1; break;
2416 #ifdef BFD64
2417 case 4: mask = ((addressT) 1 << 32) - 1; break;
2418 #endif
2419 case sizeof (val): return val;
2420 default: abort ();
2421 }
2422
2423 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2424 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2425 (uint64_t) val, (uint64_t) (val & mask));
2426
2427 return val & mask;
2428 }
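/* Example: offset_in_range (0x12345, 1) warns that 0x12345 was shortened
   and returns 0x45; a value like -1 with size 1 passes silently as 0xff,
   because all the masked-out bits agree with the sign.  */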
2429
2430 static INLINE const char *insn_name (const insn_template *t)
2431 {
2432 return &i386_mnemonics[t->mnem_off];
2433 }
2434
2435 enum PREFIX_GROUP
2436 {
2437 PREFIX_EXIST = 0,
2438 PREFIX_LOCK,
2439 PREFIX_REP,
2440 PREFIX_DS,
2441 PREFIX_OTHER
2442 };
2443
2444 /* Returns
2445 a. PREFIX_EXIST if attempting to add a prefix where one from the
2446 same class already exists.
2447 b. PREFIX_LOCK if lock prefix is added.
2448 c. PREFIX_REP if rep/repne prefix is added.
2449 d. PREFIX_DS if ds prefix is added.
2450 e. PREFIX_OTHER if other prefix is added.
2451 */
2452
2453 static enum PREFIX_GROUP
2454 add_prefix (unsigned int prefix)
2455 {
2456 enum PREFIX_GROUP ret = PREFIX_OTHER;
2457 unsigned int q;
2458
2459 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2460 && flag_code == CODE_64BIT)
2461 {
2462 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2463 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2464 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2465 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2466 ret = PREFIX_EXIST;
2467 q = REX_PREFIX;
2468 }
2469 else
2470 {
2471 switch (prefix)
2472 {
2473 default:
2474 abort ();
2475
2476 case DS_PREFIX_OPCODE:
2477 ret = PREFIX_DS;
2478 /* Fall through. */
2479 case CS_PREFIX_OPCODE:
2480 case ES_PREFIX_OPCODE:
2481 case FS_PREFIX_OPCODE:
2482 case GS_PREFIX_OPCODE:
2483 case SS_PREFIX_OPCODE:
2484 q = SEG_PREFIX;
2485 break;
2486
2487 case REPNE_PREFIX_OPCODE:
2488 case REPE_PREFIX_OPCODE:
2489 q = REP_PREFIX;
2490 ret = PREFIX_REP;
2491 break;
2492
2493 case LOCK_PREFIX_OPCODE:
2494 q = LOCK_PREFIX;
2495 ret = PREFIX_LOCK;
2496 break;
2497
2498 case FWAIT_OPCODE:
2499 q = WAIT_PREFIX;
2500 break;
2501
2502 case ADDR_PREFIX_OPCODE:
2503 q = ADDR_PREFIX;
2504 break;
2505
2506 case DATA_PREFIX_OPCODE:
2507 q = DATA_PREFIX;
2508 break;
2509 }
2510 if (i.prefix[q] != 0)
2511 ret = PREFIX_EXIST;
2512 }
2513
2514 if (ret)
2515 {
2516 if (!i.prefix[q])
2517 ++i.prefixes;
2518 i.prefix[q] |= prefix;
2519 }
2520 else
2521 as_bad (_("same type of prefix used twice"));
2522
2523 return ret;
2524 }
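/* Example: the first add_prefix (LOCK_PREFIX_OPCODE) returns PREFIX_LOCK;
   a second one finds i.prefix[LOCK_PREFIX] already set, yields
   PREFIX_EXIST (0), and so reports "same type of prefix used twice".  */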
2525
2526 static void
2527 update_code_flag (int value, int check)
2528 {
2529 PRINTF_LIKE ((*as_error));
2530
2531 flag_code = (enum flag_code) value;
2532 if (flag_code == CODE_64BIT)
2533 {
2534 cpu_arch_flags.bitfield.cpu64 = 1;
2535 cpu_arch_flags.bitfield.cpuno64 = 0;
2536 }
2537 else
2538 {
2539 cpu_arch_flags.bitfield.cpu64 = 0;
2540 cpu_arch_flags.bitfield.cpuno64 = 1;
2541 }
2542 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm )
2543 {
2544 if (check)
2545 as_error = as_fatal;
2546 else
2547 as_error = as_bad;
2548 (*as_error) (_("64bit mode not supported on `%s'."),
2549 cpu_arch_name ? cpu_arch_name : default_arch);
2550 }
2551 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2552 {
2553 if (check)
2554 as_error = as_fatal;
2555 else
2556 as_error = as_bad;
2557 (*as_error) (_("32bit mode not supported on `%s'."),
2558 cpu_arch_name ? cpu_arch_name : default_arch);
2559 }
2560 stackop_size = '\0';
2561 }
2562
2563 static void
2564 set_code_flag (int value)
2565 {
2566 update_code_flag (value, 0);
2567 }
2568
2569 static void
2570 set_16bit_gcc_code_flag (int new_code_flag)
2571 {
2572 flag_code = (enum flag_code) new_code_flag;
2573 if (flag_code != CODE_16BIT)
2574 abort ();
2575 cpu_arch_flags.bitfield.cpu64 = 0;
2576 cpu_arch_flags.bitfield.cpuno64 = 1;
2577 stackop_size = LONG_MNEM_SUFFIX;
2578 }
2579
2580 static void
2581 set_intel_syntax (int syntax_flag)
2582 {
2583 /* Find out if register prefixing is specified. */
2584 int ask_naked_reg = 0;
2585
2586 SKIP_WHITESPACE ();
2587 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2588 {
2589 char *string;
2590 int e = get_symbol_name (&string);
2591
2592 if (strcmp (string, "prefix") == 0)
2593 ask_naked_reg = 1;
2594 else if (strcmp (string, "noprefix") == 0)
2595 ask_naked_reg = -1;
2596 else
2597 as_bad (_("bad argument to syntax directive."));
2598 (void) restore_line_pointer (e);
2599 }
2600 demand_empty_rest_of_line ();
2601
2602 intel_syntax = syntax_flag;
2603
2604 if (ask_naked_reg == 0)
2605 allow_naked_reg = (intel_syntax
2606 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2607 else
2608 allow_naked_reg = (ask_naked_reg < 0);
2609
2610 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2611
2612 identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
2613 identifier_chars['$'] = intel_syntax ? '$' : 0;
2614 register_prefix = allow_naked_reg ? "" : "%";
2615 }
2616
2617 static void
2618 set_intel_mnemonic (int mnemonic_flag)
2619 {
2620 intel_mnemonic = mnemonic_flag;
2621 }
2622
2623 static void
2624 set_allow_index_reg (int flag)
2625 {
2626 allow_index_reg = flag;
2627 }
2628
2629 static void
2630 set_check (int what)
2631 {
2632 enum check_kind *kind;
2633 const char *str;
2634
2635 if (what)
2636 {
2637 kind = &operand_check;
2638 str = "operand";
2639 }
2640 else
2641 {
2642 kind = &sse_check;
2643 str = "sse";
2644 }
2645
2646 SKIP_WHITESPACE ();
2647
2648 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2649 {
2650 char *string;
2651 int e = get_symbol_name (&string);
2652
2653 if (strcmp (string, "none") == 0)
2654 *kind = check_none;
2655 else if (strcmp (string, "warning") == 0)
2656 *kind = check_warning;
2657 else if (strcmp (string, "error") == 0)
2658 *kind = check_error;
2659 else
2660 as_bad (_("bad argument to %s_check directive."), str);
2661 (void) restore_line_pointer (e);
2662 }
2663 else
2664 as_bad (_("missing argument for %s_check directive"), str);
2665
2666 demand_empty_rest_of_line ();
2667 }
2668
2669 static void
2670 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2671 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2672 {
2673 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2674 static const char *arch;
2675
2676 /* Intel MCU is only supported on ELF. */
2677 if (!IS_ELF)
2678 return;
2679
2680 if (!arch)
2681 {
2682 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2683 use default_arch. */
2684 arch = cpu_arch_name;
2685 if (!arch)
2686 arch = default_arch;
2687 }
2688
2689 /* If we are targeting Intel MCU, we must enable it. */
2690 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2691 == new_flag.bitfield.cpuiamcu)
2692 return;
2693
2694 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2695 #endif
2696 }
2697
2698 static void
2699 extend_cpu_sub_arch_name (const char *name)
2700 {
2701 if (cpu_sub_arch_name)
2702 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2703 ".", name, (const char *) NULL);
2704 else
2705 cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2706 }
2707
2708 static void
2709 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2710 {
2711 typedef struct arch_stack_entry
2712 {
2713 const struct arch_stack_entry *prev;
2714 const char *name;
2715 char *sub_name;
2716 i386_cpu_flags flags;
2717 i386_cpu_flags isa_flags;
2718 enum processor_type isa;
2719 enum flag_code flag_code;
2720 char stackop_size;
2721 bool no_cond_jump_promotion;
2722 } arch_stack_entry;
2723 static const arch_stack_entry *arch_stack_top;
2724
2725 SKIP_WHITESPACE ();
2726
2727 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2728 {
2729 char *s;
2730 int e = get_symbol_name (&s);
2731 const char *string = s;
2732 unsigned int j = 0;
2733 i386_cpu_flags flags;
2734
2735 if (strcmp (string, "default") == 0)
2736 {
2737 if (strcmp (default_arch, "iamcu") == 0)
2738 string = default_arch;
2739 else
2740 {
2741 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2742
2743 cpu_arch_name = NULL;
2744 free (cpu_sub_arch_name);
2745 cpu_sub_arch_name = NULL;
2746 cpu_arch_flags = cpu_unknown_flags;
2747 if (flag_code == CODE_64BIT)
2748 {
2749 cpu_arch_flags.bitfield.cpu64 = 1;
2750 cpu_arch_flags.bitfield.cpuno64 = 0;
2751 }
2752 else
2753 {
2754 cpu_arch_flags.bitfield.cpu64 = 0;
2755 cpu_arch_flags.bitfield.cpuno64 = 1;
2756 }
2757 cpu_arch_isa = PROCESSOR_UNKNOWN;
2758 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2759 if (!cpu_arch_tune_set)
2760 {
2761 cpu_arch_tune = cpu_arch_isa;
2762 cpu_arch_tune_flags = cpu_arch_isa_flags;
2763 }
2764
2765 j = ARRAY_SIZE (cpu_arch) + 1;
2766 }
2767 }
2768 else if (strcmp (string, "push") == 0)
2769 {
2770 arch_stack_entry *top = XNEW (arch_stack_entry);
2771
2772 top->name = cpu_arch_name;
2773 if (cpu_sub_arch_name)
2774 top->sub_name = xstrdup (cpu_sub_arch_name);
2775 else
2776 top->sub_name = NULL;
2777 top->flags = cpu_arch_flags;
2778 top->isa = cpu_arch_isa;
2779 top->isa_flags = cpu_arch_isa_flags;
2780 top->flag_code = flag_code;
2781 top->stackop_size = stackop_size;
2782 top->no_cond_jump_promotion = no_cond_jump_promotion;
2783
2784 top->prev = arch_stack_top;
2785 arch_stack_top = top;
2786
2787 (void) restore_line_pointer (e);
2788 demand_empty_rest_of_line ();
2789 return;
2790 }
2791 else if (strcmp (string, "pop") == 0)
2792 {
2793 const arch_stack_entry *top = arch_stack_top;
2794
2795 if (!top)
2796 as_bad (_(".arch stack is empty"));
2797 else if (top->flag_code != flag_code
2798 || top->stackop_size != stackop_size)
2799 {
2800 static const unsigned int bits[] = {
2801 [CODE_16BIT] = 16,
2802 [CODE_32BIT] = 32,
2803 [CODE_64BIT] = 64,
2804 };
2805
2806 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2807 bits[top->flag_code],
2808 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2809 }
2810 else
2811 {
2812 arch_stack_top = top->prev;
2813
2814 cpu_arch_name = top->name;
2815 free (cpu_sub_arch_name);
2816 cpu_sub_arch_name = top->sub_name;
2817 cpu_arch_flags = top->flags;
2818 cpu_arch_isa = top->isa;
2819 cpu_arch_isa_flags = top->isa_flags;
2820 no_cond_jump_promotion = top->no_cond_jump_promotion;
2821
2822 XDELETE (top);
2823 }
2824
2825 (void) restore_line_pointer (e);
2826 demand_empty_rest_of_line ();
2827 return;
2828 }
2829
2830 for (; j < ARRAY_SIZE (cpu_arch); j++)
2831 {
2832 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2833 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2834 {
2835 if (*string != '.')
2836 {
2837 check_cpu_arch_compatible (string, cpu_arch[j].enable);
2838
2839 cpu_arch_name = cpu_arch[j].name;
2840 free (cpu_sub_arch_name);
2841 cpu_sub_arch_name = NULL;
2842 cpu_arch_flags = cpu_arch[j].enable;
2843 if (flag_code == CODE_64BIT)
2844 {
2845 cpu_arch_flags.bitfield.cpu64 = 1;
2846 cpu_arch_flags.bitfield.cpuno64 = 0;
2847 }
2848 else
2849 {
2850 cpu_arch_flags.bitfield.cpu64 = 0;
2851 cpu_arch_flags.bitfield.cpuno64 = 1;
2852 }
2853 cpu_arch_isa = cpu_arch[j].type;
2854 cpu_arch_isa_flags = cpu_arch[j].enable;
2855 if (!cpu_arch_tune_set)
2856 {
2857 cpu_arch_tune = cpu_arch_isa;
2858 cpu_arch_tune_flags = cpu_arch_isa_flags;
2859 }
2860 pre_386_16bit_warned = false;
2861 break;
2862 }
2863
2864 if (cpu_flags_all_zero (&cpu_arch[j].enable))
2865 continue;
2866
2867 flags = cpu_flags_or (cpu_arch_flags,
2868 cpu_arch[j].enable);
2869
2870 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2871 {
2872 extend_cpu_sub_arch_name (string + 1);
2873 cpu_arch_flags = flags;
2874 cpu_arch_isa_flags = flags;
2875 }
2876 else
2877 cpu_arch_isa_flags
2878 = cpu_flags_or (cpu_arch_isa_flags,
2879 cpu_arch[j].enable);
2880 (void) restore_line_pointer (e);
2881 demand_empty_rest_of_line ();
2882 return;
2883 }
2884 }
2885
2886 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
2887 {
2888 /* Disable an ISA extension. */
2889 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
2890 if (cpu_arch[j].type == PROCESSOR_NONE
2891 && strcmp (string + 3, cpu_arch[j].name) == 0)
2892 {
2893 flags = cpu_flags_and_not (cpu_arch_flags,
2894 cpu_arch[j].disable);
2895 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2896 {
2897 extend_cpu_sub_arch_name (string + 1);
2898 cpu_arch_flags = flags;
2899 cpu_arch_isa_flags = flags;
2900 }
2901 (void) restore_line_pointer (e);
2902 demand_empty_rest_of_line ();
2903 return;
2904 }
2905 }
2906
2907 if (j == ARRAY_SIZE (cpu_arch))
2908 as_bad (_("no such architecture: `%s'"), string);
2909
2910 *input_line_pointer = e;
2911 }
2912 else
2913 as_bad (_("missing cpu architecture"));
2914
2915 no_cond_jump_promotion = 0;
2916 if (*input_line_pointer == ','
2917 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
2918 {
2919 char *string;
2920 char e;
2921
2922 ++input_line_pointer;
2923 e = get_symbol_name (&string);
2924
2925 if (strcmp (string, "nojumps") == 0)
2926 no_cond_jump_promotion = 1;
2927 else if (strcmp (string, "jumps") == 0)
2928 ;
2929 else
2930 as_bad (_("no such architecture modifier: `%s'"), string);
2931
2932 (void) restore_line_pointer (e);
2933 }
2934
2935 demand_empty_rest_of_line ();
2936 }
2937
2938 enum bfd_architecture
2939 i386_arch (void)
2940 {
2941 if (cpu_arch_isa == PROCESSOR_IAMCU)
2942 {
2943 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
2944 || flag_code == CODE_64BIT)
2945 as_fatal (_("Intel MCU is 32bit ELF only"));
2946 return bfd_arch_iamcu;
2947 }
2948 else
2949 return bfd_arch_i386;
2950 }
2951
2952 unsigned long
2953 i386_mach (void)
2954 {
2955 if (startswith (default_arch, "x86_64"))
2956 {
2957 if (default_arch[6] == '\0')
2958 return bfd_mach_x86_64;
2959 else
2960 return bfd_mach_x64_32;
2961 }
2962 else if (!strcmp (default_arch, "i386")
2963 || !strcmp (default_arch, "iamcu"))
2964 {
2965 if (cpu_arch_isa == PROCESSOR_IAMCU)
2966 {
2967 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
2968 as_fatal (_("Intel MCU is 32bit ELF only"));
2969 return bfd_mach_i386_iamcu;
2970 }
2971 else
2972 return bfd_mach_i386_i386;
2973 }
2974 else
2975 as_fatal (_("unknown architecture"));
2976 }
2977 \f
2978 #include "opcodes/i386-tbl.h"
2979
2980 void
2981 md_begin (void)
2982 {
2983 /* Support pseudo prefixes like {disp32}. */
2984 lex_type ['{'] = LEX_BEGIN_NAME;
2985
2986 /* Initialize op_hash hash table. */
2987 op_hash = str_htab_create ();
2988
2989 {
2990 const insn_template *const *sets = i386_op_sets;
2991 const insn_template *const *end = sets + ARRAY_SIZE (i386_op_sets) - 1;
2992
2993 /* Type checks to compensate for the conversion through void * which
2994 occurs during hash table insertion / lookup. */
2995 (void) sizeof (sets == &current_templates->start);
2996 (void) sizeof (end == &current_templates->end);
2997 for (; sets < end; ++sets)
2998 if (str_hash_insert (op_hash, insn_name (*sets), sets, 0))
2999 as_fatal (_("duplicate %s"), insn_name (*sets));
3000 }
3001
3002 /* Initialize reg_hash hash table. */
3003 reg_hash = str_htab_create ();
3004 {
3005 const reg_entry *regtab;
3006 unsigned int regtab_size = i386_regtab_size;
3007
3008 for (regtab = i386_regtab; regtab_size--; regtab++)
3009 {
3010 switch (regtab->reg_type.bitfield.class)
3011 {
3012 case Reg:
3013 if (regtab->reg_type.bitfield.dword)
3014 {
3015 if (regtab->reg_type.bitfield.instance == Accum)
3016 reg_eax = regtab;
3017 }
3018 else if (regtab->reg_type.bitfield.tbyte)
3019 {
3020 /* There's no point inserting st(<N>) in the hash table, as
3021 parentheses aren't included in register_chars[] anyway. */
3022 if (regtab->reg_type.bitfield.instance != Accum)
3023 continue;
3024 reg_st0 = regtab;
3025 }
3026 break;
3027
3028 case SReg:
3029 switch (regtab->reg_num)
3030 {
3031 case 0: reg_es = regtab; break;
3032 case 2: reg_ss = regtab; break;
3033 case 3: reg_ds = regtab; break;
3034 }
3035 break;
3036
3037 case RegMask:
3038 if (!regtab->reg_num)
3039 reg_k0 = regtab;
3040 break;
3041 }
3042
3043 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3044 as_fatal (_("duplicate %s"), regtab->reg_name);
3045 }
3046 }
3047
3048 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3049 {
3050 int c;
3051 char *p;
3052
3053 for (c = 0; c < 256; c++)
3054 {
3055 if (ISDIGIT (c) || ISLOWER (c))
3056 {
3057 mnemonic_chars[c] = c;
3058 register_chars[c] = c;
3059 operand_chars[c] = c;
3060 }
3061 else if (ISUPPER (c))
3062 {
3063 mnemonic_chars[c] = TOLOWER (c);
3064 register_chars[c] = mnemonic_chars[c];
3065 operand_chars[c] = c;
3066 }
3067 else if (c == '{' || c == '}')
3068 {
3069 mnemonic_chars[c] = c;
3070 operand_chars[c] = c;
3071 }
3072 #ifdef SVR4_COMMENT_CHARS
3073 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3074 operand_chars[c] = c;
3075 #endif
3076
3077 if (ISALPHA (c) || ISDIGIT (c))
3078 identifier_chars[c] = c;
3079 else if (c >= 128)
3080 {
3081 identifier_chars[c] = c;
3082 operand_chars[c] = c;
3083 }
3084 }
3085
3086 #ifdef LEX_AT
3087 identifier_chars['@'] = '@';
3088 #endif
3089 #ifdef LEX_QM
3090 identifier_chars['?'] = '?';
3091 operand_chars['?'] = '?';
3092 #endif
3093 mnemonic_chars['_'] = '_';
3094 mnemonic_chars['-'] = '-';
3095 mnemonic_chars['.'] = '.';
3096 identifier_chars['_'] = '_';
3097 identifier_chars['.'] = '.';
3098
3099 for (p = operand_special_chars; *p != '\0'; p++)
3100 operand_chars[(unsigned char) *p] = *p;
3101 }
3102
3103 if (flag_code == CODE_64BIT)
3104 {
3105 #if defined (OBJ_COFF) && defined (TE_PE)
3106 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3107 ? 32 : 16);
3108 #else
3109 x86_dwarf2_return_column = 16;
3110 #endif
3111 x86_cie_data_alignment = -8;
3112 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3113 x86_sframe_cfa_sp_reg = 7;
3114 x86_sframe_cfa_fp_reg = 6;
3115 #endif
3116 }
3117 else
3118 {
3119 x86_dwarf2_return_column = 8;
3120 x86_cie_data_alignment = -4;
3121 }
3122
3123 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3124      can be turned into a BRANCH_PREFIX frag.  */
3125 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3126 abort ();
3127 }
3128
3129 void
3130 i386_print_statistics (FILE *file)
3131 {
3132 htab_print_statistics (file, "i386 opcode", op_hash);
3133 htab_print_statistics (file, "i386 register", reg_hash);
3134 }
3135
3136 void
3137 i386_md_end (void)
3138 {
3139 htab_delete (op_hash);
3140 htab_delete (reg_hash);
3141 }
3142 \f
3143 #ifdef DEBUG386
3144
3145 /* Debugging routines for md_assemble. */
3146 static void pte (insn_template *);
3147 static void pt (i386_operand_type);
3148 static void pe (expressionS *);
3149 static void ps (symbolS *);
3150
3151 static void
3152 pi (const char *line, i386_insn *x)
3153 {
3154 unsigned int j;
3155
3156 fprintf (stdout, "%s: template ", line);
3157 pte (&x->tm);
3158 fprintf (stdout, " address: base %s index %s scale %x\n",
3159 x->base_reg ? x->base_reg->reg_name : "none",
3160 x->index_reg ? x->index_reg->reg_name : "none",
3161 x->log2_scale_factor);
3162 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3163 x->rm.mode, x->rm.reg, x->rm.regmem);
3164 fprintf (stdout, " sib: base %x index %x scale %x\n",
3165 x->sib.base, x->sib.index, x->sib.scale);
3166 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3167 (x->rex & REX_W) != 0,
3168 (x->rex & REX_R) != 0,
3169 (x->rex & REX_X) != 0,
3170 (x->rex & REX_B) != 0);
3171 for (j = 0; j < x->operands; j++)
3172 {
3173 fprintf (stdout, " #%d: ", j + 1);
3174 pt (x->types[j]);
3175 fprintf (stdout, "\n");
3176 if (x->types[j].bitfield.class == Reg
3177 || x->types[j].bitfield.class == RegMMX
3178 || x->types[j].bitfield.class == RegSIMD
3179 || x->types[j].bitfield.class == RegMask
3180 || x->types[j].bitfield.class == SReg
3181 || x->types[j].bitfield.class == RegCR
3182 || x->types[j].bitfield.class == RegDR
3183 || x->types[j].bitfield.class == RegTR
3184 || x->types[j].bitfield.class == RegBND)
3185 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3186 if (operand_type_check (x->types[j], imm))
3187 pe (x->op[j].imms);
3188 if (operand_type_check (x->types[j], disp))
3189 pe (x->op[j].disps);
3190 }
3191 }
3192
3193 static void
3194 pte (insn_template *t)
3195 {
3196 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3197 static const char *const opc_spc[] = {
3198 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3199 "XOP08", "XOP09", "XOP0A",
3200 };
3201 unsigned int j;
3202
3203 fprintf (stdout, " %d operands ", t->operands);
3204 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3205 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3206 if (opc_spc[t->opcode_modifier.opcodespace])
3207 fprintf (stdout, "space %s ", opc_spc[t->opcode_modifier.opcodespace]);
3208 fprintf (stdout, "opcode %x ", t->base_opcode);
3209 if (t->extension_opcode != None)
3210 fprintf (stdout, "ext %x ", t->extension_opcode);
3211 if (t->opcode_modifier.d)
3212 fprintf (stdout, "D");
3213 if (t->opcode_modifier.w)
3214 fprintf (stdout, "W");
3215 fprintf (stdout, "\n");
3216 for (j = 0; j < t->operands; j++)
3217 {
3218 fprintf (stdout, " #%d type ", j + 1);
3219 pt (t->operand_types[j]);
3220 fprintf (stdout, "\n");
3221 }
3222 }
3223
3224 static void
3225 pe (expressionS *e)
3226 {
3227 fprintf (stdout, " operation %d\n", e->X_op);
3228 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3229 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3230 if (e->X_add_symbol)
3231 {
3232 fprintf (stdout, " add_symbol ");
3233 ps (e->X_add_symbol);
3234 fprintf (stdout, "\n");
3235 }
3236 if (e->X_op_symbol)
3237 {
3238 fprintf (stdout, " op_symbol ");
3239 ps (e->X_op_symbol);
3240 fprintf (stdout, "\n");
3241 }
3242 }
3243
3244 static void
3245 ps (symbolS *s)
3246 {
3247 fprintf (stdout, "%s type %s%s",
3248 S_GET_NAME (s),
3249 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3250 segment_name (S_GET_SEGMENT (s)));
3251 }
3252
3253 static struct type_name
3254 {
3255 i386_operand_type mask;
3256 const char *name;
3257 }
3258 const type_names[] =
3259 {
3260 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3261 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3262 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3263 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3264 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3265 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3266 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3267 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3268 { { .bitfield = { .imm8 = 1 } }, "i8" },
3269 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3270 { { .bitfield = { .imm16 = 1 } }, "i16" },
3271 { { .bitfield = { .imm32 = 1 } }, "i32" },
3272 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3273 { { .bitfield = { .imm64 = 1 } }, "i64" },
3274 { { .bitfield = { .imm1 = 1 } }, "i1" },
3275 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3276 { { .bitfield = { .disp8 = 1 } }, "d8" },
3277 { { .bitfield = { .disp16 = 1 } }, "d16" },
3278 { { .bitfield = { .disp32 = 1 } }, "d32" },
3279 { { .bitfield = { .disp64 = 1 } }, "d64" },
3280 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3281 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3282 { { .bitfield = { .class = RegCR } }, "control reg" },
3283 { { .bitfield = { .class = RegTR } }, "test reg" },
3284 { { .bitfield = { .class = RegDR } }, "debug reg" },
3285 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3286 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3287 { { .bitfield = { .class = SReg } }, "SReg" },
3288 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3289 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3290 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3291 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3292 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3293 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3294 };
3295
3296 static void
3297 pt (i386_operand_type t)
3298 {
3299 unsigned int j;
3300 i386_operand_type a;
3301
3302 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3303 {
3304 a = operand_type_and (t, type_names[j].mask);
3305 if (operand_type_equal (&a, &type_names[j].mask))
3306 fprintf (stdout, "%s, ", type_names[j].name);
3307 }
3308 fflush (stdout);
3309 }
3310
3311 #endif /* DEBUG386 */
3312 \f
3313 static bfd_reloc_code_real_type
3314 reloc (unsigned int size,
3315 int pcrel,
3316 int sign,
3317 bfd_reloc_code_real_type other)
3318 {
3319 if (other != NO_RELOC)
3320 {
3321 reloc_howto_type *rel;
3322
3323 if (size == 8)
3324 switch (other)
3325 {
3326    case BFD_RELOC_X86_64_GOT32:
3327      return BFD_RELOC_X86_64_GOT64;
3329    case BFD_RELOC_X86_64_GOTPLT64:
3330      return BFD_RELOC_X86_64_GOTPLT64;
3332    case BFD_RELOC_X86_64_PLTOFF64:
3333      return BFD_RELOC_X86_64_PLTOFF64;
3335 case BFD_RELOC_X86_64_GOTPC32:
3336 other = BFD_RELOC_X86_64_GOTPC64;
3337 break;
3338 case BFD_RELOC_X86_64_GOTPCREL:
3339 other = BFD_RELOC_X86_64_GOTPCREL64;
3340 break;
3341 case BFD_RELOC_X86_64_TPOFF32:
3342 other = BFD_RELOC_X86_64_TPOFF64;
3343 break;
3344 case BFD_RELOC_X86_64_DTPOFF32:
3345 other = BFD_RELOC_X86_64_DTPOFF64;
3346 break;
3347 default:
3348 break;
3349 }
3350
3351 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3352 if (other == BFD_RELOC_SIZE32)
3353 {
3354 if (size == 8)
3355 other = BFD_RELOC_SIZE64;
3356 if (pcrel)
3357 {
3358 as_bad (_("there are no pc-relative size relocations"));
3359 return NO_RELOC;
3360 }
3361 }
3362 #endif
3363
3364 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3365 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3366 sign = -1;
3367
3368 rel = bfd_reloc_type_lookup (stdoutput, other);
3369 if (!rel)
3370 as_bad (_("unknown relocation (%u)"), other);
3371 else if (size != bfd_get_reloc_size (rel))
3372 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3373 bfd_get_reloc_size (rel),
3374 size);
3375 else if (pcrel && !rel->pc_relative)
3376 as_bad (_("non-pc-relative relocation for pc-relative field"));
3377 else if ((rel->complain_on_overflow == complain_overflow_signed
3378 && !sign)
3379 || (rel->complain_on_overflow == complain_overflow_unsigned
3380 && sign > 0))
3381 as_bad (_("relocated field and relocation type differ in signedness"));
3382 else
3383 return other;
3384 return NO_RELOC;
3385 }
3386
3387 if (pcrel)
3388 {
3389 if (!sign)
3390 as_bad (_("there are no unsigned pc-relative relocations"));
3391 switch (size)
3392 {
3393 case 1: return BFD_RELOC_8_PCREL;
3394 case 2: return BFD_RELOC_16_PCREL;
3395 case 4: return BFD_RELOC_32_PCREL;
3396 case 8: return BFD_RELOC_64_PCREL;
3397 }
3398 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3399 }
3400 else
3401 {
3402 if (sign > 0)
3403 switch (size)
3404 {
3405 case 4: return BFD_RELOC_X86_64_32S;
3406 }
3407 else
3408 switch (size)
3409 {
3410 case 1: return BFD_RELOC_8;
3411 case 2: return BFD_RELOC_16;
3412 case 4: return BFD_RELOC_32;
3413 case 8: return BFD_RELOC_64;
3414 }
3415 as_bad (_("cannot do %s %u byte relocation"),
3416 sign > 0 ? "signed" : "unsigned", size);
3417 }
3418
3419 return NO_RELOC;
3420 }
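/* Example: reloc (4, 1, 1, NO_RELOC) yields BFD_RELOC_32_PCREL, while
   reloc (4, 0, 1, NO_RELOC) yields BFD_RELOC_X86_64_32S; an unsigned
   pc-relative request is diagnosed and still falls through to the
   size-based selection.  */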
3421
3422 /* Here we decide which fixups can be adjusted to make them relative to
3423 the beginning of the section instead of the symbol. Basically we need
3424 to make sure that the dynamic relocations are done correctly, so in
3425 some cases we force the original symbol to be used. */
3426
3427 int
3428 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3429 {
3430 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3431 if (!IS_ELF)
3432 return 1;
3433
3434 /* Don't adjust pc-relative references to merge sections in 64-bit
3435 mode. */
3436 if (use_rela_relocations
3437 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3438 && fixP->fx_pcrel)
3439 return 0;
3440
3441 /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
3442 and changed later by validate_fix. */
3443 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3444 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3445 return 0;
3446
3447 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3448 for size relocations. */
3449 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3450 || fixP->fx_r_type == BFD_RELOC_SIZE64
3451 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3452 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3453 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3454 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3455 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3456 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3457 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3458 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3459 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3460 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3461 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3462 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3463 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3464 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3465 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3466 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3467 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3468 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3469 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3470 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3471 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3472 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3473 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3474 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3475 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3476 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3477 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3478 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3479 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3480 return 0;
3481 #endif
3482 return 1;
3483 }
3484
3485 static INLINE bool
3486 want_disp32 (const insn_template *t)
3487 {
3488 return flag_code != CODE_64BIT
3489 || i.prefix[ADDR_PREFIX]
3490 || (t->base_opcode == 0x8d
3491 && t->opcode_modifier.opcodespace == SPACE_BASE
3492 && (!i.types[1].bitfield.qword
3493 || t->opcode_modifier.size == SIZE32));
3494 }
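/* Note: base_opcode 0x8d in SPACE_BASE is LEA.  The extra clause above
   keeps a 32-bit displacement for LEA when the destination isn't a 64-bit
   register (or the template is fixed at 32-bit size), since only the low
   32 bits of the address matter then - an inference from the checks
   above, not a statement from the original source.  */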
3495
3496 static int
3497 intel_float_operand (const char *mnemonic)
3498 {
3499 /* Note that the value returned is meaningful only for opcodes with (memory)
3500 operands, hence the code here is free to improperly handle opcodes that
3501 have no operands (for better performance and smaller code). */
3502
3503 if (mnemonic[0] != 'f')
3504 return 0; /* non-math */
3505
3506 switch (mnemonic[1])
3507 {
3508 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3509      the fs segment override prefix are not currently handled, because no
3510      call path can make opcodes without operands get here.  */
3511 case 'i':
3512 return 2 /* integer op */;
3513 case 'l':
3514 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3515 return 3; /* fldcw/fldenv */
3516 break;
3517 case 'n':
3518 if (mnemonic[2] != 'o' /* fnop */)
3519 return 3; /* non-waiting control op */
3520 break;
3521 case 'r':
3522 if (mnemonic[2] == 's')
3523 return 3; /* frstor/frstpm */
3524 break;
3525 case 's':
3526 if (mnemonic[2] == 'a')
3527 return 3; /* fsave */
3528 if (mnemonic[2] == 't')
3529 {
3530 switch (mnemonic[3])
3531 {
3532 case 'c': /* fstcw */
3533 case 'd': /* fstdw */
3534 case 'e': /* fstenv */
3535 case 's': /* fsts[gw] */
3536 return 3;
3537 }
3538 }
3539 break;
3540 case 'x':
3541 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3542 return 0; /* fxsave/fxrstor are not really math ops */
3543 break;
3544 }
3545
3546 return 1;
3547 }
3548
3549 static INLINE void
3550 install_template (const insn_template *t)
3551 {
3552 unsigned int l;
3553
3554 i.tm = *t;
3555
3556 /* Note that for pseudo prefixes this produces a length of 1. But for them
3557 the length isn't interesting at all. */
3558 for (l = 1; l < 4; ++l)
3559 if (!(t->base_opcode >> (8 * l)))
3560 break;
3561
3562 i.opcode_length = l;
3563 }
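/* E.g. a base_opcode of 0x90 yields i.opcode_length == 1, while a value
   with two significant low bytes (say 0x38f1) yields 2; the opcode space
   is tracked separately in opcode_modifier.  */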
3564
3565 /* Build the VEX prefix. */
3566
3567 static void
3568 build_vex_prefix (const insn_template *t)
3569 {
3570 unsigned int register_specifier;
3571 unsigned int vector_length;
3572 unsigned int w;
3573
3574 /* Check register specifier. */
3575 if (i.vex.register_specifier)
3576 {
3577 register_specifier =
3578 ~register_number (i.vex.register_specifier) & 0xf;
3579 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3580 }
3581 else
3582 register_specifier = 0xf;
3583
3584   /* Use 2-byte VEX prefix by swapping destination and source operands
3585      if there is more than one register operand.  */
3586 if (i.reg_operands > 1
3587 && i.vec_encoding != vex_encoding_vex3
3588 && i.dir_encoding == dir_encoding_default
3589 && i.operands == i.reg_operands
3590 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3591 && i.tm.opcode_modifier.opcodespace == SPACE_0F
3592 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3593 && i.rex == REX_B)
3594 {
3595 unsigned int xchg = i.operands - 1;
3596 union i386_op temp_op;
3597 i386_operand_type temp_type;
3598
3599 temp_type = i.types[xchg];
3600 i.types[xchg] = i.types[0];
3601 i.types[0] = temp_type;
3602 temp_op = i.op[xchg];
3603 i.op[xchg] = i.op[0];
3604 i.op[0] = temp_op;
3605
3606 gas_assert (i.rm.mode == 3);
3607
3608 i.rex = REX_R;
3609 xchg = i.rm.regmem;
3610 i.rm.regmem = i.rm.reg;
3611 i.rm.reg = xchg;
3612
3613 if (i.tm.opcode_modifier.d)
3614 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3615 ? Opcode_ExtD : Opcode_SIMD_IntD;
3616 else /* Use the next insn. */
3617 install_template (&t[1]);
3618 }
3619
3620 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3621 are no memory operands and at least 3 register ones. */
3622 if (i.reg_operands >= 3
3623 && i.vec_encoding != vex_encoding_vex3
3624 && i.reg_operands == i.operands - i.imm_operands
3625 && i.tm.opcode_modifier.vex
3626 && i.tm.opcode_modifier.commutative
3627 && (i.tm.opcode_modifier.sse2avx || optimize > 1)
3628 && i.rex == REX_B
3629 && i.vex.register_specifier
3630 && !(i.vex.register_specifier->reg_flags & RegRex))
3631 {
3632 unsigned int xchg = i.operands - i.reg_operands;
3633 union i386_op temp_op;
3634 i386_operand_type temp_type;
3635
3636 gas_assert (i.tm.opcode_modifier.opcodespace == SPACE_0F);
3637 gas_assert (!i.tm.opcode_modifier.sae);
3638 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3639 &i.types[i.operands - 3]));
3640 gas_assert (i.rm.mode == 3);
3641
3642 temp_type = i.types[xchg];
3643 i.types[xchg] = i.types[xchg + 1];
3644 i.types[xchg + 1] = temp_type;
3645 temp_op = i.op[xchg];
3646 i.op[xchg] = i.op[xchg + 1];
3647 i.op[xchg + 1] = temp_op;
3648
3649 i.rex = 0;
3650 xchg = i.rm.regmem | 8;
3651 i.rm.regmem = ~register_specifier & 0xf;
3652 gas_assert (!(i.rm.regmem & 8));
3653 i.vex.register_specifier += xchg - i.rm.regmem;
3654 register_specifier = ~xchg & 0xf;
3655 }
3656
3657 if (i.tm.opcode_modifier.vex == VEXScalar)
3658 vector_length = avxscalar;
3659 else if (i.tm.opcode_modifier.vex == VEX256)
3660 vector_length = 1;
3661 else
3662 {
3663 unsigned int op;
3664
3665 /* Determine vector length from the last multi-length vector
3666 operand. */
3667 vector_length = 0;
3668 for (op = t->operands; op--;)
3669 if (t->operand_types[op].bitfield.xmmword
3670 && t->operand_types[op].bitfield.ymmword
3671 && i.types[op].bitfield.ymmword)
3672 {
3673 vector_length = 1;
3674 break;
3675 }
3676 }
3677
3678 /* Check the REX.W bit and VEXW. */
3679 if (i.tm.opcode_modifier.vexw == VEXWIG)
3680 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3681 else if (i.tm.opcode_modifier.vexw)
3682 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3683 else
3684 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3685
3686 /* Use 2-byte VEX prefix if possible. */
3687 if (w == 0
3688 && i.vec_encoding != vex_encoding_vex3
3689 && i.tm.opcode_modifier.opcodespace == SPACE_0F
3690 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3691 {
3692 /* 2-byte VEX prefix. */
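      /* Sketch of the encoding: byte 0 is 0xc5; byte 1 packs ~REX.R in
	 bit 7, the inverted vvvv specifier in bits 6:3, L in bit 2 and
	 pp in bits 1:0, matching the shifts below.  */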
3693 unsigned int r;
3694
3695 i.vex.length = 2;
3696 i.vex.bytes[0] = 0xc5;
3697
3698 /* Check the REX.R bit. */
3699 r = (i.rex & REX_R) ? 0 : 1;
3700 i.vex.bytes[1] = (r << 7
3701 | register_specifier << 3
3702 | vector_length << 2
3703 | i.tm.opcode_modifier.opcodeprefix);
3704 }
3705 else
3706 {
3707 /* 3-byte VEX prefix. */
3708 i.vex.length = 3;
3709
3710 switch (i.tm.opcode_modifier.opcodespace)
3711 {
3712 case SPACE_0F:
3713 case SPACE_0F38:
3714 case SPACE_0F3A:
3715 i.vex.bytes[0] = 0xc4;
3716 break;
3717 case SPACE_XOP08:
3718 case SPACE_XOP09:
3719 case SPACE_XOP0A:
3720 i.vex.bytes[0] = 0x8f;
3721 break;
3722 default:
3723 abort ();
3724 }
3725
3726       /* The high 3 bits of the second VEX byte are 1's complement
3727 of RXB bits from REX. */
3728 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3729
3730 i.vex.bytes[2] = (w << 7
3731 | register_specifier << 3
3732 | vector_length << 2
3733 | i.tm.opcode_modifier.opcodeprefix);
3734 }
3735 }
3736
3737 static INLINE bool
3738 is_evex_encoding (const insn_template *t)
3739 {
3740 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3741 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3742 || t->opcode_modifier.sae;
3743 }
3744
3745 static INLINE bool
3746 is_any_vex_encoding (const insn_template *t)
3747 {
3748 return t->opcode_modifier.vex || is_evex_encoding (t);
3749 }
3750
3751 static unsigned int
3752 get_broadcast_bytes (const insn_template *t, bool diag)
3753 {
3754 unsigned int op, bytes;
3755 const i386_operand_type *types;
3756
3757 if (i.broadcast.type)
3758 return i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
3759 * i.broadcast.type);
3760
3761 gas_assert (intel_syntax);
3762
3763 for (op = 0; op < t->operands; ++op)
3764 if (t->operand_types[op].bitfield.baseindex)
3765 break;
3766
3767 gas_assert (op < t->operands);
3768
3769 if (t->opcode_modifier.evex
3770 && t->opcode_modifier.evex != EVEXDYN)
3771 switch (i.broadcast.bytes)
3772 {
3773 case 1:
3774 if (t->operand_types[op].bitfield.word)
3775 return 2;
3776 /* Fall through. */
3777 case 2:
3778 if (t->operand_types[op].bitfield.dword)
3779 return 4;
3780 /* Fall through. */
3781 case 4:
3782 if (t->operand_types[op].bitfield.qword)
3783 return 8;
3784 /* Fall through. */
3785 case 8:
3786 if (t->operand_types[op].bitfield.xmmword)
3787 return 16;
3788 if (t->operand_types[op].bitfield.ymmword)
3789 return 32;
3790 if (t->operand_types[op].bitfield.zmmword)
3791 return 64;
3792 /* Fall through. */
3793 default:
3794 abort ();
3795 }
3796
3797 gas_assert (op + 1 < t->operands);
3798
3799 if (t->operand_types[op + 1].bitfield.xmmword
3800 + t->operand_types[op + 1].bitfield.ymmword
3801 + t->operand_types[op + 1].bitfield.zmmword > 1)
3802 {
3803 types = &i.types[op + 1];
3804 diag = false;
3805 }
3806   else /* Ambiguous - guess with a preference for non-AVX512VL forms.  */
3807 types = &t->operand_types[op];
3808
3809 if (types->bitfield.zmmword)
3810 bytes = 64;
3811 else if (types->bitfield.ymmword)
3812 bytes = 32;
3813 else
3814 bytes = 16;
3815
3816 if (diag)
3817 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3818 insn_name (t), bytes * 8);
3819
3820 return bytes;
3821 }
3822
3823 /* Build the EVEX prefix. */
3824
3825 static void
3826 build_evex_prefix (void)
3827 {
3828 unsigned int register_specifier, w;
3829 rex_byte vrex_used = 0;
3830
3831 /* Check register specifier. */
3832 if (i.vex.register_specifier)
3833 {
3834 gas_assert ((i.vrex & REX_X) == 0);
3835
3836 register_specifier = i.vex.register_specifier->reg_num;
3837 if ((i.vex.register_specifier->reg_flags & RegRex))
3838 register_specifier += 8;
3839 /* The upper 16 registers are encoded in the fourth byte of the
3840 EVEX prefix. */
3841 if (!(i.vex.register_specifier->reg_flags & RegVRex))
3842 i.vex.bytes[3] = 0x8;
3843 register_specifier = ~register_specifier & 0xf;
3844 }
3845 else
3846 {
3847 register_specifier = 0xf;
3848
3849 /* Encode upper 16 vector index register in the fourth byte of
3850 the EVEX prefix. */
3851 if (!(i.vrex & REX_X))
3852 i.vex.bytes[3] = 0x8;
3853 else
3854 vrex_used |= REX_X;
3855 }
3856
3857 /* 4 byte EVEX prefix. */
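  /* Layout sketch: P0 = 0x62; P1 = ~R ~X ~B R' <opcode space>;
     P2 = W ~vvvv U pp; P3 = z L'L b V' aaa.  The individual pieces
     are filled in below.  */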
3858 i.vex.length = 4;
3859 i.vex.bytes[0] = 0x62;
3860
3861   /* The high 3 bits of the second EVEX byte are 1's complement of RXB
3862 bits from REX. */
3863 gas_assert (i.tm.opcode_modifier.opcodespace >= SPACE_0F);
3864 gas_assert (i.tm.opcode_modifier.opcodespace <= SPACE_EVEXMAP6);
3865 i.vex.bytes[1] = (~i.rex & 0x7) << 5 | i.tm.opcode_modifier.opcodespace;
3866
3867   /* The fifth bit of the second EVEX byte is 1's complement of the
3868 REX_R bit in VREX. */
3869 if (!(i.vrex & REX_R))
3870 i.vex.bytes[1] |= 0x10;
3871 else
3872 vrex_used |= REX_R;
3873
3874 if ((i.reg_operands + i.imm_operands) == i.operands)
3875 {
3876 /* When all operands are registers, the REX_X bit in REX is not
3877 used. We reuse it to encode the upper 16 registers, which is
3878 indicated by the REX_B bit in VREX. The REX_X bit is encoded
3879          as 1's complement.  */
3880 if ((i.vrex & REX_B))
3881 {
3882 vrex_used |= REX_B;
3883 i.vex.bytes[1] &= ~0x40;
3884 }
3885 }
3886
3887 /* EVEX instructions shouldn't need the REX prefix. */
3888 i.vrex &= ~vrex_used;
3889 gas_assert (i.vrex == 0);
3890
3891 /* Check the REX.W bit and VEXW. */
3892 if (i.tm.opcode_modifier.vexw == VEXWIG)
3893 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3894 else if (i.tm.opcode_modifier.vexw)
3895 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3896 else
3897 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3898
3899 /* The third byte of the EVEX prefix. */
3900 i.vex.bytes[2] = ((w << 7)
3901 | (register_specifier << 3)
3902 | 4 /* Encode the U bit. */
3903 | i.tm.opcode_modifier.opcodeprefix);
3904
3905 /* The fourth byte of the EVEX prefix. */
3906 /* The zeroing-masking bit. */
3907 if (i.mask.reg && i.mask.zeroing)
3908 i.vex.bytes[3] |= 0x80;
3909
3910 /* Don't always set the broadcast bit if there is no RC. */
3911 if (i.rounding.type == rc_none)
3912 {
3913 /* Encode the vector length. */
3914 unsigned int vec_length;
3915
3916 if (!i.tm.opcode_modifier.evex
3917 || i.tm.opcode_modifier.evex == EVEXDYN)
3918 {
3919 unsigned int op;
3920
3921 /* Determine vector length from the last multi-length vector
3922 operand. */
3923 for (op = i.operands; op--;)
3924 if (i.tm.operand_types[op].bitfield.xmmword
3925 + i.tm.operand_types[op].bitfield.ymmword
3926 + i.tm.operand_types[op].bitfield.zmmword > 1)
3927 {
3928 if (i.types[op].bitfield.zmmword)
3929 {
3930 i.tm.opcode_modifier.evex = EVEX512;
3931 break;
3932 }
3933 else if (i.types[op].bitfield.ymmword)
3934 {
3935 i.tm.opcode_modifier.evex = EVEX256;
3936 break;
3937 }
3938 else if (i.types[op].bitfield.xmmword)
3939 {
3940 i.tm.opcode_modifier.evex = EVEX128;
3941 break;
3942 }
3943 else if (i.broadcast.bytes && op == i.broadcast.operand)
3944 {
3945 switch (get_broadcast_bytes (&i.tm, true))
3946 {
3947 case 64:
3948 i.tm.opcode_modifier.evex = EVEX512;
3949 break;
3950 case 32:
3951 i.tm.opcode_modifier.evex = EVEX256;
3952 break;
3953 case 16:
3954 i.tm.opcode_modifier.evex = EVEX128;
3955 break;
3956 default:
3957 abort ();
3958 }
3959 break;
3960 }
3961 }
3962
3963 if (op >= MAX_OPERANDS)
3964 abort ();
3965 }
3966
3967 switch (i.tm.opcode_modifier.evex)
3968 {
3969 case EVEXLIG: /* LL' is ignored */
3970 vec_length = evexlig << 5;
3971 break;
3972 case EVEX128:
3973 vec_length = 0 << 5;
3974 break;
3975 case EVEX256:
3976 vec_length = 1 << 5;
3977 break;
3978 case EVEX512:
3979 vec_length = 2 << 5;
3980 break;
3981 default:
3982 abort ();
3983 break;
3984 }
3985 i.vex.bytes[3] |= vec_length;
3986 /* Encode the broadcast bit. */
3987 if (i.broadcast.bytes)
3988 i.vex.bytes[3] |= 0x10;
3989 }
3990 else if (i.rounding.type != saeonly)
3991 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
3992 else
3993 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
3994
3995 if (i.mask.reg)
3996 i.vex.bytes[3] |= i.mask.reg->reg_num;
3997 }
3998
3999 static void
4000 process_immext (void)
4001 {
4002 expressionS *exp;
4003
4004 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4005 which is coded in the same place as an 8-bit immediate field
4006 would be. Here we fake an 8-bit immediate operand from the
4007 opcode suffix stored in tm.extension_opcode.
4008
4009      AVX instructions also use this encoding for some of the
4010      3-argument instructions.  */
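  /* A hedged example: 3DNow! "pfadd %mm1, %mm0" is encoded as
     0x0f 0x0f 0xc1 0x9e, where the trailing 0x9e opcode suffix is
     emitted through the immediate machinery set up below.  */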
4011
4012 gas_assert (i.imm_operands <= 1
4013 && (i.operands <= 2
4014 || (is_any_vex_encoding (&i.tm)
4015 && i.operands <= 4)));
4016
4017 exp = &im_expressions[i.imm_operands++];
4018 i.op[i.operands].imms = exp;
4019 i.types[i.operands].bitfield.imm8 = 1;
4020 i.operands++;
4021 exp->X_op = O_constant;
4022 exp->X_add_number = i.tm.extension_opcode;
4023 i.tm.extension_opcode = None;
4024 }
4025
4026
4027 static int
4028 check_hle (void)
4029 {
4030 switch (i.tm.opcode_modifier.prefixok)
4031 {
4032 default:
4033 abort ();
4034 case PrefixLock:
4035 case PrefixNone:
4036 case PrefixNoTrack:
4037 case PrefixRep:
4038 as_bad (_("invalid instruction `%s' after `%s'"),
4039 insn_name (&i.tm), i.hle_prefix);
4040 return 0;
4041 case PrefixHLELock:
4042 if (i.prefix[LOCK_PREFIX])
4043 return 1;
4044 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4045 return 0;
4046 case PrefixHLEAny:
4047 return 1;
4048 case PrefixHLERelease:
4049 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4050 {
4051 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4052 insn_name (&i.tm));
4053 return 0;
4054 }
4055 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4056 {
4057 as_bad (_("memory destination needed for instruction `%s'"
4058 " after `xrelease'"), insn_name (&i.tm));
4059 return 0;
4060 }
4061 return 1;
4062 }
4063 }
4064
4065 /* Encode aligned vector move as unaligned vector move. */
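/* For example (a sketch of the effect): with -muse-unaligned-vector-move,
   "movaps %xmm0, (%rax)" (0f 29) is emitted as "movups" (0f 11), and
   "vmovdqa" (66 0f 6f/7f) becomes "vmovdqu" (f3 0f 6f/7f) by flipping
   the opcode prefix.  */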
4066
4067 static void
4068 encode_with_unaligned_vector_move (void)
4069 {
4070 switch (i.tm.base_opcode)
4071 {
4072 case 0x28: /* Load instructions. */
4073 case 0x29: /* Store instructions. */
4074 /* movaps/movapd/vmovaps/vmovapd. */
4075 if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4076 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4077 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4078 break;
4079 case 0x6f: /* Load instructions. */
4080 case 0x7f: /* Store instructions. */
4081 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4082 if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4083 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4084 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4085 break;
4086 default:
4087 break;
4088 }
4089 }
4090
4091 /* Try the shortest encoding by shortening operand size. */
4092
4093 static void
4094 optimize_encoding (void)
4095 {
4096 unsigned int j;
4097
4098 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4099 && i.tm.base_opcode == 0x8d)
4100 {
4101 /* Optimize: -O:
4102 lea symbol, %rN -> mov $symbol, %rN
4103 lea (%rM), %rN -> mov %rM, %rN
4104 lea (,%rM,1), %rN -> mov %rM, %rN
4105
4106 and in 32-bit mode for 16-bit addressing
4107
4108 lea (%rM), %rN -> movzx %rM, %rN
4109
4110 and in 64-bit mode zap 32-bit addressing in favor of using a
4111 32-bit (or less) destination.
4112 */
4113 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4114 {
4115 if (!i.op[1].regs->reg_type.bitfield.word)
4116 i.tm.opcode_modifier.size = SIZE32;
4117 i.prefix[ADDR_PREFIX] = 0;
4118 }
4119
4120 if (!i.index_reg && !i.base_reg)
4121 {
4122 /* Handle:
4123 lea symbol, %rN -> mov $symbol, %rN
4124 */
4125 if (flag_code == CODE_64BIT)
4126 {
4127 /* Don't transform a relocation to a 16-bit one. */
4128 if (i.op[0].disps
4129 && i.op[0].disps->X_op != O_constant
4130 && i.op[1].regs->reg_type.bitfield.word)
4131 return;
4132
4133 if (!i.op[1].regs->reg_type.bitfield.qword
4134 || i.tm.opcode_modifier.size == SIZE32)
4135 {
4136 i.tm.base_opcode = 0xb8;
4137 i.tm.opcode_modifier.modrm = 0;
4138 if (!i.op[1].regs->reg_type.bitfield.word)
4139 i.types[0].bitfield.imm32 = 1;
4140 else
4141 {
4142 i.tm.opcode_modifier.size = SIZE16;
4143 i.types[0].bitfield.imm16 = 1;
4144 }
4145 }
4146 else
4147 {
4148 /* Subject to further optimization below. */
4149 i.tm.base_opcode = 0xc7;
4150 i.tm.extension_opcode = 0;
4151 i.types[0].bitfield.imm32s = 1;
4152 i.types[0].bitfield.baseindex = 0;
4153 }
4154 }
4155 /* Outside of 64-bit mode address and operand sizes have to match if
4156 a relocation is involved, as otherwise we wouldn't (currently) or
4157 even couldn't express the relocation correctly. */
4158 else if (i.op[0].disps
4159 && i.op[0].disps->X_op != O_constant
4160 && ((!i.prefix[ADDR_PREFIX])
4161 != (flag_code == CODE_32BIT
4162 ? i.op[1].regs->reg_type.bitfield.dword
4163 : i.op[1].regs->reg_type.bitfield.word)))
4164 return;
4165 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4166 destination is going to grow encoding size. */
4167 else if (flag_code == CODE_16BIT
4168 && (optimize <= 1 || optimize_for_space)
4169 && !i.prefix[ADDR_PREFIX]
4170 && i.op[1].regs->reg_type.bitfield.dword)
4171 return;
4172 else
4173 {
4174 i.tm.base_opcode = 0xb8;
4175 i.tm.opcode_modifier.modrm = 0;
4176 if (i.op[1].regs->reg_type.bitfield.dword)
4177 i.types[0].bitfield.imm32 = 1;
4178 else
4179 i.types[0].bitfield.imm16 = 1;
4180
4181 if (i.op[0].disps
4182 && i.op[0].disps->X_op == O_constant
4183 && i.op[1].regs->reg_type.bitfield.dword
4184 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4185 GCC 5. */
4186 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4187 i.op[0].disps->X_add_number &= 0xffff;
4188 }
4189
4190 i.tm.operand_types[0] = i.types[0];
4191 i.imm_operands = 1;
4192 if (!i.op[0].imms)
4193 {
4194 i.op[0].imms = &im_expressions[0];
4195 i.op[0].imms->X_op = O_absent;
4196 }
4197 }
4198 else if (i.op[0].disps
4199 && (i.op[0].disps->X_op != O_constant
4200 || i.op[0].disps->X_add_number))
4201 return;
4202 else
4203 {
4204 /* Handle:
4205 lea (%rM), %rN -> mov %rM, %rN
4206 lea (,%rM,1), %rN -> mov %rM, %rN
4207 lea (%rM), %rN -> movzx %rM, %rN
4208 */
4209 const reg_entry *addr_reg;
4210
4211 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4212 addr_reg = i.base_reg;
4213 else if (!i.base_reg
4214 && i.index_reg->reg_num != RegIZ
4215 && !i.log2_scale_factor)
4216 addr_reg = i.index_reg;
4217 else
4218 return;
4219
4220 if (addr_reg->reg_type.bitfield.word
4221 && i.op[1].regs->reg_type.bitfield.dword)
4222 {
4223 if (flag_code != CODE_32BIT)
4224 return;
4225 i.tm.opcode_modifier.opcodespace = SPACE_0F;
4226 i.tm.base_opcode = 0xb7;
4227 }
4228 else
4229 i.tm.base_opcode = 0x8b;
4230
4231 if (addr_reg->reg_type.bitfield.dword
4232 && i.op[1].regs->reg_type.bitfield.qword)
4233 i.tm.opcode_modifier.size = SIZE32;
4234
4235 i.op[0].regs = addr_reg;
4236 i.reg_operands = 2;
4237 }
4238
4239 i.mem_operands = 0;
4240 i.disp_operands = 0;
4241 i.prefix[ADDR_PREFIX] = 0;
4242 i.prefix[SEG_PREFIX] = 0;
4243 i.seg[0] = NULL;
4244 }
4245
4246 if (optimize_for_space
4247 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4248 && i.reg_operands == 1
4249 && i.imm_operands == 1
4250 && !i.types[1].bitfield.byte
4251 && i.op[0].imms->X_op == O_constant
4252 && fits_in_imm7 (i.op[0].imms->X_add_number)
4253 && (i.tm.base_opcode == 0xa8
4254 || (i.tm.base_opcode == 0xf6
4255 && i.tm.extension_opcode == 0x0)))
4256 {
4257 /* Optimize: -Os:
4258 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4259 */
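      /* e.g. "testl $0x40, %eax" (a9 40 00 00 00, 5 bytes) shrinks to
	 "testb $0x40, %al" (a8 40, 2 bytes); limiting the immediate to
	 7 bits keeps the resulting flags unchanged.  */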
4260 unsigned int base_regnum = i.op[1].regs->reg_num;
4261 if (flag_code == CODE_64BIT || base_regnum < 4)
4262 {
4263 i.types[1].bitfield.byte = 1;
4264 /* Ignore the suffix. */
4265 i.suffix = 0;
4266 /* Convert to byte registers. */
4267 if (i.types[1].bitfield.word)
4268 j = 16;
4269 else if (i.types[1].bitfield.dword)
4270 j = 32;
4271 else
4272 j = 48;
4273 if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4274 j += 8;
4275 i.op[1].regs -= j;
4276 }
4277 }
4278 else if (flag_code == CODE_64BIT
4279 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4280 && ((i.types[1].bitfield.qword
4281 && i.reg_operands == 1
4282 && i.imm_operands == 1
4283 && i.op[0].imms->X_op == O_constant
4284 && ((i.tm.base_opcode == 0xb8
4285 && i.tm.extension_opcode == None
4286 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4287 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4288 && ((i.tm.base_opcode == 0x24
4289 || i.tm.base_opcode == 0xa8)
4290 || (i.tm.base_opcode == 0x80
4291 && i.tm.extension_opcode == 0x4)
4292 || ((i.tm.base_opcode == 0xf6
4293 || (i.tm.base_opcode | 1) == 0xc7)
4294 && i.tm.extension_opcode == 0x0)))
4295 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4296 && i.tm.base_opcode == 0x83
4297 && i.tm.extension_opcode == 0x4)))
4298 || (i.types[0].bitfield.qword
4299 && ((i.reg_operands == 2
4300 && i.op[0].regs == i.op[1].regs
4301 && (i.tm.base_opcode == 0x30
4302 || i.tm.base_opcode == 0x28))
4303 || (i.reg_operands == 1
4304 && i.operands == 1
4305 && i.tm.base_opcode == 0x30)))))
4306 {
4307 /* Optimize: -O:
4308 andq $imm31, %r64 -> andl $imm31, %r32
4309 andq $imm7, %r64 -> andl $imm7, %r32
4310 testq $imm31, %r64 -> testl $imm31, %r32
4311 xorq %r64, %r64 -> xorl %r32, %r32
4312 subq %r64, %r64 -> subl %r32, %r32
4313 movq $imm31, %r64 -> movl $imm31, %r32
4314 movq $imm32, %r64 -> movl $imm32, %r32
4315 */
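      /* These are safe because in 64-bit mode a write to a 32-bit register
	 zero-extends into the full 64-bit register, so e.g.
	 "xorl %eax, %eax" clears all of %rax while saving the REX.W
	 prefix byte.  */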
4316 i.tm.opcode_modifier.size = SIZE32;
4317 if (i.imm_operands)
4318 {
4319 i.types[0].bitfield.imm32 = 1;
4320 i.types[0].bitfield.imm32s = 0;
4321 i.types[0].bitfield.imm64 = 0;
4322 }
4323 else
4324 {
4325 i.types[0].bitfield.dword = 1;
4326 i.types[0].bitfield.qword = 0;
4327 }
4328 i.types[1].bitfield.dword = 1;
4329 i.types[1].bitfield.qword = 0;
4330 if (i.tm.base_opcode == 0xb8 || (i.tm.base_opcode | 1) == 0xc7)
4331 {
4332 /* Handle
4333 movq $imm31, %r64 -> movl $imm31, %r32
4334 movq $imm32, %r64 -> movl $imm32, %r32
4335 */
4336 i.tm.operand_types[0].bitfield.imm32 = 1;
4337 i.tm.operand_types[0].bitfield.imm32s = 0;
4338 i.tm.operand_types[0].bitfield.imm64 = 0;
4339 if ((i.tm.base_opcode | 1) == 0xc7)
4340 {
4341 /* Handle
4342 movq $imm31, %r64 -> movl $imm31, %r32
4343 */
4344 i.tm.base_opcode = 0xb8;
4345 i.tm.extension_opcode = None;
4346 i.tm.opcode_modifier.w = 0;
4347 i.tm.opcode_modifier.modrm = 0;
4348 }
4349 }
4350 }
4351 else if (optimize > 1
4352 && !optimize_for_space
4353 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
4354 && i.reg_operands == 2
4355 && i.op[0].regs == i.op[1].regs
4356 && ((i.tm.base_opcode & ~(Opcode_D | 1)) == 0x8
4357 || (i.tm.base_opcode & ~(Opcode_D | 1)) == 0x20)
4358 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4359 {
4360 /* Optimize: -O2:
4361 andb %rN, %rN -> testb %rN, %rN
4362 andw %rN, %rN -> testw %rN, %rN
4363 andq %rN, %rN -> testq %rN, %rN
4364 orb %rN, %rN -> testb %rN, %rN
4365 orw %rN, %rN -> testw %rN, %rN
4366 orq %rN, %rN -> testq %rN, %rN
4367
4368 and outside of 64-bit mode
4369
4370 andl %rN, %rN -> testl %rN, %rN
4371 orl %rN, %rN -> testl %rN, %rN
4372 */
4373 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4374 }
4375 else if (i.reg_operands == 3
4376 && i.op[0].regs == i.op[1].regs
4377 && !i.types[2].bitfield.xmmword
4378 && (i.tm.opcode_modifier.vex
4379 || ((!i.mask.reg || i.mask.zeroing)
4380 && is_evex_encoding (&i.tm)
4381 && (i.vec_encoding != vex_encoding_evex
4382 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4383 || i.tm.cpu_flags.bitfield.cpuavx512vl
4384 || (i.tm.operand_types[2].bitfield.zmmword
4385 && i.types[2].bitfield.ymmword))))
4386 && i.tm.opcode_modifier.opcodespace == SPACE_0F
4387 && ((i.tm.base_opcode | 2) == 0x57
4388 || i.tm.base_opcode == 0xdf
4389 || i.tm.base_opcode == 0xef
4390 || (i.tm.base_opcode | 3) == 0xfb
4391 || i.tm.base_opcode == 0x42
4392 || i.tm.base_opcode == 0x47))
4393 {
4394 /* Optimize: -O1:
4395 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4396 vpsubq and vpsubw:
4397 EVEX VOP %zmmM, %zmmM, %zmmN
4398 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4399 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4400 EVEX VOP %ymmM, %ymmM, %ymmN
4401 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4402 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4403 VEX VOP %ymmM, %ymmM, %ymmN
4404 -> VEX VOP %xmmM, %xmmM, %xmmN
4405 VOP, one of vpandn and vpxor:
4406 VEX VOP %ymmM, %ymmM, %ymmN
4407 -> VEX VOP %xmmM, %xmmM, %xmmN
4408 VOP, one of vpandnd and vpandnq:
4409 EVEX VOP %zmmM, %zmmM, %zmmN
4410 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4411 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4412 EVEX VOP %ymmM, %ymmM, %ymmN
4413 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4414 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4415 VOP, one of vpxord and vpxorq:
4416 EVEX VOP %zmmM, %zmmM, %zmmN
4417 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4418 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4419 EVEX VOP %ymmM, %ymmM, %ymmN
4420 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4421 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4422 VOP, one of kxord and kxorq:
4423 VEX VOP %kM, %kM, %kN
4424 -> VEX kxorw %kM, %kM, %kN
4425 VOP, one of kandnd and kandnq:
4426 VEX VOP %kM, %kM, %kN
4427 -> VEX kandnw %kM, %kM, %kN
4428 */
4429 if (is_evex_encoding (&i.tm))
4430 {
4431 if (i.vec_encoding != vex_encoding_evex)
4432 {
4433 i.tm.opcode_modifier.vex = VEX128;
4434 i.tm.opcode_modifier.vexw = VEXW0;
4435 i.tm.opcode_modifier.evex = 0;
4436 }
4437 else if (optimize > 1)
4438 i.tm.opcode_modifier.evex = EVEX128;
4439 else
4440 return;
4441 }
4442 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4443 {
4444 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4445 i.tm.opcode_modifier.vexw = VEXW0;
4446 }
4447 else
4448 i.tm.opcode_modifier.vex = VEX128;
4449
4450 if (i.tm.opcode_modifier.vex)
4451 for (j = 0; j < 3; j++)
4452 {
4453 i.types[j].bitfield.xmmword = 1;
4454 i.types[j].bitfield.ymmword = 0;
4455 }
4456 }
4457 else if (i.vec_encoding != vex_encoding_evex
4458 && !i.types[0].bitfield.zmmword
4459 && !i.types[1].bitfield.zmmword
4460 && !i.mask.reg
4461 && !i.broadcast.bytes
4462 && is_evex_encoding (&i.tm)
4463 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4464 || (i.tm.base_opcode & ~4) == 0xdb
4465 || (i.tm.base_opcode & ~4) == 0xeb)
4466 && i.tm.extension_opcode == None)
4467 {
4468 /* Optimize: -O1:
4469 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4470 vmovdqu32 and vmovdqu64:
4471 EVEX VOP %xmmM, %xmmN
4472 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4473 EVEX VOP %ymmM, %ymmN
4474 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4475 EVEX VOP %xmmM, mem
4476 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4477 EVEX VOP %ymmM, mem
4478 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4479 EVEX VOP mem, %xmmN
4480	       -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4481 EVEX VOP mem, %ymmN
4482 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4483 VOP, one of vpand, vpandn, vpor, vpxor:
4484 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4485 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4486 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4487 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4488 EVEX VOP{d,q} mem, %xmmM, %xmmN
4489 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4490 EVEX VOP{d,q} mem, %ymmM, %ymmN
4491 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4492 */
4493 for (j = 0; j < i.operands; j++)
4494 if (operand_type_check (i.types[j], disp)
4495 && i.op[j].disps->X_op == O_constant)
4496 {
4497 /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4498 has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4499 bytes, we choose EVEX Disp8 over VEX Disp32. */
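	    /* A rough worked example: for a %ymm memory operand i.memshift
	       is 5 (32-byte granularity), so a displacement of 512 fits an
	       EVEX-scaled Disp8 (512 >> 5 == 16) but not a plain VEX Disp8;
	       in that case we bail out below and keep the EVEX form.  */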
4500 int evex_disp8, vex_disp8;
4501 unsigned int memshift = i.memshift;
4502 offsetT n = i.op[j].disps->X_add_number;
4503
4504 evex_disp8 = fits_in_disp8 (n);
4505 i.memshift = 0;
4506 vex_disp8 = fits_in_disp8 (n);
4507 if (evex_disp8 != vex_disp8)
4508 {
4509 i.memshift = memshift;
4510 return;
4511 }
4512
4513 i.types[j].bitfield.disp8 = vex_disp8;
4514 break;
4515 }
4516 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4517 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4518 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4519 i.tm.opcode_modifier.vex
4520 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4521 i.tm.opcode_modifier.vexw = VEXW0;
4522 /* VPAND, VPOR, and VPXOR are commutative. */
4523 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4524 i.tm.opcode_modifier.commutative = 1;
4525 i.tm.opcode_modifier.evex = 0;
4526 i.tm.opcode_modifier.masking = 0;
4527 i.tm.opcode_modifier.broadcast = 0;
4528 i.tm.opcode_modifier.disp8memshift = 0;
4529 i.memshift = 0;
4530 if (j < i.operands)
4531 i.types[j].bitfield.disp8
4532 = fits_in_disp8 (i.op[j].disps->X_add_number);
4533 }
4534 }
4535
4536 /* Return non-zero for load instruction. */
4537
4538 static int
4539 load_insn_p (void)
4540 {
4541 unsigned int dest;
4542 int any_vex_p = is_any_vex_encoding (&i.tm);
4543 unsigned int base_opcode = i.tm.base_opcode | 1;
4544
4545 if (!any_vex_p)
4546 {
4547 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4548 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
4549 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4550 return 0;
4551
4552 /* pop. */
4553 if (i.tm.mnem_off == MN_pop)
4554 return 1;
4555 }
4556
4557 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4558 {
4559 /* popf, popa. */
4560 if (i.tm.base_opcode == 0x9d
4561 || i.tm.base_opcode == 0x61)
4562 return 1;
4563
4564 /* movs, cmps, lods, scas. */
4565 if ((i.tm.base_opcode | 0xb) == 0xaf)
4566 return 1;
4567
4568 /* outs, xlatb. */
4569 if (base_opcode == 0x6f
4570 || i.tm.base_opcode == 0xd7)
4571 return 1;
4572	 /* NB: AMD-specific insns with implicit memory operands are
4573	    intentionally not covered.  */
4574 }
4575
4576 /* No memory operand. */
4577 if (!i.mem_operands)
4578 return 0;
4579
4580 if (any_vex_p)
4581 {
4582 /* vldmxcsr. */
4583 if (i.tm.base_opcode == 0xae
4584 && i.tm.opcode_modifier.vex
4585 && i.tm.opcode_modifier.opcodespace == SPACE_0F
4586 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4587 && i.tm.extension_opcode == 2)
4588 return 1;
4589 }
4590 else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE)
4591 {
4592 /* test, not, neg, mul, imul, div, idiv. */
4593 if ((i.tm.base_opcode == 0xf6 || i.tm.base_opcode == 0xf7)
4594 && i.tm.extension_opcode != 1)
4595 return 1;
4596
4597 /* inc, dec. */
4598 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4599 return 1;
4600
4601 /* add, or, adc, sbb, and, sub, xor, cmp. */
4602 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4603 return 1;
4604
4605 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4606 if ((base_opcode == 0xc1
4607 || (i.tm.base_opcode >= 0xd0 && i.tm.base_opcode <= 0xd3))
4608 && i.tm.extension_opcode != 6)
4609 return 1;
4610
4611 /* Check for x87 instructions. */
4612 if (base_opcode >= 0xd8 && base_opcode <= 0xdf)
4613 {
4614 /* Skip fst, fstp, fstenv, fstcw. */
4615 if (i.tm.base_opcode == 0xd9
4616 && (i.tm.extension_opcode == 2
4617 || i.tm.extension_opcode == 3
4618 || i.tm.extension_opcode == 6
4619 || i.tm.extension_opcode == 7))
4620 return 0;
4621
4622 /* Skip fisttp, fist, fistp, fstp. */
4623 if (i.tm.base_opcode == 0xdb
4624 && (i.tm.extension_opcode == 1
4625 || i.tm.extension_opcode == 2
4626 || i.tm.extension_opcode == 3
4627 || i.tm.extension_opcode == 7))
4628 return 0;
4629
4630 /* Skip fisttp, fst, fstp, fsave, fstsw. */
4631 if (i.tm.base_opcode == 0xdd
4632 && (i.tm.extension_opcode == 1
4633 || i.tm.extension_opcode == 2
4634 || i.tm.extension_opcode == 3
4635 || i.tm.extension_opcode == 6
4636 || i.tm.extension_opcode == 7))
4637 return 0;
4638
4639 /* Skip fisttp, fist, fistp, fbstp, fistp. */
4640 if (i.tm.base_opcode == 0xdf
4641 && (i.tm.extension_opcode == 1
4642 || i.tm.extension_opcode == 2
4643 || i.tm.extension_opcode == 3
4644 || i.tm.extension_opcode == 6
4645 || i.tm.extension_opcode == 7))
4646 return 0;
4647
4648 return 1;
4649 }
4650 }
4651 else if (i.tm.opcode_modifier.opcodespace == SPACE_0F)
4652 {
4653 /* bt, bts, btr, btc. */
4654 if (i.tm.base_opcode == 0xba
4655 && (i.tm.extension_opcode >= 4 && i.tm.extension_opcode <= 7))
4656 return 1;
4657
4658 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
4659 if (i.tm.base_opcode == 0xc7
4660 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4661 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4662 || i.tm.extension_opcode == 6))
4663 return 1;
4664
4665 /* fxrstor, ldmxcsr, xrstor. */
4666 if (i.tm.base_opcode == 0xae
4667 && (i.tm.extension_opcode == 1
4668 || i.tm.extension_opcode == 2
4669 || i.tm.extension_opcode == 5))
4670 return 1;
4671
4672 /* lgdt, lidt, lmsw. */
4673 if (i.tm.base_opcode == 0x01
4674 && (i.tm.extension_opcode == 2
4675 || i.tm.extension_opcode == 3
4676 || i.tm.extension_opcode == 6))
4677 return 1;
4678 }
4679
4680 dest = i.operands - 1;
4681
4682 /* Check fake imm8 operand and 3 source operands. */
4683 if ((i.tm.opcode_modifier.immext
4684 || i.tm.opcode_modifier.vexsources == VEX3SOURCES)
4685 && i.types[dest].bitfield.imm8)
4686 dest--;
4687
4688 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
4689 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
4690 && (base_opcode == 0x1
4691 || base_opcode == 0x9
4692 || base_opcode == 0x11
4693 || base_opcode == 0x19
4694 || base_opcode == 0x21
4695 || base_opcode == 0x29
4696 || base_opcode == 0x31
4697 || base_opcode == 0x39
4698 || (base_opcode | 2) == 0x87))
4699 return 1;
4700
4701 /* xadd. */
4702 if (i.tm.opcode_modifier.opcodespace == SPACE_0F
4703 && base_opcode == 0xc1)
4704 return 1;
4705
4706 /* Check for load instruction. */
4707 return (i.types[dest].bitfield.class != ClassNone
4708 || i.types[dest].bitfield.instance == Accum);
4709 }
4710
4711 /* Output lfence (0x0f 0xae 0xe8) after the instruction.  */
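/* e.g. with -mlfence-after-load=yes, "movl (%rax), %ebx" is followed by
   an lfence, as part of the LVI (load value injection) mitigations.  */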
4712
4713 static void
4714 insert_lfence_after (void)
4715 {
4716 if (lfence_after_load && load_insn_p ())
4717 {
4718 /* There are also two REP string instructions that require
4719 special treatment. Specifically, the compare string (CMPS)
4720 and scan string (SCAS) instructions set EFLAGS in a manner
4721 that depends on the data being compared/scanned. When used
4722 with a REP prefix, the number of iterations may therefore
4723 vary depending on this data. If the data is a program secret
4724 chosen by the adversary using an LVI method,
4725 then this data-dependent behavior may leak some aspect
4726 of the secret. */
4727 if (((i.tm.base_opcode | 0x1) == 0xa7
4728 || (i.tm.base_opcode | 0x1) == 0xaf)
4729 && i.prefix[REP_PREFIX])
4730 {
4731 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4732 insn_name (&i.tm));
4733 }
4734 char *p = frag_more (3);
4735 *p++ = 0xf;
4736 *p++ = 0xae;
4737 *p = 0xe8;
4738 }
4739 }
4740
4741 /* Output lfence (0x0f 0xae 0xe8) before the instruction.  */
4742
4743 static void
4744 insert_lfence_before (void)
4745 {
4746 char *p;
4747
4748 if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
4749 return;
4750
4751 if (i.tm.base_opcode == 0xff
4752 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4753 {
4754 /* Insert lfence before indirect branch if needed. */
4755
4756 if (lfence_before_indirect_branch == lfence_branch_none)
4757 return;
4758
4759 if (i.operands != 1)
4760 abort ();
4761
4762 if (i.reg_operands == 1)
4763 {
4764 /* Indirect branch via register. Don't insert lfence with
4765 -mlfence-after-load=yes. */
4766 if (lfence_after_load
4767 || lfence_before_indirect_branch == lfence_branch_memory)
4768 return;
4769 }
4770 else if (i.mem_operands == 1
4771 && lfence_before_indirect_branch != lfence_branch_register)
4772 {
4773 as_warn (_("indirect `%s` with memory operand should be avoided"),
4774 insn_name (&i.tm));
4775 return;
4776 }
4777 else
4778 return;
4779
4780 if (last_insn.kind != last_insn_other
4781 && last_insn.seg == now_seg)
4782 {
4783 as_warn_where (last_insn.file, last_insn.line,
4784 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4785 last_insn.name, insn_name (&i.tm));
4786 return;
4787 }
4788
4789 p = frag_more (3);
4790 *p++ = 0xf;
4791 *p++ = 0xae;
4792 *p = 0xe8;
4793 return;
4794 }
4795
4796 /* Output or/not/shl and lfence before near ret. */
4797 if (lfence_before_ret != lfence_before_ret_none
4798 && (i.tm.base_opcode == 0xc2
4799 || i.tm.base_opcode == 0xc3))
4800 {
4801 if (last_insn.kind != last_insn_other
4802 && last_insn.seg == now_seg)
4803 {
4804 as_warn_where (last_insn.file, last_insn.line,
4805 _("`%s` skips -mlfence-before-ret on `%s`"),
4806 last_insn.name, insn_name (&i.tm));
4807 return;
4808 }
4809
4810       /* Near ret ignores the operand size override under CPU64.  */
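      /* e.g. -mlfence-before-ret=or in 64-bit code emits
	 "orq $0x0, (%rsp)" (48 83 0c 24 00) followed by lfence, ahead of
	 the ret.  */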
4811 char prefix = flag_code == CODE_64BIT
4812 ? 0x48
4813 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
4814
4815 if (lfence_before_ret == lfence_before_ret_not)
4816 {
4817 /* not: 0xf71424, may add prefix
4818 for operand size override or 64-bit code. */
4819 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
4820 if (prefix)
4821 *p++ = prefix;
4822 *p++ = 0xf7;
4823 *p++ = 0x14;
4824 *p++ = 0x24;
4825 if (prefix)
4826 *p++ = prefix;
4827 *p++ = 0xf7;
4828 *p++ = 0x14;
4829 *p++ = 0x24;
4830 }
4831 else
4832 {
4833 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
4834 if (prefix)
4835 *p++ = prefix;
4836 if (lfence_before_ret == lfence_before_ret_or)
4837 {
4838 /* or: 0x830c2400, may add prefix
4839 for operand size override or 64-bit code. */
4840 *p++ = 0x83;
4841 *p++ = 0x0c;
4842 }
4843 else
4844 {
4845 /* shl: 0xc1242400, may add prefix
4846 for operand size override or 64-bit code. */
4847 *p++ = 0xc1;
4848 *p++ = 0x24;
4849 }
4850
4851 *p++ = 0x24;
4852 *p++ = 0x0;
4853 }
4854
4855 *p++ = 0xf;
4856 *p++ = 0xae;
4857 *p = 0xe8;
4858 }
4859 }
4860
4861 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
4862    parsing pass.  Instead of introducing a rarely used new insn attribute this
4863 utilizes a common pattern between affected templates. It is deemed
4864 acceptable that this will lead to unnecessary pass 2 preparations in a
4865 limited set of cases. */
4866 static INLINE bool may_need_pass2 (const insn_template *t)
4867 {
4868 return t->opcode_modifier.sse2avx
4869 /* Note that all SSE2AVX templates have at least one operand. */
4870 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
4871 : (t->opcode_modifier.opcodespace == SPACE_0F
4872 && (t->base_opcode | 1) == 0xbf)
4873 || (t->opcode_modifier.opcodespace == SPACE_BASE
4874 && t->base_opcode == 0x63);
4875 }
4876
4877 /* This is the guts of the machine-dependent assembler. LINE points to a
4878 machine dependent instruction. This function is supposed to emit
4879 the frags/bytes it assembles to. */
4880
4881 void
4882 md_assemble (char *line)
4883 {
4884 unsigned int j;
4885 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
4886 const char *end, *pass1_mnem = NULL;
4887 enum i386_error pass1_err = 0;
4888 const insn_template *t;
4889
4890 /* Initialize globals. */
4891 current_templates = NULL;
4892 retry:
4893 memset (&i, '\0', sizeof (i));
4894 i.rounding.type = rc_none;
4895 for (j = 0; j < MAX_OPERANDS; j++)
4896 i.reloc[j] = NO_RELOC;
4897 memset (disp_expressions, '\0', sizeof (disp_expressions));
4898 memset (im_expressions, '\0', sizeof (im_expressions));
4899 save_stack_p = save_stack;
4900
4901 /* First parse an instruction mnemonic & call i386_operand for the operands.
4902 We assume that the scrubber has arranged it so that line[0] is the valid
4903 start of a (possibly prefixed) mnemonic. */
4904
4905 end = parse_insn (line, mnemonic);
4906 if (end == NULL)
4907 {
4908 if (pass1_mnem != NULL)
4909 goto match_error;
4910 if (i.error != no_error)
4911 {
4912 gas_assert (current_templates != NULL);
4913 if (may_need_pass2 (current_templates->start) && !i.suffix)
4914 goto no_match;
4915 /* No point in trying a 2nd pass - it'll only find the same suffix
4916 again. */
4917 mnem_suffix = i.suffix;
4918 goto match_error;
4919 }
4920 return;
4921 }
4922 t = current_templates->start;
4923 if (may_need_pass2 (t))
4924 {
4925 /* Make a copy of the full line in case we need to retry. */
4926 copy = xstrdup (line);
4927 }
4928 line += end - line;
4929 mnem_suffix = i.suffix;
4930
4931 line = parse_operands (line, mnemonic);
4932 this_operand = -1;
4933 if (line == NULL)
4934 {
4935 free (copy);
4936 return;
4937 }
4938
4939 /* Now we've parsed the mnemonic into a set of templates, and have the
4940 operands at hand. */
4941
4942 /* All Intel opcodes have reversed operands except for "bound", "enter",
4943 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
4944 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
4945 intersegment "jmp" and "call" instructions with 2 immediate operands so
4946 that the immediate segment precedes the offset consistently in Intel and
4947 AT&T modes. */
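  /* e.g. Intel "sub eax, 1" is parsed destination-first and swapped here
     so that internally operands are always in AT&T order ("$1, %eax").  */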
4948 if (intel_syntax
4949 && i.operands > 1
4950 && (t->mnem_off != MN_bound)
4951 && !startswith (mnemonic, "invlpg")
4952 && !startswith (mnemonic, "monitor")
4953 && !startswith (mnemonic, "mwait")
4954 && (t->mnem_off != MN_pvalidate)
4955 && !startswith (mnemonic, "rmp")
4956 && (t->mnem_off != MN_tpause)
4957 && (t->mnem_off != MN_umwait)
4958 && !(i.operands == 2
4959 && operand_type_check (i.types[0], imm)
4960 && operand_type_check (i.types[1], imm)))
4961 swap_operands ();
4962
4963 /* The order of the immediates should be reversed
4964 for 2 immediates extrq and insertq instructions */
4965 if (i.imm_operands == 2
4966 && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
4967 swap_2_operands (0, 1);
4968
4969 if (i.imm_operands)
4970 optimize_imm ();
4971
4972 if (i.disp_operands && !want_disp32 (t)
4973 && (!t->opcode_modifier.jump
4974 || i.jumpabsolute || i.types[0].bitfield.baseindex))
4975 {
4976 for (j = 0; j < i.operands; ++j)
4977 {
4978 const expressionS *exp = i.op[j].disps;
4979
4980 if (!operand_type_check (i.types[j], disp))
4981 continue;
4982
4983 if (exp->X_op != O_constant)
4984 continue;
4985
4986	     /* Since the displacement is sign-extended to 64 bits, don't allow
4987 disp32 if it is out of range. */
4988 if (fits_in_signed_long (exp->X_add_number))
4989 continue;
4990
4991 i.types[j].bitfield.disp32 = 0;
4992 if (i.types[j].bitfield.baseindex)
4993 {
4994 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
4995 (uint64_t) exp->X_add_number);
4996 return;
4997 }
4998 }
4999 }
5000
5001   /* Don't optimize the displacement for movabs, since it only takes a
5002      64-bit displacement.  */
5003 if (i.disp_operands
5004 && i.disp_encoding <= disp_encoding_8bit
5005 && (flag_code != CODE_64BIT
5006 || strcmp (mnemonic, "movabs") != 0))
5007 optimize_disp ();
5008
5009 /* Next, we find a template that matches the given insn,
5010 making sure the overlap of the given operands types is consistent
5011 with the template operand types. */
5012
5013 if (!(t = match_template (mnem_suffix)))
5014 {
5015 const char *err_msg;
5016
5017 if (copy && !mnem_suffix)
5018 {
5019 line = copy;
5020 copy = NULL;
5021 no_match:
5022 pass1_err = i.error;
5023 pass1_mnem = insn_name (current_templates->start);
5024 goto retry;
5025 }
5026
5027 /* If a non-/only-64bit template (group) was found in pass 1, and if
5028 _some_ template (group) was found in pass 2, squash pass 1's
5029 error. */
5030 if (pass1_err == unsupported_64bit)
5031 pass1_mnem = NULL;
5032
5033 match_error:
5034 free (copy);
5035
5036 switch (pass1_mnem ? pass1_err : i.error)
5037 {
5038 default:
5039 abort ();
5040 case operand_size_mismatch:
5041 err_msg = _("operand size mismatch");
5042 break;
5043 case operand_type_mismatch:
5044 err_msg = _("operand type mismatch");
5045 break;
5046 case register_type_mismatch:
5047 err_msg = _("register type mismatch");
5048 break;
5049 case number_of_operands_mismatch:
5050 err_msg = _("number of operands mismatch");
5051 break;
5052 case invalid_instruction_suffix:
5053 err_msg = _("invalid instruction suffix");
5054 break;
5055 case bad_imm4:
5056 err_msg = _("constant doesn't fit in 4 bits");
5057 break;
5058 case unsupported_with_intel_mnemonic:
5059 err_msg = _("unsupported with Intel mnemonic");
5060 break;
5061 case unsupported_syntax:
5062 err_msg = _("unsupported syntax");
5063 break;
5064 case unsupported:
5065 as_bad (_("unsupported instruction `%s'"),
5066 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5067 return;
5068 case unsupported_on_arch:
5069 as_bad (_("`%s' is not supported on `%s%s'"),
5070 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5071 cpu_arch_name ? cpu_arch_name : default_arch,
5072 cpu_sub_arch_name ? cpu_sub_arch_name : "");
5073 return;
5074 case unsupported_64bit:
5075 if (ISLOWER (mnem_suffix))
5076 {
5077 if (flag_code == CODE_64BIT)
5078 as_bad (_("`%s%c' is not supported in 64-bit mode"),
5079 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5080 mnem_suffix);
5081 else
5082 as_bad (_("`%s%c' is only supported in 64-bit mode"),
5083 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5084 mnem_suffix);
5085 }
5086 else
5087 {
5088 if (flag_code == CODE_64BIT)
5089 as_bad (_("`%s' is not supported in 64-bit mode"),
5090 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5091 else
5092 as_bad (_("`%s' is only supported in 64-bit mode"),
5093 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5094 }
5095 return;
5096 case invalid_sib_address:
5097 err_msg = _("invalid SIB address");
5098 break;
5099 case invalid_vsib_address:
5100 err_msg = _("invalid VSIB address");
5101 break;
5102 case invalid_vector_register_set:
5103 err_msg = _("mask, index, and destination registers must be distinct");
5104 break;
5105 case invalid_tmm_register_set:
5106 err_msg = _("all tmm registers must be distinct");
5107 break;
5108 case invalid_dest_and_src_register_set:
5109 err_msg = _("destination and source registers must be distinct");
5110 break;
5111 case unsupported_vector_index_register:
5112 err_msg = _("unsupported vector index register");
5113 break;
5114 case unsupported_broadcast:
5115 err_msg = _("unsupported broadcast");
5116 break;
5117 case broadcast_needed:
5118 err_msg = _("broadcast is needed for operand of such type");
5119 break;
5120 case unsupported_masking:
5121 err_msg = _("unsupported masking");
5122 break;
5123 case mask_not_on_destination:
5124 err_msg = _("mask not on destination operand");
5125 break;
5126 case no_default_mask:
5127 err_msg = _("default mask isn't allowed");
5128 break;
5129 case unsupported_rc_sae:
5130 err_msg = _("unsupported static rounding/sae");
5131 break;
5132 case invalid_register_operand:
5133 err_msg = _("invalid register operand");
5134 break;
5135 }
5136 as_bad (_("%s for `%s'"), err_msg,
5137 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5138 return;
5139 }
5140
5141 free (copy);
5142
5143 if (sse_check != check_none
5144 /* The opcode space check isn't strictly needed; it's there only to
5145 bypass the logic below when easily possible. */
5146 && t->opcode_modifier.opcodespace >= SPACE_0F
5147 && t->opcode_modifier.opcodespace <= SPACE_0F3A
5148 && !i.tm.cpu_flags.bitfield.cpusse4a
5149 && !is_any_vex_encoding (t))
5150 {
5151 bool simd = false;
5152
5153 for (j = 0; j < t->operands; ++j)
5154 {
5155 if (t->operand_types[j].bitfield.class == RegMMX)
5156 break;
5157 if (t->operand_types[j].bitfield.class == RegSIMD)
5158 simd = true;
5159 }
5160
5161 if (j >= t->operands && simd)
5162 (sse_check == check_warning
5163 ? as_warn
5164 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
5165 }
5166
5167 if (i.tm.opcode_modifier.fwait)
5168 if (!add_prefix (FWAIT_OPCODE))
5169 return;
5170
5171 /* Check if REP prefix is OK. */
5172 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5173 {
5174 as_bad (_("invalid instruction `%s' after `%s'"),
5175 insn_name (&i.tm), i.rep_prefix);
5176 return;
5177 }
5178
5179 /* Check for lock without a lockable instruction. Destination operand
5180 must be memory unless it is xchg (0x86). */
5181 if (i.prefix[LOCK_PREFIX]
5182 && (i.tm.opcode_modifier.prefixok < PrefixLock
5183 || i.mem_operands == 0
5184 || (i.tm.base_opcode != 0x86
5185 && !(i.flags[i.operands - 1] & Operand_Mem))))
5186 {
5187 as_bad (_("expecting lockable instruction after `lock'"));
5188 return;
5189 }
5190
5191 if (is_any_vex_encoding (&i.tm)
5192 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5193 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
5194 {
5195 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
5196 if (i.prefix[DATA_PREFIX])
5197 {
5198 as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
5199 return;
5200 }
5201
5202 /* Don't allow e.g. KMOV in TLS code sequences. */
5203 for (j = i.imm_operands; j < i.operands; ++j)
5204 switch (i.reloc[j])
5205 {
5206 case BFD_RELOC_386_TLS_GOTIE:
5207 case BFD_RELOC_386_TLS_LE_32:
5208 case BFD_RELOC_X86_64_GOTTPOFF:
5209 case BFD_RELOC_X86_64_TLSLD:
5210 as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
5211 return;
5212 default:
5213 break;
5214 }
5215 }
5216
5217 /* Check if HLE prefix is OK. */
5218 if (i.hle_prefix && !check_hle ())
5219 return;
5220
5221 /* Check BND prefix. */
5222 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5223 as_bad (_("expecting valid branch instruction after `bnd'"));
5224
5225 /* Check NOTRACK prefix. */
5226 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5227 as_bad (_("expecting indirect branch instruction after `notrack'"));
5228
5229 if (i.tm.cpu_flags.bitfield.cpumpx)
5230 {
5231 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5232 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5233 else if (flag_code != CODE_16BIT
5234 ? i.prefix[ADDR_PREFIX]
5235 : i.mem_operands && !i.prefix[ADDR_PREFIX])
5236 as_bad (_("16-bit address isn't allowed in MPX instructions"));
5237 }
5238
5239 /* Insert BND prefix. */
5240 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5241 {
5242 if (!i.prefix[BND_PREFIX])
5243 add_prefix (BND_PREFIX_OPCODE);
5244 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5245 {
5246 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5247 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5248 }
5249 }
5250
5251 /* Check string instruction segment overrides. */
5252 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5253 {
5254 gas_assert (i.mem_operands);
5255 if (!check_string ())
5256 return;
5257 i.disp_operands = 0;
5258 }
5259
5260   /* The memory operand of (%dx) may only be used with input/output
5261      instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee).  */
5262 if (i.input_output_operand
5263 && ((i.tm.base_opcode | 0x82) != 0xee
5264 || i.tm.opcode_modifier.opcodespace != SPACE_BASE))
5265 {
5266 as_bad (_("input/output port address isn't allowed with `%s'"),
5267 insn_name (&i.tm));
5268 return;
5269 }
5270
5271 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5272 optimize_encoding ();
5273
5274 if (use_unaligned_vector_move)
5275 encode_with_unaligned_vector_move ();
5276
5277 if (!process_suffix ())
5278 return;
5279
5280 /* Check if IP-relative addressing requirements can be satisfied. */
5281 if (i.tm.cpu_flags.bitfield.cpuprefetchi
5282 && !(i.base_reg && i.base_reg->reg_num == RegIP))
5283 as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
5284
5285 /* Update operand types and check extended states. */
5286 for (j = 0; j < i.operands; j++)
5287 {
5288 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5289 switch (i.tm.operand_types[j].bitfield.class)
5290 {
5291 default:
5292 break;
5293 case RegMMX:
5294 i.xstate |= xstate_mmx;
5295 break;
5296 case RegMask:
5297 i.xstate |= xstate_mask;
5298 break;
5299 case RegSIMD:
5300 if (i.tm.operand_types[j].bitfield.tmmword)
5301 i.xstate |= xstate_tmm;
5302 else if (i.tm.operand_types[j].bitfield.zmmword)
5303 i.xstate |= xstate_zmm;
5304 else if (i.tm.operand_types[j].bitfield.ymmword)
5305 i.xstate |= xstate_ymm;
5306 else if (i.tm.operand_types[j].bitfield.xmmword)
5307 i.xstate |= xstate_xmm;
5308 break;
5309 }
5310 }
5311
5312   /* Make any still-unresolved immediate matches conform to the size of
5313      the immediate given by i.suffix.  */
5314 if (!finalize_imm ())
5315 return;
5316
5317 if (i.types[0].bitfield.imm1)
5318 i.imm_operands = 0; /* kludge for shift insns. */
5319
5320 /* For insns with operands there are more diddles to do to the opcode. */
5321 if (i.operands)
5322 {
5323 if (!process_operands ())
5324 return;
5325 }
5326 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5327 {
5328 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
5329 as_warn (_("translating to `%sp'"), insn_name (&i.tm));
5330 }
5331
5332 if (is_any_vex_encoding (&i.tm))
5333 {
5334 if (!cpu_arch_flags.bitfield.cpui286)
5335 {
5336 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5337 insn_name (&i.tm));
5338 return;
5339 }
5340
5341 /* Check for explicit REX prefix. */
5342 if (i.prefix[REX_PREFIX] || i.rex_encoding)
5343 {
5344 as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
5345 return;
5346 }
5347
5348 if (i.tm.opcode_modifier.vex)
5349 build_vex_prefix (t);
5350 else
5351 build_evex_prefix ();
5352
5353 /* The individual REX.RXBW bits got consumed. */
5354 i.rex &= REX_OPCODE;
5355 }
5356
5357 /* Handle conversion of 'int $3' --> special int3 insn. XOP or FMA4
5358 instructions may define INT_OPCODE as well, so avoid this corner
5359 case for those instructions that use MODRM. */
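  /* i.e. "int $3" (0xcd 0x03) is emitted as the one-byte "int3" (0xcc).  */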
5360 if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
5361 && i.tm.base_opcode == INT_OPCODE
5362 && !i.tm.opcode_modifier.modrm
5363 && i.op[0].imms->X_add_number == 3)
5364 {
5365 i.tm.base_opcode = INT3_OPCODE;
5366 i.imm_operands = 0;
5367 }
5368
5369 if ((i.tm.opcode_modifier.jump == JUMP
5370 || i.tm.opcode_modifier.jump == JUMP_BYTE
5371 || i.tm.opcode_modifier.jump == JUMP_DWORD)
5372 && i.op[0].disps->X_op == O_constant)
5373 {
5374 /* Convert "jmp constant" (and "call constant") to a jump (call) to
5375 the absolute address given by the constant. Since ix86 jumps and
5376 calls are pc relative, we need to generate a reloc. */
5377 i.op[0].disps->X_add_symbol = &abs_symbol;
5378 i.op[0].disps->X_op = O_symbol;
5379 }
5380
5381 /* For 8 bit registers we need an empty rex prefix. Also if the
5382 instruction already has a prefix, we need to convert old
5383 registers to new ones. */
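  /* e.g. "movb %sil, %al" needs an empty REX prefix to reach %sil, while
     "movb %ah, %sil" cannot be encoded at all: with any REX prefix present
     the legacy high-byte registers %ah..%bh are unreachable.  */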
5384
5385 if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5386 && (i.op[0].regs->reg_flags & RegRex64) != 0)
5387 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5388 && (i.op[1].regs->reg_flags & RegRex64) != 0)
5389 || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5390 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5391 && i.rex != 0))
5392 {
5393 int x;
5394
5395 i.rex |= REX_OPCODE;
5396 for (x = 0; x < 2; x++)
5397 {
5398 /* Look for 8 bit operand that uses old registers. */
5399 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5400 && (i.op[x].regs->reg_flags & RegRex64) == 0)
5401 {
5402 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5403 /* In case it is "hi" register, give up. */
5404 if (i.op[x].regs->reg_num > 3)
5405 as_bad (_("can't encode register '%s%s' in an "
5406 "instruction requiring REX prefix."),
5407 register_prefix, i.op[x].regs->reg_name);
5408
5409 /* Otherwise it is equivalent to the extended register.
5410 Since the encoding doesn't change this is merely
5411 cosmetic cleanup for debug output. */
5412
5413 i.op[x].regs = i.op[x].regs + 8;
5414 }
5415 }
5416 }
5417
5418 if (i.rex == 0 && i.rex_encoding)
5419 {
5420 /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
5421 that uses legacy register. If it is "hi" register, don't add
5422 the REX_OPCODE byte. */
5423 int x;
5424 for (x = 0; x < 2; x++)
5425 if (i.types[x].bitfield.class == Reg
5426 && i.types[x].bitfield.byte
5427 && (i.op[x].regs->reg_flags & RegRex64) == 0
5428 && i.op[x].regs->reg_num > 3)
5429 {
5430 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5431 i.rex_encoding = false;
5432 break;
5433 }
5434
5435 if (i.rex_encoding)
5436 i.rex = REX_OPCODE;
5437 }
5438
5439 if (i.rex != 0)
5440 add_prefix (REX_OPCODE | i.rex);
5441
5442 insert_lfence_before ();
5443
5444 /* We are ready to output the insn. */
5445 output_insn ();
5446
5447 insert_lfence_after ();
5448
5449 last_insn.seg = now_seg;
5450
5451 if (i.tm.opcode_modifier.isprefix)
5452 {
5453 last_insn.kind = last_insn_prefix;
5454 last_insn.name = insn_name (&i.tm);
5455 last_insn.file = as_where (&last_insn.line);
5456 }
5457 else
5458 last_insn.kind = last_insn_other;
5459 }
5460
5461 /* The Q suffix is generally valid only in 64-bit mode, with very few
5462 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
5463 and fisttp only one of their two templates is matched below: That's
5464 sufficient since other relevant attributes are the same between both
5465 respective templates. */
5466 static INLINE bool q_suffix_allowed(const insn_template *t)
5467 {
5468 return flag_code == CODE_64BIT
5469 || (t->opcode_modifier.opcodespace == SPACE_BASE
5470 && t->base_opcode == 0xdf
5471 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
5472 || (t->opcode_modifier.opcodespace == SPACE_0F
5473 && t->base_opcode == 0xc7
5474 && t->opcode_modifier.opcodeprefix == PREFIX_NONE
5475 && t->extension_opcode == 1) /* cmpxchg8b */;
5476 }
5477
5478 static const char *
5479 parse_insn (const char *line, char *mnemonic)
5480 {
5481 const char *l = line, *token_start = l;
5482 char *mnem_p;
5483 bool pass1 = !current_templates;
5484 int supported;
5485 const insn_template *t;
5486 char *dot_p = NULL;
5487
5488 while (1)
5489 {
5490 mnem_p = mnemonic;
5491 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5492 {
5493 if (*mnem_p == '.')
5494 dot_p = mnem_p;
5495 mnem_p++;
5496 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5497 {
5498 as_bad (_("no such instruction: `%s'"), token_start);
5499 return NULL;
5500 }
5501 l++;
5502 }
5503 if (!is_space_char (*l)
5504 && *l != END_OF_INSN
5505 && (intel_syntax
5506 || (*l != PREFIX_SEPARATOR
5507 && *l != ',')))
5508 {
5509 as_bad (_("invalid character %s in mnemonic"),
5510 output_invalid (*l));
5511 return NULL;
5512 }
5513 if (token_start == l)
5514 {
5515 if (!intel_syntax && *l == PREFIX_SEPARATOR)
5516 as_bad (_("expecting prefix; got nothing"));
5517 else
5518 as_bad (_("expecting mnemonic; got nothing"));
5519 return NULL;
5520 }
5521
5522 /* Look up instruction (or prefix) via hash table. */
5523 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5524
5525 if (*l != END_OF_INSN
5526 && (!is_space_char (*l) || l[1] != END_OF_INSN)
5527 && current_templates
5528 && current_templates->start->opcode_modifier.isprefix)
5529 {
5530 if (!cpu_flags_check_cpu64 (current_templates->start->cpu_flags))
5531 {
5532 as_bad ((flag_code != CODE_64BIT
5533 ? _("`%s' is only supported in 64-bit mode")
5534 : _("`%s' is not supported in 64-bit mode")),
5535 insn_name (current_templates->start));
5536 return NULL;
5537 }
5538 /* If we are in 16-bit mode, do not allow addr16 or data16.
5539 Similarly, in 32-bit mode, do not allow addr32 or data32. */
5540 if ((current_templates->start->opcode_modifier.size == SIZE16
5541 || current_templates->start->opcode_modifier.size == SIZE32)
5542 && flag_code != CODE_64BIT
5543 && ((current_templates->start->opcode_modifier.size == SIZE32)
5544 ^ (flag_code == CODE_16BIT)))
5545 {
5546 as_bad (_("redundant %s prefix"),
5547 insn_name (current_templates->start));
5548 return NULL;
5549 }
5550
5551 if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5552 {
5553 /* Handle pseudo prefixes. */
5554 switch (current_templates->start->extension_opcode)
5555 {
5556 case Prefix_Disp8:
5557 /* {disp8} */
5558 i.disp_encoding = disp_encoding_8bit;
5559 break;
5560 case Prefix_Disp16:
5561 /* {disp16} */
5562 i.disp_encoding = disp_encoding_16bit;
5563 break;
5564 case Prefix_Disp32:
5565 /* {disp32} */
5566 i.disp_encoding = disp_encoding_32bit;
5567 break;
5568 case Prefix_Load:
5569 /* {load} */
5570 i.dir_encoding = dir_encoding_load;
5571 break;
5572 case Prefix_Store:
5573 /* {store} */
5574 i.dir_encoding = dir_encoding_store;
5575 break;
5576 case Prefix_VEX:
5577 /* {vex} */
5578 i.vec_encoding = vex_encoding_vex;
5579 break;
5580 case Prefix_VEX3:
5581 /* {vex3} */
5582 i.vec_encoding = vex_encoding_vex3;
5583 break;
5584 case Prefix_EVEX:
5585 /* {evex} */
5586 i.vec_encoding = vex_encoding_evex;
5587 break;
5588 case Prefix_REX:
5589 /* {rex} */
5590 i.rex_encoding = true;
5591 break;
5592 case Prefix_NoOptimize:
5593 /* {nooptimize} */
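		  /* e.g. "{nooptimize} lea (%rdx), %rcx" keeps the LEA
		     rather than letting -O rewrite it to "mov %rdx, %rcx"
		     in optimize_encoding().  */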
5594 i.no_optimize = true;
5595 break;
5596 default:
5597 abort ();
5598 }
5599 }
5600 else
5601 {
5602 /* Add prefix, checking for repeated prefixes. */
5603 switch (add_prefix (current_templates->start->base_opcode))
5604 {
5605 case PREFIX_EXIST:
5606 return NULL;
5607 case PREFIX_DS:
5608 if (current_templates->start->cpu_flags.bitfield.cpuibt)
5609 i.notrack_prefix = insn_name (current_templates->start);
5610 break;
5611 case PREFIX_REP:
5612 if (current_templates->start->cpu_flags.bitfield.cpuhle)
5613 i.hle_prefix = insn_name (current_templates->start);
5614 else if (current_templates->start->cpu_flags.bitfield.cpumpx)
5615 i.bnd_prefix = insn_name (current_templates->start);
5616 else
5617 i.rep_prefix = insn_name (current_templates->start);
5618 break;
5619 default:
5620 break;
5621 }
5622 }
5623 /* Skip past PREFIX_SEPARATOR and reset token_start. */
5624 token_start = ++l;
5625 }
5626 else
5627 break;
5628 }
5629
5630 if (!current_templates)
5631 {
5632 /* Deprecated functionality (new code should use pseudo-prefixes instead):
5633 Check if we should swap operand or force 32bit displacement in
5634 encoding. */
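      /* e.g. "mov.s %eax, %ecx" picks the alternative direction-bit
	 encoding, while ".d8"/".d32" force an 8-/32-bit displacement just
	 like the {disp8}/{disp32} pseudo prefixes.  */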
5635 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5636 i.dir_encoding = dir_encoding_swap;
5637 else if (mnem_p - 3 == dot_p
5638 && dot_p[1] == 'd'
5639 && dot_p[2] == '8')
5640 i.disp_encoding = disp_encoding_8bit;
5641 else if (mnem_p - 4 == dot_p
5642 && dot_p[1] == 'd'
5643 && dot_p[2] == '3'
5644 && dot_p[3] == '2')
5645 i.disp_encoding = disp_encoding_32bit;
5646 else
5647 goto check_suffix;
5648 mnem_p = dot_p;
5649 *dot_p = '\0';
5650 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5651 }
5652
5653 if (!current_templates || !pass1)
5654 {
5655 current_templates = NULL;
5656
5657 check_suffix:
5658 if (mnem_p > mnemonic)
5659 {
5660 /* See if we can get a match by trimming off a suffix. */
5661 switch (mnem_p[-1])
5662 {
5663 case WORD_MNEM_SUFFIX:
5664 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5665 i.suffix = SHORT_MNEM_SUFFIX;
5666 else
5667 /* Fall through. */
5668 case BYTE_MNEM_SUFFIX:
5669 case QWORD_MNEM_SUFFIX:
5670 i.suffix = mnem_p[-1];
5671 mnem_p[-1] = '\0';
5672 current_templates
5673 = (const templates *) str_hash_find (op_hash, mnemonic);
5674 break;
5675 case SHORT_MNEM_SUFFIX:
5676 case LONG_MNEM_SUFFIX:
5677 if (!intel_syntax)
5678 {
5679 i.suffix = mnem_p[-1];
5680 mnem_p[-1] = '\0';
5681 current_templates
5682 = (const templates *) str_hash_find (op_hash, mnemonic);
5683 }
5684 break;
5685
5686 /* Intel Syntax. */
5687 case 'd':
5688 if (intel_syntax)
5689 {
5690 if (intel_float_operand (mnemonic) == 1)
5691 i.suffix = SHORT_MNEM_SUFFIX;
5692 else
5693 i.suffix = LONG_MNEM_SUFFIX;
5694 mnem_p[-1] = '\0';
5695 current_templates
5696 = (const templates *) str_hash_find (op_hash, mnemonic);
5697 }
5698 /* For compatibility reasons accept MOVSD and CMPSD without
5699 operands even in AT&T mode. */
5700 else if (*l == END_OF_INSN
5701 || (is_space_char (*l) && l[1] == END_OF_INSN))
5702 {
5703 mnem_p[-1] = '\0';
5704 current_templates
5705 = (const templates *) str_hash_find (op_hash, mnemonic);
5706 if (current_templates != NULL
5707 /* MOVS or CMPS */
5708 && (current_templates->start->base_opcode | 2) == 0xa6
5709 && current_templates->start->opcode_modifier.opcodespace
5710 == SPACE_BASE
5711 && mnem_p[-2] == 's')
5712 {
5713 as_warn (_("found `%sd'; assuming `%sl' was meant"),
5714 mnemonic, mnemonic);
5715 i.suffix = LONG_MNEM_SUFFIX;
5716 }
5717 else
5718 {
5719 current_templates = NULL;
5720 mnem_p[-1] = 'd';
5721 }
5722 }
5723 break;
5724 }
5725 }
5726
5727 if (!current_templates)
5728 {
5729 if (pass1)
5730 as_bad (_("no such instruction: `%s'"), token_start);
5731 return NULL;
5732 }
5733 }
5734
5735 if (current_templates->start->opcode_modifier.jump == JUMP
5736 || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5737 {
5738 /* Check for a branch hint. We allow ",pt" and ",pn" for
5739 predict taken and predict not taken respectively.
5740 I'm not sure that branch hints actually do anything on loop
5741 and jcxz insns (JumpByte) for current Pentium4 chips. They
5742 may work in the future and it doesn't hurt to accept them
5743 now. */
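      /* e.g. "jz,pt target" adds a 0x3e (DS) prefix and "jz,pn target"
	 a 0x2e (CS) prefix to the jump.  */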
5744 if (l[0] == ',' && l[1] == 'p')
5745 {
5746 if (l[2] == 't')
5747 {
5748 if (!add_prefix (DS_PREFIX_OPCODE))
5749 return NULL;
5750 l += 3;
5751 }
5752 else if (l[2] == 'n')
5753 {
5754 if (!add_prefix (CS_PREFIX_OPCODE))
5755 return NULL;
5756 l += 3;
5757 }
5758 }
5759 }
5760 /* Any other comma loses. */
5761 if (*l == ',')
5762 {
5763 as_bad (_("invalid character %s in mnemonic"),
5764 output_invalid (*l));
5765 return NULL;
5766 }
5767
5768 /* Check if instruction is supported on specified architecture. */
5769 supported = 0;
5770 for (t = current_templates->start; t < current_templates->end; ++t)
5771 {
5772 supported |= cpu_flags_match (t);
5773
5774 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
5775 supported &= ~CPU_FLAGS_64BIT_MATCH;
5776
5777 if (supported == CPU_FLAGS_PERFECT_MATCH)
5778 return l;
5779 }
5780
5781 if (pass1)
5782 {
5783 if (supported & CPU_FLAGS_64BIT_MATCH)
5784 i.error = unsupported_on_arch;
5785 else
5786 i.error = unsupported_64bit;
5787 }
5788
5789 return NULL;
5790 }
5791
5792 static char *
5793 parse_operands (char *l, const char *mnemonic)
5794 {
5795 char *token_start;
5796
5797 /* 1 if operand is pending after ','. */
5798 unsigned int expecting_operand = 0;
5799
5800 while (*l != END_OF_INSN)
5801 {
5802 /* Non-zero if operand parens not balanced. */
5803 unsigned int paren_not_balanced = 0;
5804 /* True if inside double quotes. */
5805 bool in_quotes = false;
5806
5807 /* Skip optional white space before operand. */
5808 if (is_space_char (*l))
5809 ++l;
5810 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5811 {
5812 as_bad (_("invalid character %s before operand %d"),
5813 output_invalid (*l),
5814 i.operands + 1);
5815 return NULL;
5816 }
5817 token_start = l; /* After white space. */
5818 while (in_quotes || paren_not_balanced || *l != ',')
5819 {
5820 if (*l == END_OF_INSN)
5821 {
5822 if (in_quotes)
5823 {
5824 as_bad (_("unbalanced double quotes in operand %d."),
5825 i.operands + 1);
5826 return NULL;
5827 }
5828 if (paren_not_balanced)
5829 {
5830 know (!intel_syntax);
5831 as_bad (_("unbalanced parenthesis in operand %d."),
5832 i.operands + 1);
5833 return NULL;
5834 }
5835 else
5836 break; /* we are done */
5837 }
5838 else if (*l == '\\' && l[1] == '"')
5839 ++l;
5840 else if (*l == '"')
5841 in_quotes = !in_quotes;
5842 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
5843 {
5844 as_bad (_("invalid character %s in operand %d"),
5845 output_invalid (*l),
5846 i.operands + 1);
5847 return NULL;
5848 }
5849 if (!intel_syntax && !in_quotes)
5850 {
5851 if (*l == '(')
5852 ++paren_not_balanced;
5853 if (*l == ')')
5854 --paren_not_balanced;
5855 }
5856 l++;
5857 }
5858 if (l != token_start)
5859 { /* Yes, we've read in another operand. */
5860 unsigned int operand_ok;
5861 this_operand = i.operands++;
5862 if (i.operands > MAX_OPERANDS)
5863 {
5864 as_bad (_("spurious operands; (%d operands/instruction max)"),
5865 MAX_OPERANDS);
5866 return NULL;
5867 }
5868 i.types[this_operand].bitfield.unspecified = 1;
5869 /* Now parse operand adding info to 'i' as we go along. */
5870 END_STRING_AND_SAVE (l);
5871
5872 if (i.mem_operands > 1)
5873 {
5874 as_bad (_("too many memory references for `%s'"),
5875 mnemonic);
5876 	      return NULL;
5877 }
5878
5879 if (intel_syntax)
5880 operand_ok =
5881 i386_intel_operand (token_start,
5882 intel_float_operand (mnemonic));
5883 else
5884 operand_ok = i386_att_operand (token_start);
5885
5886 RESTORE_END_STRING (l);
5887 if (!operand_ok)
5888 return NULL;
5889 }
5890 else
5891 {
5892 if (expecting_operand)
5893 {
5894 expecting_operand_after_comma:
5895 as_bad (_("expecting operand after ','; got nothing"));
5896 return NULL;
5897 }
5898 if (*l == ',')
5899 {
5900 as_bad (_("expecting operand before ','; got nothing"));
5901 return NULL;
5902 }
5903 }
5904
5905 /* Now *l must be either ',' or END_OF_INSN. */
5906 if (*l == ',')
5907 {
5908 if (*++l == END_OF_INSN)
5909 {
5910 	      /* Nothing after the trailing comma: complain.  */
5911 goto expecting_operand_after_comma;
5912 }
5913 expecting_operand = 1;
5914 }
5915 }
5916 return l;
5917 }
5918
5919 static void
5920 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
5921 {
5922 union i386_op temp_op;
5923 i386_operand_type temp_type;
5924 unsigned int temp_flags;
5925 enum bfd_reloc_code_real temp_reloc;
5926
5927 temp_type = i.types[xchg2];
5928 i.types[xchg2] = i.types[xchg1];
5929 i.types[xchg1] = temp_type;
5930
5931 temp_flags = i.flags[xchg2];
5932 i.flags[xchg2] = i.flags[xchg1];
5933 i.flags[xchg1] = temp_flags;
5934
5935 temp_op = i.op[xchg2];
5936 i.op[xchg2] = i.op[xchg1];
5937 i.op[xchg1] = temp_op;
5938
5939 temp_reloc = i.reloc[xchg2];
5940 i.reloc[xchg2] = i.reloc[xchg1];
5941 i.reloc[xchg1] = temp_reloc;
5942
5943 if (i.mask.reg)
5944 {
5945 if (i.mask.operand == xchg1)
5946 i.mask.operand = xchg2;
5947 else if (i.mask.operand == xchg2)
5948 i.mask.operand = xchg1;
5949 }
5950 if (i.broadcast.type || i.broadcast.bytes)
5951 {
5952 if (i.broadcast.operand == xchg1)
5953 i.broadcast.operand = xchg2;
5954 else if (i.broadcast.operand == xchg2)
5955 i.broadcast.operand = xchg1;
5956 }
5957 }
5958
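/* Reverse the order of i's operands: with two operands (0,1) becomes
   (1,0), with three (0,1,2) becomes (2,1,0), and so on.  Segment
   bookkeeping is kept in sync when there are two memory operands.  */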
5959 static void
5960 swap_operands (void)
5961 {
5962 switch (i.operands)
5963 {
5964 case 5:
5965 case 4:
5966 swap_2_operands (1, i.operands - 2);
5967 /* Fall through. */
5968 case 3:
5969 case 2:
5970 swap_2_operands (0, i.operands - 1);
5971 break;
5972 default:
5973 abort ();
5974 }
5975
5976 if (i.mem_operands == 2)
5977 {
5978 const reg_entry *temp_seg;
5979 temp_seg = i.seg[0];
5980 i.seg[0] = i.seg[1];
5981 i.seg[1] = temp_seg;
5982 }
5983 }
5984
5985 /* Try to ensure constant immediates are represented in the smallest
5986 opcode possible. */
5987 static void
5988 optimize_imm (void)
5989 {
5990 char guess_suffix = 0;
5991 int op;
5992
5993 if (i.suffix)
5994 guess_suffix = i.suffix;
5995 else if (i.reg_operands)
5996 {
5997 /* Figure out a suffix from the last register operand specified.
5998 We can't do this properly yet, i.e. excluding special register
5999 instances, but the following works for instructions with
6000 immediates. In any case, we can't set i.suffix yet. */
6001 for (op = i.operands; --op >= 0;)
6002 if (i.types[op].bitfield.class != Reg)
6003 continue;
6004 else if (i.types[op].bitfield.byte)
6005 {
6006 guess_suffix = BYTE_MNEM_SUFFIX;
6007 break;
6008 }
6009 else if (i.types[op].bitfield.word)
6010 {
6011 guess_suffix = WORD_MNEM_SUFFIX;
6012 break;
6013 }
6014 else if (i.types[op].bitfield.dword)
6015 {
6016 guess_suffix = LONG_MNEM_SUFFIX;
6017 break;
6018 }
6019 else if (i.types[op].bitfield.qword)
6020 {
6021 guess_suffix = QWORD_MNEM_SUFFIX;
6022 break;
6023 }
6024 }
6025 else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6026 guess_suffix = WORD_MNEM_SUFFIX;
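    /* The XOR above is true exactly when the default operand size is
       16 bits: 16-bit mode without a data size prefix, or 32-bit mode
       with one.  */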
6027
6028 for (op = i.operands; --op >= 0;)
6029 if (operand_type_check (i.types[op], imm))
6030 {
6031 switch (i.op[op].imms->X_op)
6032 {
6033 case O_constant:
6034 /* If a suffix is given, this operand may be shortened. */
6035 switch (guess_suffix)
6036 {
6037 case LONG_MNEM_SUFFIX:
6038 i.types[op].bitfield.imm32 = 1;
6039 i.types[op].bitfield.imm64 = 1;
6040 break;
6041 case WORD_MNEM_SUFFIX:
6042 i.types[op].bitfield.imm16 = 1;
6043 i.types[op].bitfield.imm32 = 1;
6044 i.types[op].bitfield.imm32s = 1;
6045 i.types[op].bitfield.imm64 = 1;
6046 break;
6047 case BYTE_MNEM_SUFFIX:
6048 i.types[op].bitfield.imm8 = 1;
6049 i.types[op].bitfield.imm8s = 1;
6050 i.types[op].bitfield.imm16 = 1;
6051 i.types[op].bitfield.imm32 = 1;
6052 i.types[op].bitfield.imm32s = 1;
6053 i.types[op].bitfield.imm64 = 1;
6054 break;
6055 }
6056
6057 /* If this operand is at most 16 bits, convert it
6058 to a signed 16 bit number before trying to see
6059 whether it will fit in an even smaller size.
6060 This allows a 16-bit operand such as $0xffe0 to
6061 be recognised as within Imm8S range. */
6062 if ((i.types[op].bitfield.imm16)
6063 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
6064 {
6065 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6066 ^ 0x8000) - 0x8000);
6067 }
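	      /* The (x ^ 0x8000) - 0x8000 idiom sign-extends from 16 bits:
		 e.g. $0xffe0 becomes 0x7fe0 - 0x8000 = -0x20, which then
		 matches Imm8S.  The BFD64 variant below does the same from
		 32 bits using bit 31.  */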
6068 #ifdef BFD64
6069 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
6070 if ((i.types[op].bitfield.imm32)
6071 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
6072 {
6073 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6074 ^ ((offsetT) 1 << 31))
6075 - ((offsetT) 1 << 31));
6076 }
6077 #endif
6078 i.types[op]
6079 = operand_type_or (i.types[op],
6080 smallest_imm_type (i.op[op].imms->X_add_number));
6081
6082 	      /* We must avoid matching Imm32 templates when only a 64-bit
6083 		 immediate is available.  */
6084 if (guess_suffix == QWORD_MNEM_SUFFIX)
6085 i.types[op].bitfield.imm32 = 0;
6086 break;
6087
6088 case O_absent:
6089 case O_register:
6090 abort ();
6091
6092 /* Symbols and expressions. */
6093 default:
6094 /* Convert symbolic operand to proper sizes for matching, but don't
6095 prevent matching a set of insns that only supports sizes other
6096 than those matching the insn suffix. */
6097 {
6098 i386_operand_type mask, allowed;
6099 const insn_template *t = current_templates->start;
6100
6101 operand_type_set (&mask, 0);
6102 switch (guess_suffix)
6103 {
6104 case QWORD_MNEM_SUFFIX:
6105 mask.bitfield.imm64 = 1;
6106 mask.bitfield.imm32s = 1;
6107 break;
6108 case LONG_MNEM_SUFFIX:
6109 mask.bitfield.imm32 = 1;
6110 break;
6111 case WORD_MNEM_SUFFIX:
6112 mask.bitfield.imm16 = 1;
6113 break;
6114 case BYTE_MNEM_SUFFIX:
6115 mask.bitfield.imm8 = 1;
6116 break;
6117 default:
6118 break;
6119 }
6120
6121 allowed = operand_type_and (t->operand_types[op], mask);
6122 while (++t < current_templates->end)
6123 {
6124 allowed = operand_type_or (allowed, t->operand_types[op]);
6125 allowed = operand_type_and (allowed, mask);
6126 }
6127
6128 if (!operand_type_all_zero (&allowed))
6129 i.types[op] = operand_type_and (i.types[op], mask);
6130 }
6131 break;
6132 }
6133 }
6134 }
6135
6136 /* Try to use the smallest displacement type too. */
6137 static void
6138 optimize_disp (void)
6139 {
6140 int op;
6141
6142 for (op = i.operands; --op >= 0;)
6143 if (operand_type_check (i.types[op], disp))
6144 {
6145 if (i.op[op].disps->X_op == O_constant)
6146 {
6147 offsetT op_disp = i.op[op].disps->X_add_number;
6148
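	    /* E.g. "0(%eax)" is encoded just like "(%eax)": a zero
	       displacement with a base/index can be dropped entirely.  */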
6149 if (!op_disp && i.types[op].bitfield.baseindex)
6150 {
6151 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6152 i.op[op].disps = NULL;
6153 i.disp_operands--;
6154 continue;
6155 }
6156
6157 if (i.types[op].bitfield.disp16
6158 && fits_in_unsigned_word (op_disp))
6159 {
6160 /* If this operand is at most 16 bits, convert
6161 to a signed 16 bit number and don't use 64bit
6162 displacement. */
6163 op_disp = ((op_disp ^ 0x8000) - 0x8000);
6164 i.types[op].bitfield.disp64 = 0;
6165 }
6166
6167 #ifdef BFD64
6168 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
6169 if ((flag_code != CODE_64BIT
6170 ? i.types[op].bitfield.disp32
6171 : want_disp32 (current_templates->start)
6172 && (!current_templates->start->opcode_modifier.jump
6173 || i.jumpabsolute || i.types[op].bitfield.baseindex))
6174 && fits_in_unsigned_long (op_disp))
6175 {
6176 /* If this operand is at most 32 bits, convert
6177 to a signed 32 bit number and don't use 64bit
6178 displacement. */
6179 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
6180 i.types[op].bitfield.disp64 = 0;
6181 i.types[op].bitfield.disp32 = 1;
6182 }
6183
6184 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
6185 {
6186 i.types[op].bitfield.disp64 = 0;
6187 i.types[op].bitfield.disp32 = 1;
6188 }
6189 #endif
6190 if ((i.types[op].bitfield.disp32
6191 || i.types[op].bitfield.disp16)
6192 && fits_in_disp8 (op_disp))
6193 i.types[op].bitfield.disp8 = 1;
6194
6195 i.op[op].disps->X_add_number = op_disp;
6196 }
6197 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
6198 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
6199 {
6200 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
6201 i.op[op].disps, 0, i.reloc[op]);
6202 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6203 }
6204 else
6205 	    /* We only support 64-bit displacements on constants.  */
6206 i.types[op].bitfield.disp64 = 0;
6207 }
6208 }
6209
6210 /* Return 1 if there is a match in broadcast bytes between operand
6211 GIVEN and instruction template T. */
6212
6213 static INLINE int
6214 match_broadcast_size (const insn_template *t, unsigned int given)
6215 {
6216 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
6217 && i.types[given].bitfield.byte)
6218 || (t->opcode_modifier.broadcast == WORD_BROADCAST
6219 && i.types[given].bitfield.word)
6220 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
6221 && i.types[given].bitfield.dword)
6222 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
6223 && i.types[given].bitfield.qword));
6224 }
6225
6226 /* Check if operands are valid for the instruction. */
6227
6228 static int
6229 check_VecOperands (const insn_template *t)
6230 {
6231 unsigned int op;
6232 i386_cpu_flags cpu;
6233
6234 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6235      any one operand implicitly require AVX512VL support if the actual
6236 operand size is YMMword or XMMword. Since this function runs after
6237 template matching, there's no need to check for YMMword/XMMword in
6238 the template. */
6239 cpu = cpu_flags_and (t->cpu_flags, avx512);
6240 if (!cpu_flags_all_zero (&cpu)
6241 && !t->cpu_flags.bitfield.cpuavx512vl
6242 && !cpu_arch_flags.bitfield.cpuavx512vl)
6243 {
6244 for (op = 0; op < t->operands; ++op)
6245 {
6246 if (t->operand_types[op].bitfield.zmmword
6247 && (i.types[op].bitfield.ymmword
6248 || i.types[op].bitfield.xmmword))
6249 {
6250 i.error = unsupported;
6251 return 1;
6252 }
6253 }
6254 }
6255
6256   /* Somewhat similarly, templates specifying both AVX and AVX2
6257      require AVX2 support if the actual operand size is YMMword.  */
6258 if (t->cpu_flags.bitfield.cpuavx
6259 && t->cpu_flags.bitfield.cpuavx2
6260 && !cpu_arch_flags.bitfield.cpuavx2)
6261 {
6262 for (op = 0; op < t->operands; ++op)
6263 {
6264 if (t->operand_types[op].bitfield.xmmword
6265 && i.types[op].bitfield.ymmword)
6266 {
6267 i.error = unsupported;
6268 return 1;
6269 }
6270 }
6271 }
6272
6273 /* Without VSIB byte, we can't have a vector register for index. */
6274 if (!t->opcode_modifier.sib
6275 && i.index_reg
6276 && (i.index_reg->reg_type.bitfield.xmmword
6277 || i.index_reg->reg_type.bitfield.ymmword
6278 || i.index_reg->reg_type.bitfield.zmmword))
6279 {
6280 i.error = unsupported_vector_index_register;
6281 return 1;
6282 }
6283
6284 /* Check if default mask is allowed. */
6285 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6286 && (!i.mask.reg || i.mask.reg->reg_num == 0))
6287 {
6288 i.error = no_default_mask;
6289 return 1;
6290 }
6291
6292 /* For VSIB byte, we need a vector register for index, and all vector
6293 registers must be distinct. */
6294 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6295 {
6296 if (!i.index_reg
6297 || !((t->opcode_modifier.sib == VECSIB128
6298 && i.index_reg->reg_type.bitfield.xmmword)
6299 || (t->opcode_modifier.sib == VECSIB256
6300 && i.index_reg->reg_type.bitfield.ymmword)
6301 || (t->opcode_modifier.sib == VECSIB512
6302 && i.index_reg->reg_type.bitfield.zmmword)))
6303 {
6304 i.error = invalid_vsib_address;
6305 return 1;
6306 }
6307
6308 gas_assert (i.reg_operands == 2 || i.mask.reg);
6309 if (i.reg_operands == 2 && !i.mask.reg)
6310 {
6311 gas_assert (i.types[0].bitfield.class == RegSIMD);
6312 gas_assert (i.types[0].bitfield.xmmword
6313 || i.types[0].bitfield.ymmword);
6314 gas_assert (i.types[2].bitfield.class == RegSIMD);
6315 gas_assert (i.types[2].bitfield.xmmword
6316 || i.types[2].bitfield.ymmword);
6317 if (operand_check == check_none)
6318 return 0;
6319 if (register_number (i.op[0].regs)
6320 != register_number (i.index_reg)
6321 && register_number (i.op[2].regs)
6322 != register_number (i.index_reg)
6323 && register_number (i.op[0].regs)
6324 != register_number (i.op[2].regs))
6325 return 0;
6326 if (operand_check == check_error)
6327 {
6328 i.error = invalid_vector_register_set;
6329 return 1;
6330 }
6331 as_warn (_("mask, index, and destination registers should be distinct"));
6332 }
6333 else if (i.reg_operands == 1 && i.mask.reg)
6334 {
6335 if (i.types[1].bitfield.class == RegSIMD
6336 && (i.types[1].bitfield.xmmword
6337 || i.types[1].bitfield.ymmword
6338 || i.types[1].bitfield.zmmword)
6339 && (register_number (i.op[1].regs)
6340 == register_number (i.index_reg)))
6341 {
6342 if (operand_check == check_error)
6343 {
6344 i.error = invalid_vector_register_set;
6345 return 1;
6346 }
6347 if (operand_check != check_none)
6348 as_warn (_("index and destination registers should be distinct"));
6349 }
6350 }
6351 }
6352
6353 /* For AMX instructions with 3 TMM register operands, all operands
6354 must be distinct. */
6355 if (i.reg_operands == 3
6356 && t->operand_types[0].bitfield.tmmword
6357 && (i.op[0].regs == i.op[1].regs
6358 || i.op[0].regs == i.op[2].regs
6359 || i.op[1].regs == i.op[2].regs))
6360 {
6361 i.error = invalid_tmm_register_set;
6362 return 1;
6363 }
6364
6365   /* Some special instructions require that the destination be distinct
6366      from all source registers.  */
6367 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6368 {
6369 unsigned int dest_reg = i.operands - 1;
6370
6371 know (i.operands >= 3);
6372
6373 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
6374 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6375 || (i.reg_operands > 2
6376 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6377 {
6378 i.error = invalid_dest_and_src_register_set;
6379 return 1;
6380 }
6381 }
6382
6383 /* Check if broadcast is supported by the instruction and is applied
6384 to the memory operand. */
6385 if (i.broadcast.type || i.broadcast.bytes)
6386 {
6387 i386_operand_type type, overlap;
6388
6389 /* Check if specified broadcast is supported in this instruction,
6390 and its broadcast bytes match the memory operand. */
6391 op = i.broadcast.operand;
6392 if (!t->opcode_modifier.broadcast
6393 || !(i.flags[op] & Operand_Mem)
6394 || (!i.types[op].bitfield.unspecified
6395 && !match_broadcast_size (t, op)))
6396 {
6397 bad_broadcast:
6398 i.error = unsupported_broadcast;
6399 return 1;
6400 }
6401
6402 if (i.broadcast.type)
6403 i.broadcast.bytes = ((1 << (t->opcode_modifier.broadcast - 1))
6404 * i.broadcast.type);
6405 operand_type_set (&type, 0);
6406 switch (get_broadcast_bytes (t, false))
6407 {
6408 case 2:
6409 type.bitfield.word = 1;
6410 break;
6411 case 4:
6412 type.bitfield.dword = 1;
6413 break;
6414 case 8:
6415 type.bitfield.qword = 1;
6416 break;
6417 case 16:
6418 type.bitfield.xmmword = 1;
6419 break;
6420 case 32:
6421 type.bitfield.ymmword = 1;
6422 break;
6423 case 64:
6424 type.bitfield.zmmword = 1;
6425 break;
6426 default:
6427 goto bad_broadcast;
6428 }
6429
6430 overlap = operand_type_and (type, t->operand_types[op]);
6431 if (t->operand_types[op].bitfield.class == RegSIMD
6432 && t->operand_types[op].bitfield.byte
6433 + t->operand_types[op].bitfield.word
6434 + t->operand_types[op].bitfield.dword
6435 + t->operand_types[op].bitfield.qword > 1)
6436 {
6437 overlap.bitfield.xmmword = 0;
6438 overlap.bitfield.ymmword = 0;
6439 overlap.bitfield.zmmword = 0;
6440 }
6441 if (operand_type_all_zero (&overlap))
6442 goto bad_broadcast;
6443
6444 if (t->opcode_modifier.checkoperandsize)
6445 {
6446 unsigned int j;
6447
6448 type.bitfield.baseindex = 1;
6449 for (j = 0; j < i.operands; ++j)
6450 {
6451 if (j != op
6452 && !operand_type_register_match(i.types[j],
6453 t->operand_types[j],
6454 type,
6455 t->operand_types[op]))
6456 goto bad_broadcast;
6457 }
6458 }
6459 }
6460   /* If broadcast is supported by this instruction, we need to check that
6461      an operand of one-element size isn't specified without broadcast.  */
6462 else if (t->opcode_modifier.broadcast && i.mem_operands)
6463 {
6464 /* Find memory operand. */
6465 for (op = 0; op < i.operands; op++)
6466 if (i.flags[op] & Operand_Mem)
6467 break;
6468 gas_assert (op < i.operands);
6469 /* Check size of the memory operand. */
6470 if (match_broadcast_size (t, op))
6471 {
6472 i.error = broadcast_needed;
6473 return 1;
6474 }
6475 }
6476 else
6477 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
6478
6479 /* Check if requested masking is supported. */
6480 if (i.mask.reg)
6481 {
6482 switch (t->opcode_modifier.masking)
6483 {
6484 case BOTH_MASKING:
6485 break;
6486 case MERGING_MASKING:
6487 if (i.mask.zeroing)
6488 {
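	    /* Zeroing-masking was requested but only merging is allowed;
	       fall into the `case 0' (no masking support at all) error
	       path below.  */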
6489 case 0:
6490 i.error = unsupported_masking;
6491 return 1;
6492 }
6493 break;
6494 case DYNAMIC_MASKING:
6495 /* Memory destinations allow only merging masking. */
6496 if (i.mask.zeroing && i.mem_operands)
6497 {
6498 /* Find memory operand. */
6499 for (op = 0; op < i.operands; op++)
6500 if (i.flags[op] & Operand_Mem)
6501 break;
6502 gas_assert (op < i.operands);
6503 if (op == i.operands - 1)
6504 {
6505 i.error = unsupported_masking;
6506 return 1;
6507 }
6508 }
6509 break;
6510 default:
6511 abort ();
6512 }
6513 }
6514
6515 /* Check if masking is applied to dest operand. */
6516 if (i.mask.reg && (i.mask.operand != i.operands - 1))
6517 {
6518 i.error = mask_not_on_destination;
6519 return 1;
6520 }
6521
6522 /* Check RC/SAE. */
6523 if (i.rounding.type != rc_none)
6524 {
6525 if (!t->opcode_modifier.sae
6526 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6527 || i.mem_operands)
6528 {
6529 i.error = unsupported_rc_sae;
6530 return 1;
6531 }
6532
6533 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6534 operand. */
6535 if (t->opcode_modifier.evex != EVEXLIG)
6536 {
6537 for (op = 0; op < t->operands; ++op)
6538 if (i.types[op].bitfield.zmmword)
6539 break;
6540 if (op >= t->operands)
6541 {
6542 i.error = operand_size_mismatch;
6543 return 1;
6544 }
6545 }
6546 }
6547
6548 /* Check the special Imm4 cases; must be the first operand. */
6549 if (t->cpu_flags.bitfield.cpuxop && t->operands == 5)
6550 {
6551 if (i.op[0].imms->X_op != O_constant
6552 || !fits_in_imm4 (i.op[0].imms->X_add_number))
6553 {
6554 i.error = bad_imm4;
6555 return 1;
6556 }
6557
6558 /* Turn off Imm<N> so that update_imm won't complain. */
6559 operand_type_set (&i.types[0], 0);
6560 }
6561
6562 /* Check vector Disp8 operand. */
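  /* This implements the EVEX compressed displacement (Disp8*N): the
     stored 8-bit displacement is implicitly scaled by 1 << memshift.
     E.g. for a 64-byte (ZMM) memory operand memshift is 6, so a
     displacement of 0x80 can be encoded as the disp8 value 2.  */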
6563 if (t->opcode_modifier.disp8memshift
6564 && i.disp_encoding <= disp_encoding_8bit)
6565 {
6566 if (i.broadcast.bytes)
6567 i.memshift = t->opcode_modifier.broadcast - 1;
6568 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6569 i.memshift = t->opcode_modifier.disp8memshift;
6570 else
6571 {
6572 const i386_operand_type *type = NULL, *fallback = NULL;
6573
6574 i.memshift = 0;
6575 for (op = 0; op < i.operands; op++)
6576 if (i.flags[op] & Operand_Mem)
6577 {
6578 if (t->opcode_modifier.evex == EVEXLIG)
6579 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6580 else if (t->operand_types[op].bitfield.xmmword
6581 + t->operand_types[op].bitfield.ymmword
6582 + t->operand_types[op].bitfield.zmmword <= 1)
6583 type = &t->operand_types[op];
6584 else if (!i.types[op].bitfield.unspecified)
6585 type = &i.types[op];
6586 else /* Ambiguities get resolved elsewhere. */
6587 fallback = &t->operand_types[op];
6588 }
6589 else if (i.types[op].bitfield.class == RegSIMD
6590 && t->opcode_modifier.evex != EVEXLIG)
6591 {
6592 if (i.types[op].bitfield.zmmword)
6593 i.memshift = 6;
6594 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6595 i.memshift = 5;
6596 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6597 i.memshift = 4;
6598 }
6599
6600 if (!type && !i.memshift)
6601 type = fallback;
6602 if (type)
6603 {
6604 if (type->bitfield.zmmword)
6605 i.memshift = 6;
6606 else if (type->bitfield.ymmword)
6607 i.memshift = 5;
6608 else if (type->bitfield.xmmword)
6609 i.memshift = 4;
6610 }
6611
6612 /* For the check in fits_in_disp8(). */
6613 if (i.memshift == 0)
6614 i.memshift = -1;
6615 }
6616
6617 for (op = 0; op < i.operands; op++)
6618 if (operand_type_check (i.types[op], disp)
6619 && i.op[op].disps->X_op == O_constant)
6620 {
6621 if (fits_in_disp8 (i.op[op].disps->X_add_number))
6622 {
6623 i.types[op].bitfield.disp8 = 1;
6624 return 0;
6625 }
6626 i.types[op].bitfield.disp8 = 0;
6627 }
6628 }
6629
6630 i.memshift = 0;
6631
6632 return 0;
6633 }
6634
6635 /* Check if encoding requirements are met by the instruction. */
6636
6637 static int
6638 VEX_check_encoding (const insn_template *t)
6639 {
6640 if (i.vec_encoding == vex_encoding_error)
6641 {
6642 i.error = unsupported;
6643 return 1;
6644 }
6645
6646 if (i.vec_encoding == vex_encoding_evex)
6647 {
6648 /* This instruction must be encoded with EVEX prefix. */
6649 if (!is_evex_encoding (t))
6650 {
6651 i.error = unsupported;
6652 return 1;
6653 }
6654 return 0;
6655 }
6656
6657 if (!t->opcode_modifier.vex)
6658 {
6659 /* This instruction template doesn't have VEX prefix. */
6660 if (i.vec_encoding != vex_encoding_default)
6661 {
6662 i.error = unsupported;
6663 return 1;
6664 }
6665 return 0;
6666 }
6667
6668 return 0;
6669 }
6670
6671 /* Helper function for the progress() macro in match_template(). */
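/* It records, across all templates tried, the error raised by the check
   located furthest down in match_template() (tracked via __LINE__), so
   that the diagnostic eventually issued describes the template which
   came closest to matching.  */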
6672 static INLINE enum i386_error progress (enum i386_error new,
6673 enum i386_error last,
6674 unsigned int line, unsigned int *line_p)
6675 {
6676 if (line <= *line_p)
6677 return last;
6678 *line_p = line;
6679 return new;
6680 }
6681
6682 static const insn_template *
6683 match_template (char mnem_suffix)
6684 {
6685 /* Points to template once we've found it. */
6686 const insn_template *t;
6687 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6688 i386_operand_type overlap4;
6689 unsigned int found_reverse_match;
6690 i386_operand_type operand_types [MAX_OPERANDS];
6691 int addr_prefix_disp;
6692 unsigned int j, size_match, check_register, errline = __LINE__;
6693 enum i386_error specific_error = number_of_operands_mismatch;
6694 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6695
6696 #if MAX_OPERANDS != 5
6697 # error "MAX_OPERANDS must be 5."
6698 #endif
6699
6700 found_reverse_match = 0;
6701 addr_prefix_disp = -1;
6702
6703 for (t = current_templates->start; t < current_templates->end; t++)
6704 {
6705 addr_prefix_disp = -1;
6706 found_reverse_match = 0;
6707
6708 /* Must have right number of operands. */
6709 if (i.operands != t->operands)
6710 continue;
6711
6712 /* Check processor support. */
6713 specific_error = progress (unsupported);
6714 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6715 continue;
6716
6717 /* Check AT&T mnemonic. */
6718 specific_error = progress (unsupported_with_intel_mnemonic);
6719 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6720 continue;
6721
6722 /* Check AT&T/Intel syntax. */
6723 specific_error = progress (unsupported_syntax);
6724 if ((intel_syntax && t->opcode_modifier.attsyntax)
6725 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6726 continue;
6727
6728 /* Check Intel64/AMD64 ISA. */
6729 switch (isa64)
6730 {
6731 default:
6732 /* Default: Don't accept Intel64. */
6733 if (t->opcode_modifier.isa64 == INTEL64)
6734 continue;
6735 break;
6736 case amd64:
6737 /* -mamd64: Don't accept Intel64 and Intel64 only. */
6738 if (t->opcode_modifier.isa64 >= INTEL64)
6739 continue;
6740 break;
6741 case intel64:
6742 /* -mintel64: Don't accept AMD64. */
6743 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6744 continue;
6745 break;
6746 }
6747
6748 /* Check the suffix. */
6749 specific_error = progress (invalid_instruction_suffix);
6750 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
6751 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
6752 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
6753 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
6754 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
6755 continue;
6756
6757 specific_error = progress (operand_size_mismatch);
6758 size_match = operand_size_match (t);
6759 if (!size_match)
6760 continue;
6761
6762 /* This is intentionally not
6763
6764 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
6765
6766 as the case of a missing * on the operand is accepted (perhaps with
6767 a warning, issued further down). */
6768 specific_error = progress (operand_type_mismatch);
6769 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
6770 continue;
6771
6772 /* In Intel syntax, normally we can check for memory operand size when
6773 there is no mnemonic suffix. But jmp and call have 2 different
6774 encodings with Dword memory operand size. Skip the "near" one
6775 (permitting a register operand) when "far" was requested. */
6776 if (i.far_branch
6777 && t->opcode_modifier.jump == JUMP_ABSOLUTE
6778 && t->operand_types[0].bitfield.class == Reg)
6779 continue;
6780
6781 for (j = 0; j < MAX_OPERANDS; j++)
6782 operand_types[j] = t->operand_types[j];
6783
6784 /* In general, don't allow 32-bit operands on pre-386. */
6785 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
6786 : operand_size_mismatch);
6787 j = i.imm_operands + (t->operands > i.imm_operands + 1);
6788 if (i.suffix == LONG_MNEM_SUFFIX
6789 && !cpu_arch_flags.bitfield.cpui386
6790 && (intel_syntax
6791 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
6792 && !intel_float_operand (insn_name (t)))
6793 : intel_float_operand (insn_name (t)) != 2)
6794 && (t->operands == i.imm_operands
6795 || (operand_types[i.imm_operands].bitfield.class != RegMMX
6796 && operand_types[i.imm_operands].bitfield.class != RegSIMD
6797 && operand_types[i.imm_operands].bitfield.class != RegMask)
6798 || (operand_types[j].bitfield.class != RegMMX
6799 && operand_types[j].bitfield.class != RegSIMD
6800 && operand_types[j].bitfield.class != RegMask))
6801 && !t->opcode_modifier.sib)
6802 continue;
6803
6804 /* Do not verify operands when there are none. */
6805 if (!t->operands)
6806 {
6807 if (VEX_check_encoding (t))
6808 {
6809 specific_error = progress (i.error);
6810 continue;
6811 }
6812
6813 /* We've found a match; break out of loop. */
6814 break;
6815 }
6816
6817 if (!t->opcode_modifier.jump
6818 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
6819 {
6820 /* There should be only one Disp operand. */
6821 for (j = 0; j < MAX_OPERANDS; j++)
6822 if (operand_type_check (operand_types[j], disp))
6823 break;
6824 if (j < MAX_OPERANDS)
6825 {
6826 bool override = (i.prefix[ADDR_PREFIX] != 0);
6827
6828 addr_prefix_disp = j;
6829
6830 /* Address size prefix will turn Disp64 operand into Disp32 and
6831 Disp32/Disp16 one into Disp16/Disp32 respectively. */
6832 switch (flag_code)
6833 {
6834 case CODE_16BIT:
6835 override = !override;
6836 /* Fall through. */
6837 case CODE_32BIT:
6838 if (operand_types[j].bitfield.disp32
6839 && operand_types[j].bitfield.disp16)
6840 {
6841 operand_types[j].bitfield.disp16 = override;
6842 operand_types[j].bitfield.disp32 = !override;
6843 }
6844 gas_assert (!operand_types[j].bitfield.disp64);
6845 break;
6846
6847 case CODE_64BIT:
6848 if (operand_types[j].bitfield.disp64)
6849 {
6850 gas_assert (!operand_types[j].bitfield.disp32);
6851 operand_types[j].bitfield.disp32 = override;
6852 operand_types[j].bitfield.disp64 = !override;
6853 }
6854 operand_types[j].bitfield.disp16 = 0;
6855 break;
6856 }
6857 }
6858 }
6859
6860 /* We check register size if needed. */
6861 if (t->opcode_modifier.checkoperandsize)
6862 {
6863 check_register = (1 << t->operands) - 1;
6864 if (i.broadcast.type || i.broadcast.bytes)
6865 check_register &= ~(1 << i.broadcast.operand);
6866 }
6867 else
6868 check_register = 0;
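      /* check_register has bit N set when the size of register operand N
	 still needs verifying; e.g. (check_register & 3) == 3 below tests
	 operands 0 and 1 together.  */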
6869
6870 overlap0 = operand_type_and (i.types[0], operand_types[0]);
6871 switch (t->operands)
6872 {
6873 case 1:
6874 if (!operand_type_match (overlap0, i.types[0]))
6875 continue;
6876 break;
6877 case 2:
6878 /* xchg %eax, %eax is a special case. It is an alias for nop
6879 only in 32bit mode and we can use opcode 0x90. In 64bit
6880 mode, we can't use 0x90 for xchg %eax, %eax since it should
6881 zero-extend %eax to %rax. */
6882 if (flag_code == CODE_64BIT
6883 && t->base_opcode == 0x90
6884 && t->opcode_modifier.opcodespace == SPACE_BASE
6885 && i.types[0].bitfield.instance == Accum
6886 && i.types[0].bitfield.dword
6887 && i.types[1].bitfield.instance == Accum)
6888 continue;
6889
6890 if (t->base_opcode == MOV_AX_DISP32
6891 && t->opcode_modifier.opcodespace == SPACE_BASE
6892 && t->mnem_off != MN_movabs)
6893 {
6894 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
6895 if (i.reloc[0] == BFD_RELOC_386_GOT32)
6896 continue;
6897
6898 /* xrelease mov %eax, <disp> is another special case. It must not
6899 match the accumulator-only encoding of mov. */
6900 if (i.hle_prefix)
6901 continue;
6902 }
6903 /* Fall through. */
6904
6905 case 3:
6906 if (!(size_match & MATCH_STRAIGHT))
6907 goto check_reverse;
6908 /* Reverse direction of operands if swapping is possible in the first
6909 place (operands need to be symmetric) and
6910 - the load form is requested, and the template is a store form,
6911 - the store form is requested, and the template is a load form,
6912 - the non-default (swapped) form is requested. */
6913 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
6914 if (t->opcode_modifier.d && i.reg_operands == i.operands
6915 && !operand_type_all_zero (&overlap1))
6916 switch (i.dir_encoding)
6917 {
6918 case dir_encoding_load:
6919 if (operand_type_check (operand_types[i.operands - 1], anymem)
6920 || t->opcode_modifier.regmem)
6921 goto check_reverse;
6922 break;
6923
6924 case dir_encoding_store:
6925 if (!operand_type_check (operand_types[i.operands - 1], anymem)
6926 && !t->opcode_modifier.regmem)
6927 goto check_reverse;
6928 break;
6929
6930 case dir_encoding_swap:
6931 goto check_reverse;
6932
6933 case dir_encoding_default:
6934 break;
6935 }
6936 /* If we want store form, we skip the current load. */
6937 if ((i.dir_encoding == dir_encoding_store
6938 || i.dir_encoding == dir_encoding_swap)
6939 && i.mem_operands == 0
6940 && t->opcode_modifier.load)
6941 continue;
6942 /* Fall through. */
6943 case 4:
6944 case 5:
6945 overlap1 = operand_type_and (i.types[1], operand_types[1]);
6946 if (!operand_type_match (overlap0, i.types[0])
6947 || !operand_type_match (overlap1, i.types[1])
6948 || ((check_register & 3) == 3
6949 && !operand_type_register_match (i.types[0],
6950 operand_types[0],
6951 i.types[1],
6952 operand_types[1])))
6953 {
6954 specific_error = progress (i.error);
6955
6956 /* Check if other direction is valid ... */
6957 if (!t->opcode_modifier.d)
6958 continue;
6959
6960 check_reverse:
6961 if (!(size_match & MATCH_REVERSE))
6962 continue;
6963 /* Try reversing direction of operands. */
6964 j = t->opcode_modifier.vexsources ? 1 : i.operands - 1;
6965 overlap0 = operand_type_and (i.types[0], operand_types[j]);
6966 overlap1 = operand_type_and (i.types[j], operand_types[0]);
6967 overlap2 = operand_type_and (i.types[1], operand_types[1]);
6968 gas_assert (t->operands != 3 || !check_register);
6969 if (!operand_type_match (overlap0, i.types[0])
6970 || !operand_type_match (overlap1, i.types[j])
6971 || (t->operands == 3
6972 && !operand_type_match (overlap2, i.types[1]))
6973 || (check_register
6974 && !operand_type_register_match (i.types[0],
6975 operand_types[j],
6976 i.types[j],
6977 operand_types[0])))
6978 {
6979 /* Does not match either direction. */
6980 specific_error = progress (i.error);
6981 continue;
6982 }
6983 /* found_reverse_match holds which variant of D
6984 we've found. */
6985 if (!t->opcode_modifier.d)
6986 found_reverse_match = 0;
6987 else if (operand_types[0].bitfield.tbyte)
6988 {
6989 if (t->opcode_modifier.operandconstraint != UGH)
6990 found_reverse_match = Opcode_FloatD;
6991 else
6992 found_reverse_match = ~0;
6993 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
6994 if ((t->extension_opcode & 4)
6995 && (intel_syntax || intel_mnemonic))
6996 found_reverse_match |= Opcode_FloatR;
6997 }
6998 else if (t->opcode_modifier.vexsources)
6999 {
7000 found_reverse_match = Opcode_VexW;
7001 goto check_operands_345;
7002 }
7003 else if (t->opcode_modifier.opcodespace != SPACE_BASE
7004 && (t->opcode_modifier.opcodespace != SPACE_0F
7005 /* MOV to/from CR/DR/TR, as an exception, follow
7006 the base opcode space encoding model. */
7007 || (t->base_opcode | 7) != 0x27))
7008 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
7009 ? Opcode_ExtD : Opcode_SIMD_IntD;
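	      /* Base opcodes 0x6e/0x7e here are the MOVD/MOVQ-style
		 GPR<->SIMD moves, which flip via the SIMD integer variant
		 of the direction bit.  */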
7010 else if (!t->opcode_modifier.commutative)
7011 found_reverse_match = Opcode_D;
7012 else
7013 found_reverse_match = ~0;
7014 }
7015 else
7016 {
7017 /* Found a forward 2 operand match here. */
7018 check_operands_345:
7019 switch (t->operands)
7020 {
7021 case 5:
7022 overlap4 = operand_type_and (i.types[4], operand_types[4]);
7023 if (!operand_type_match (overlap4, i.types[4])
7024 || !operand_type_register_match (i.types[3],
7025 operand_types[3],
7026 i.types[4],
7027 operand_types[4]))
7028 {
7029 specific_error = progress (i.error);
7030 continue;
7031 }
7032 /* Fall through. */
7033 case 4:
7034 overlap3 = operand_type_and (i.types[3], operand_types[3]);
7035 if (!operand_type_match (overlap3, i.types[3])
7036 || ((check_register & 0xa) == 0xa
7037 && !operand_type_register_match (i.types[1],
7038 operand_types[1],
7039 i.types[3],
7040 operand_types[3]))
7041 || ((check_register & 0xc) == 0xc
7042 && !operand_type_register_match (i.types[2],
7043 operand_types[2],
7044 i.types[3],
7045 operand_types[3])))
7046 {
7047 specific_error = progress (i.error);
7048 continue;
7049 }
7050 /* Fall through. */
7051 case 3:
7052 overlap2 = operand_type_and (i.types[2], operand_types[2]);
7053 if (!operand_type_match (overlap2, i.types[2])
7054 || ((check_register & 5) == 5
7055 && !operand_type_register_match (i.types[0],
7056 operand_types[0],
7057 i.types[2],
7058 operand_types[2]))
7059 || ((check_register & 6) == 6
7060 && !operand_type_register_match (i.types[1],
7061 operand_types[1],
7062 i.types[2],
7063 operand_types[2])))
7064 {
7065 specific_error = progress (i.error);
7066 continue;
7067 }
7068 break;
7069 }
7070 }
7071 /* Found either forward/reverse 2, 3 or 4 operand match here:
7072 slip through to break. */
7073 }
7074
7075 /* Check if VEX/EVEX encoding requirements can be satisfied. */
7076 if (VEX_check_encoding (t))
7077 {
7078 specific_error = progress (i.error);
7079 continue;
7080 }
7081
7082 /* Check if vector operands are valid. */
7083 if (check_VecOperands (t))
7084 {
7085 specific_error = progress (i.error);
7086 continue;
7087 }
7088
7089 /* We've found a match; break out of loop. */
7090 break;
7091 }
7092
7093 #undef progress
7094
7095 if (t == current_templates->end)
7096 {
7097 /* We found no match. */
7098 i.error = specific_error;
7099 return NULL;
7100 }
7101
7102 if (!quiet_warnings)
7103 {
7104 if (!intel_syntax
7105 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
7106 as_warn (_("indirect %s without `*'"), insn_name (t));
7107
7108 if (t->opcode_modifier.isprefix
7109 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
7110 {
7111 /* Warn them that a data or address size prefix doesn't
7112 affect assembly of the next line of code. */
7113 as_warn (_("stand-alone `%s' prefix"), insn_name (t));
7114 }
7115 }
7116
7117 /* Copy the template we found. */
7118 install_template (t);
7119
7120 if (addr_prefix_disp != -1)
7121 i.tm.operand_types[addr_prefix_disp]
7122 = operand_types[addr_prefix_disp];
7123
7124 switch (found_reverse_match)
7125 {
7126 case 0:
7127 break;
7128
7129 case Opcode_FloatR:
7130 case Opcode_FloatR | Opcode_FloatD:
7131 i.tm.extension_opcode ^= Opcode_FloatR >> 3;
7132 found_reverse_match &= Opcode_FloatD;
7133
7134 /* Fall through. */
7135 default:
7136 /* If we found a reverse match we must alter the opcode direction
7137 bit and clear/flip the regmem modifier one. found_reverse_match
7138 holds bits to change (different for int & float insns). */
7139
7140 i.tm.base_opcode ^= found_reverse_match;
7141
7142 /* Certain SIMD insns have their load forms specified in the opcode
7143 table, and hence we need to _set_ RegMem instead of clearing it.
7144 We need to avoid setting the bit though on insns like KMOVW. */
7145 i.tm.opcode_modifier.regmem
7146 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7147 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7148 && !i.tm.opcode_modifier.regmem;
7149
7150 /* Fall through. */
7151 case ~0:
7152 i.tm.operand_types[0] = operand_types[i.operands - 1];
7153 i.tm.operand_types[i.operands - 1] = operand_types[0];
7154 break;
7155
7156 case Opcode_VexW:
7157 /* Only the first two register operands need reversing, alongside
7158 flipping VEX.W. */
7159 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7160
7161 j = i.tm.operand_types[0].bitfield.imm8;
7162 i.tm.operand_types[j] = operand_types[j + 1];
7163 i.tm.operand_types[j + 1] = operand_types[j];
7164 break;
7165 }
7166
7167 return t;
7168 }
7169
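/* Check that the %es-based operand of a string insn (e.g. the
   destination of MOVS) carries no other segment override.  */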
7170 static int
7171 check_string (void)
7172 {
7173 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7174 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7175
7176 if (i.seg[op] != NULL && i.seg[op] != reg_es)
7177 {
7178 as_bad (_("`%s' operand %u must use `%ses' segment"),
7179 insn_name (&i.tm),
7180 intel_syntax ? i.tm.operands - es_op : es_op + 1,
7181 register_prefix);
7182 return 0;
7183 }
7184
7185 /* There's only ever one segment override allowed per instruction.
7186 This instruction possibly has a legal segment override on the
7187 second operand, so copy the segment to where non-string
7188 instructions store it, allowing common code. */
7189 i.seg[op] = i.seg[1];
7190
7191 return 1;
7192 }
7193
7194 static int
7195 process_suffix (void)
7196 {
7197 bool is_crc32 = false, is_movx = false;
7198
7199 /* If matched instruction specifies an explicit instruction mnemonic
7200 suffix, use it. */
7201 if (i.tm.opcode_modifier.size == SIZE16)
7202 i.suffix = WORD_MNEM_SUFFIX;
7203 else if (i.tm.opcode_modifier.size == SIZE32)
7204 i.suffix = LONG_MNEM_SUFFIX;
7205 else if (i.tm.opcode_modifier.size == SIZE64)
7206 i.suffix = QWORD_MNEM_SUFFIX;
7207 else if (i.reg_operands
7208 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7209 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7210 {
7211 unsigned int numop = i.operands;
7212
7213 /* MOVSX/MOVZX */
7214 is_movx = (i.tm.opcode_modifier.opcodespace == SPACE_0F
7215 && (i.tm.base_opcode | 8) == 0xbe)
7216 || (i.tm.opcode_modifier.opcodespace == SPACE_BASE
7217 && i.tm.base_opcode == 0x63
7218 && i.tm.cpu_flags.bitfield.cpu64);
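      /* The 0xbe test covers the 0F BE/B6 (movsx/movzx) templates, whose
	 W bit selects the 16-bit-source forms 0F BF/B7; opcode 0x63 with
	 Cpu64 is MOVSXD.  */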
7219
7220 /* CRC32 */
7221 is_crc32 = (i.tm.base_opcode == 0xf0
7222 && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7223 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2);
7224
7225 /* movsx/movzx want only their source operand considered here, for the
7226 ambiguity checking below. The suffix will be replaced afterwards
7227 to represent the destination (register). */
7228 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7229 --i.operands;
7230
7231 /* crc32 needs REX.W set regardless of suffix / source operand size. */
7232 if (is_crc32 && i.tm.operand_types[1].bitfield.qword)
7233 i.rex |= REX_W;
7234
7235 /* If there's no instruction mnemonic suffix we try to invent one
7236 based on GPR operands. */
7237 if (!i.suffix)
7238 {
7239 	  /* We take i.suffix from the last register operand specified.
7240 	     The destination register type is more significant than the
7241 	     source register type.  crc32 in SSE4.2 prefers the source
7242 	     register type.  */
7243 unsigned int op = is_crc32 ? 1 : i.operands;
7244
7245 while (op--)
7246 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7247 || i.tm.operand_types[op].bitfield.instance == Accum)
7248 {
7249 if (i.types[op].bitfield.class != Reg)
7250 continue;
7251 if (i.types[op].bitfield.byte)
7252 i.suffix = BYTE_MNEM_SUFFIX;
7253 else if (i.types[op].bitfield.word)
7254 i.suffix = WORD_MNEM_SUFFIX;
7255 else if (i.types[op].bitfield.dword)
7256 i.suffix = LONG_MNEM_SUFFIX;
7257 else if (i.types[op].bitfield.qword)
7258 i.suffix = QWORD_MNEM_SUFFIX;
7259 else
7260 continue;
7261 break;
7262 }
7263
7264 /* As an exception, movsx/movzx silently default to a byte source
7265 in AT&T mode. */
7266 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7267 i.suffix = BYTE_MNEM_SUFFIX;
7268 }
7269 else if (i.suffix == BYTE_MNEM_SUFFIX)
7270 {
7271 if (!check_byte_reg ())
7272 return 0;
7273 }
7274 else if (i.suffix == LONG_MNEM_SUFFIX)
7275 {
7276 if (!check_long_reg ())
7277 return 0;
7278 }
7279 else if (i.suffix == QWORD_MNEM_SUFFIX)
7280 {
7281 if (!check_qword_reg ())
7282 return 0;
7283 }
7284 else if (i.suffix == WORD_MNEM_SUFFIX)
7285 {
7286 if (!check_word_reg ())
7287 return 0;
7288 }
7289 else if (intel_syntax
7290 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7291 /* Do nothing if the instruction is going to ignore the prefix. */
7292 ;
7293 else
7294 abort ();
7295
7296 /* Undo the movsx/movzx change done above. */
7297 i.operands = numop;
7298 }
7299 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7300 && !i.suffix)
7301 {
7302 i.suffix = stackop_size;
7303 if (stackop_size == LONG_MNEM_SUFFIX)
7304 {
7305 /* stackop_size is set to LONG_MNEM_SUFFIX for the
7306 .code16gcc directive to support 16-bit mode with
7307 32-bit address. For IRET without a suffix, generate
7308 16-bit IRET (opcode 0xcf) to return from an interrupt
7309 handler. */
7310 if (i.tm.base_opcode == 0xcf)
7311 {
7312 i.suffix = WORD_MNEM_SUFFIX;
7313 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7314 }
7315 /* Warn about changed behavior for segment register push/pop. */
7316 else if ((i.tm.base_opcode | 1) == 0x07)
7317 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7318 insn_name (&i.tm));
7319 }
7320 }
7321 else if (!i.suffix
7322 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7323 || i.tm.opcode_modifier.jump == JUMP_BYTE
7324 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7325 || (i.tm.opcode_modifier.opcodespace == SPACE_0F
7326 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7327 && i.tm.extension_opcode <= 3)))
7328 {
7329 switch (flag_code)
7330 {
7331 case CODE_64BIT:
7332 if (!i.tm.opcode_modifier.no_qsuf)
7333 {
7334 if (i.tm.opcode_modifier.jump == JUMP_BYTE
7335 || i.tm.opcode_modifier.no_lsuf)
7336 i.suffix = QWORD_MNEM_SUFFIX;
7337 break;
7338 }
7339 /* Fall through. */
7340 case CODE_32BIT:
7341 if (!i.tm.opcode_modifier.no_lsuf)
7342 i.suffix = LONG_MNEM_SUFFIX;
7343 break;
7344 case CODE_16BIT:
7345 if (!i.tm.opcode_modifier.no_wsuf)
7346 i.suffix = WORD_MNEM_SUFFIX;
7347 break;
7348 }
7349 }
7350
7351 if (!i.suffix
7352 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7353 /* Also cover lret/retf/iret in 64-bit mode. */
7354 || (flag_code == CODE_64BIT
7355 && !i.tm.opcode_modifier.no_lsuf
7356 && !i.tm.opcode_modifier.no_qsuf))
7357 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7358 /* Explicit sizing prefixes are assumed to disambiguate insns. */
7359 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7360 /* Accept FLDENV et al without suffix. */
7361 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7362 {
7363 unsigned int suffixes, evex = 0;
7364
7365 suffixes = !i.tm.opcode_modifier.no_bsuf;
7366 if (!i.tm.opcode_modifier.no_wsuf)
7367 suffixes |= 1 << 1;
7368 if (!i.tm.opcode_modifier.no_lsuf)
7369 suffixes |= 1 << 2;
7370 if (!i.tm.opcode_modifier.no_ssuf)
7371 suffixes |= 1 << 4;
7372 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7373 suffixes |= 1 << 5;
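      /* suffixes is a bitmap of size candidates: bit 0 = 'b', bit 1 = 'w',
	 bit 2 = 'l', bit 4 = 's', bit 5 = 'q', and bits 6-8 (set below)
	 stand for XMM/YMM/ZMM memory operands.  `suffixes & (suffixes - 1)'
	 further down is non-zero iff more than one bit is set.  */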
7374
7375 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
7376 also suitable for AT&T syntax mode, it was requested that this be
7377 restricted to just Intel syntax. */
7378 if (intel_syntax && is_any_vex_encoding (&i.tm)
7379 && !i.broadcast.type && !i.broadcast.bytes)
7380 {
7381 unsigned int op;
7382
7383 for (op = 0; op < i.tm.operands; ++op)
7384 {
7385 if (is_evex_encoding (&i.tm)
7386 && !cpu_arch_flags.bitfield.cpuavx512vl)
7387 {
7388 if (i.tm.operand_types[op].bitfield.ymmword)
7389 i.tm.operand_types[op].bitfield.xmmword = 0;
7390 if (i.tm.operand_types[op].bitfield.zmmword)
7391 i.tm.operand_types[op].bitfield.ymmword = 0;
7392 if (!i.tm.opcode_modifier.evex
7393 || i.tm.opcode_modifier.evex == EVEXDYN)
7394 i.tm.opcode_modifier.evex = EVEX512;
7395 }
7396
7397 if (i.tm.operand_types[op].bitfield.xmmword
7398 + i.tm.operand_types[op].bitfield.ymmword
7399 + i.tm.operand_types[op].bitfield.zmmword < 2)
7400 continue;
7401
7402 /* Any properly sized operand disambiguates the insn. */
7403 if (i.types[op].bitfield.xmmword
7404 || i.types[op].bitfield.ymmword
7405 || i.types[op].bitfield.zmmword)
7406 {
7407 suffixes &= ~(7 << 6);
7408 evex = 0;
7409 break;
7410 }
7411
7412 if ((i.flags[op] & Operand_Mem)
7413 && i.tm.operand_types[op].bitfield.unspecified)
7414 {
7415 if (i.tm.operand_types[op].bitfield.xmmword)
7416 suffixes |= 1 << 6;
7417 if (i.tm.operand_types[op].bitfield.ymmword)
7418 suffixes |= 1 << 7;
7419 if (i.tm.operand_types[op].bitfield.zmmword)
7420 suffixes |= 1 << 8;
7421 if (is_evex_encoding (&i.tm))
7422 evex = EVEX512;
7423 }
7424 }
7425 }
7426
7427 /* Are multiple suffixes / operand sizes allowed? */
7428 if (suffixes & (suffixes - 1))
7429 {
7430 if (intel_syntax
7431 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7432 || operand_check == check_error))
7433 {
7434 as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
7435 return 0;
7436 }
7437 if (operand_check == check_error)
7438 {
7439 as_bad (_("no instruction mnemonic suffix given and "
7440 "no register operands; can't size `%s'"), insn_name (&i.tm));
7441 return 0;
7442 }
7443 if (operand_check == check_warning)
7444 as_warn (_("%s; using default for `%s'"),
7445 intel_syntax
7446 ? _("ambiguous operand size")
7447 : _("no instruction mnemonic suffix given and "
7448 "no register operands"),
7449 insn_name (&i.tm));
7450
7451 if (i.tm.opcode_modifier.floatmf)
7452 i.suffix = SHORT_MNEM_SUFFIX;
7453 else if (is_movx)
7454 /* handled below */;
7455 else if (evex)
7456 i.tm.opcode_modifier.evex = evex;
7457 else if (flag_code == CODE_16BIT)
7458 i.suffix = WORD_MNEM_SUFFIX;
7459 else if (!i.tm.opcode_modifier.no_lsuf)
7460 i.suffix = LONG_MNEM_SUFFIX;
7461 else
7462 i.suffix = QWORD_MNEM_SUFFIX;
7463 }
7464 }
7465
7466 if (is_movx)
7467 {
7468 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7469 In AT&T syntax, if there is no suffix (warned about above), the default
7470 will be byte extension. */
7471 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7472 i.tm.base_opcode |= 1;
7473
7474 /* For further processing, the suffix should represent the destination
7475 (register). This is already the case when one was used with
7476 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7477 no suffix to begin with. */
7478 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7479 {
7480 if (i.types[1].bitfield.word)
7481 i.suffix = WORD_MNEM_SUFFIX;
7482 else if (i.types[1].bitfield.qword)
7483 i.suffix = QWORD_MNEM_SUFFIX;
7484 else
7485 i.suffix = LONG_MNEM_SUFFIX;
7486
7487 i.tm.opcode_modifier.w = 0;
7488 }
7489 }
7490
7491 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7492 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7493 != (i.tm.operand_types[1].bitfield.class == Reg);
7494
7495 /* Change the opcode based on the operand size given by i.suffix. */
7496 switch (i.suffix)
7497 {
7498 /* Size floating point instruction. */
7499 case LONG_MNEM_SUFFIX:
7500 if (i.tm.opcode_modifier.floatmf)
7501 {
7502 i.tm.base_opcode ^= 4;
7503 break;
7504 }
7505 /* fall through */
7506 case WORD_MNEM_SUFFIX:
7507 case QWORD_MNEM_SUFFIX:
7508 /* It's not a byte, select word/dword operation. */
7509 if (i.tm.opcode_modifier.w)
7510 {
7511 if (i.short_form)
7512 i.tm.base_opcode |= 8;
7513 else
7514 i.tm.base_opcode |= 1;
7515 }
7516 /* fall through */
7517 case SHORT_MNEM_SUFFIX:
7518 /* Now select between word & dword operations via the operand
7519 size prefix, except for instructions that will ignore this
7520 prefix anyway. */
7521 if (i.suffix != QWORD_MNEM_SUFFIX
7522 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7523 && !i.tm.opcode_modifier.floatmf
7524 && !is_any_vex_encoding (&i.tm)
7525 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7526 || (flag_code == CODE_64BIT
7527 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7528 {
7529 unsigned int prefix = DATA_PREFIX_OPCODE;
7530
7531 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7532 prefix = ADDR_PREFIX_OPCODE;
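	    /* The counter register of jcxz/loop (%cx/%ecx/%rcx) is selected
	       by the address size, not the operand size, hence 0x67 rather
	       than 0x66.  */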
7533
7534 if (!add_prefix (prefix))
7535 return 0;
7536 }
7537
7538       /* Set REX.W when the operand size is 64 bits.  */
7539 if (i.suffix == QWORD_MNEM_SUFFIX
7540 && flag_code == CODE_64BIT
7541 && !i.tm.opcode_modifier.norex64
7542 && !i.tm.opcode_modifier.vexw
7543 	     /* Special case for xchg %rax,%rax.  It is a NOP and doesn't
7544 		need rex64.  */
7545 && ! (i.operands == 2
7546 && i.tm.base_opcode == 0x90
7547 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
7548 && i.types[0].bitfield.instance == Accum
7549 && i.types[0].bitfield.qword
7550 && i.types[1].bitfield.instance == Accum))
7551 i.rex |= REX_W;
7552
7553 break;
7554
7555 case 0:
7556 /* Select word/dword/qword operation with explicit data sizing prefix
7557 when there are no suitable register operands. */
7558 if (i.tm.opcode_modifier.w
7559 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7560 && (!i.reg_operands
7561 || (i.reg_operands == 1
7562 /* ShiftCount */
7563 && (i.tm.operand_types[0].bitfield.instance == RegC
7564 /* InOutPortReg */
7565 || i.tm.operand_types[0].bitfield.instance == RegD
7566 || i.tm.operand_types[1].bitfield.instance == RegD
7567 /* CRC32 */
7568 || is_crc32))))
7569 i.tm.base_opcode |= 1;
7570 break;
7571 }
7572
7573 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7574 {
7575 gas_assert (!i.suffix);
7576 gas_assert (i.reg_operands);
7577
7578 if (i.tm.operand_types[0].bitfield.instance == Accum
7579 || i.operands == 1)
7580 {
7581 /* The address size override prefix changes the size of the
7582 first operand. */
7583 if (flag_code == CODE_64BIT
7584 && i.op[0].regs->reg_type.bitfield.word)
7585 {
7586 as_bad (_("16-bit addressing unavailable for `%s'"),
7587 insn_name (&i.tm));
7588 return 0;
7589 }
7590
7591 if ((flag_code == CODE_32BIT
7592 ? i.op[0].regs->reg_type.bitfield.word
7593 : i.op[0].regs->reg_type.bitfield.dword)
7594 && !add_prefix (ADDR_PREFIX_OPCODE))
7595 return 0;
7596 }
7597 else
7598 {
7599 	  /* Check for invalid register operands when the address size override
7600 	     prefix changes the size of register operands.  */
7601 unsigned int op;
7602 enum { need_word, need_dword, need_qword } need;
7603
7604 	  /* Use the register operand to decide on the address size prefix when
7605 	     the memory operand has no real registers: a bare symbol, a plain
7606 	     DISP, or the bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
7607 if (i.mem_operands == 1
7608 && i.reg_operands == 1
7609 && i.operands == 2
7610 && i.types[1].bitfield.class == Reg
7611 && (flag_code == CODE_32BIT
7612 ? i.op[1].regs->reg_type.bitfield.word
7613 : i.op[1].regs->reg_type.bitfield.dword)
7614 && ((i.base_reg == NULL && i.index_reg == NULL)
7615 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7616 || (x86_elf_abi == X86_64_X32_ABI
7617 && i.base_reg
7618 && i.base_reg->reg_num == RegIP
7619 && i.base_reg->reg_type.bitfield.qword))
7620 #else
7621 || 0)
7622 #endif
7623 && !add_prefix (ADDR_PREFIX_OPCODE))
7624 return 0;
7625
7626 if (flag_code == CODE_32BIT)
7627 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7628 else if (i.prefix[ADDR_PREFIX])
7629 need = need_dword;
7630 else
7631 need = flag_code == CODE_64BIT ? need_qword : need_word;
7632
7633 for (op = 0; op < i.operands; op++)
7634 {
7635 if (i.types[op].bitfield.class != Reg)
7636 continue;
7637
7638 switch (need)
7639 {
7640 case need_word:
7641 if (i.op[op].regs->reg_type.bitfield.word)
7642 continue;
7643 break;
7644 case need_dword:
7645 if (i.op[op].regs->reg_type.bitfield.dword)
7646 continue;
7647 break;
7648 case need_qword:
7649 if (i.op[op].regs->reg_type.bitfield.qword)
7650 continue;
7651 break;
7652 }
7653
7654 as_bad (_("invalid register operand size for `%s'"),
7655 insn_name (&i.tm));
7656 return 0;
7657 }
7658 }
7659 }
7660
7661 return 1;
7662 }
7663
7664 static int
7665 check_byte_reg (void)
7666 {
7667 int op;
7668
7669 for (op = i.operands; --op >= 0;)
7670 {
7671 /* Skip non-register operands. */
7672 if (i.types[op].bitfield.class != Reg)
7673 continue;
7674
7675 /* If this is an eight bit register, it's OK. If it's the 16 or
7676 32 bit version of an eight bit register, we will just use the
7677 low portion, and that's OK too. */
7678 if (i.types[op].bitfield.byte)
7679 continue;
7680
7681 /* I/O port address operands are OK too. */
7682 if (i.tm.operand_types[op].bitfield.instance == RegD
7683 && i.tm.operand_types[op].bitfield.word)
7684 continue;
7685
7686 /* crc32 only wants its source operand checked here. */
7687 if (i.tm.base_opcode == 0xf0
7688 && i.tm.opcode_modifier.opcodespace == SPACE_0F38
7689 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2
7690 && op != 0)
7691 continue;
7692
7693 /* Any other register is bad. */
7694 as_bad (_("`%s%s' not allowed with `%s%c'"),
7695 register_prefix, i.op[op].regs->reg_name,
7696 insn_name (&i.tm), i.suffix);
7697 return 0;
7698 }
7699 return 1;
7700 }
7701
7702 static int
7703 check_long_reg (void)
7704 {
7705 int op;
7706
7707 for (op = i.operands; --op >= 0;)
7708 /* Skip non-register operands. */
7709 if (i.types[op].bitfield.class != Reg)
7710 continue;
7711 /* Reject eight bit registers, except where the template requires
7712        them.  (e.g. movzb)  */
7713 else if (i.types[op].bitfield.byte
7714 && (i.tm.operand_types[op].bitfield.class == Reg
7715 || i.tm.operand_types[op].bitfield.instance == Accum)
7716 && (i.tm.operand_types[op].bitfield.word
7717 || i.tm.operand_types[op].bitfield.dword))
7718 {
7719 as_bad (_("`%s%s' not allowed with `%s%c'"),
7720 register_prefix,
7721 i.op[op].regs->reg_name,
7722 insn_name (&i.tm),
7723 i.suffix);
7724 return 0;
7725 }
7726 /* Error if the e prefix on a general reg is missing. */
7727 else if (i.types[op].bitfield.word
7728 && (i.tm.operand_types[op].bitfield.class == Reg
7729 || i.tm.operand_types[op].bitfield.instance == Accum)
7730 && i.tm.operand_types[op].bitfield.dword)
7731 {
7732 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7733 register_prefix, i.op[op].regs->reg_name,
7734 i.suffix);
7735 return 0;
7736 }
7737     /* Error if the r prefix on a general reg is present.  */
7738 else if (i.types[op].bitfield.qword
7739 && (i.tm.operand_types[op].bitfield.class == Reg
7740 || i.tm.operand_types[op].bitfield.instance == Accum)
7741 && i.tm.operand_types[op].bitfield.dword)
7742 {
7743 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7744 register_prefix, i.op[op].regs->reg_name, i.suffix);
7745 return 0;
7746 }
7747 return 1;
7748 }
7749
7750 static int
7751 check_qword_reg (void)
7752 {
7753 int op;
7754
7755 for (op = i.operands; --op >= 0; )
7756 /* Skip non-register operands. */
7757 if (i.types[op].bitfield.class != Reg)
7758 continue;
7759 /* Reject eight bit registers, except where the template requires
7760 them. (e.g. movzb) */
7761 else if (i.types[op].bitfield.byte
7762 && (i.tm.operand_types[op].bitfield.class == Reg
7763 || i.tm.operand_types[op].bitfield.instance == Accum)
7764 && (i.tm.operand_types[op].bitfield.word
7765 || i.tm.operand_types[op].bitfield.dword))
7766 {
7767 as_bad (_("`%s%s' not allowed with `%s%c'"),
7768 register_prefix,
7769 i.op[op].regs->reg_name,
7770 insn_name (&i.tm),
7771 i.suffix);
7772 return 0;
7773 }
7774 /* Error if the r prefix on a general reg is missing. */
7775 else if ((i.types[op].bitfield.word
7776 || i.types[op].bitfield.dword)
7777 && (i.tm.operand_types[op].bitfield.class == Reg
7778 || i.tm.operand_types[op].bitfield.instance == Accum)
7779 && i.tm.operand_types[op].bitfield.qword)
7780 {
7781 /* Prohibit these changes in 64-bit mode, since the
7782 lowering is more complicated. */
7783 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7784 register_prefix, i.op[op].regs->reg_name, i.suffix);
7785 return 0;
7786 }
7787 return 1;
7788 }
7789
7790 static int
7791 check_word_reg (void)
7792 {
7793 int op;
7794 for (op = i.operands; --op >= 0;)
7795 /* Skip non-register operands. */
7796 if (i.types[op].bitfield.class != Reg)
7797 continue;
7798 /* Reject eight bit registers, except where the template requires
7799 them. (e.g. movzb) */
7800 else if (i.types[op].bitfield.byte
7801 && (i.tm.operand_types[op].bitfield.class == Reg
7802 || i.tm.operand_types[op].bitfield.instance == Accum)
7803 && (i.tm.operand_types[op].bitfield.word
7804 || i.tm.operand_types[op].bitfield.dword))
7805 {
7806 as_bad (_("`%s%s' not allowed with `%s%c'"),
7807 register_prefix,
7808 i.op[op].regs->reg_name,
7809 insn_name (&i.tm),
7810 i.suffix);
7811 return 0;
7812 }
7813 /* Error if the e or r prefix on a general reg is present. */
7814 else if ((i.types[op].bitfield.dword
7815 || i.types[op].bitfield.qword)
7816 && (i.tm.operand_types[op].bitfield.class == Reg
7817 || i.tm.operand_types[op].bitfield.instance == Accum)
7818 && i.tm.operand_types[op].bitfield.word)
7819 {
7820 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
7821 register_prefix, i.op[op].regs->reg_name,
7822 i.suffix);
7823 return 0;
7824 }
7825 return 1;
7826 }
7827
7828 static int
7829 update_imm (unsigned int j)
7830 {
7831 i386_operand_type overlap = i.types[j];
7832 if (overlap.bitfield.imm8
7833 + overlap.bitfield.imm8s
7834 + overlap.bitfield.imm16
7835 + overlap.bitfield.imm32
7836 + overlap.bitfield.imm32s
7837 + overlap.bitfield.imm64 > 1)
7838 {
7839 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
7840 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
7841 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
7842 static const i386_operand_type imm16_32 = { .bitfield =
7843 { .imm16 = 1, .imm32 = 1 }
7844 };
7845 static const i386_operand_type imm16_32s = { .bitfield =
7846 { .imm16 = 1, .imm32s = 1 }
7847 };
7848 static const i386_operand_type imm16_32_32s = { .bitfield =
7849 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
7850 };
7851
7852 if (i.suffix)
7853 {
7854 i386_operand_type temp;
7855
7856 operand_type_set (&temp, 0);
7857 if (i.suffix == BYTE_MNEM_SUFFIX)
7858 {
7859 temp.bitfield.imm8 = overlap.bitfield.imm8;
7860 temp.bitfield.imm8s = overlap.bitfield.imm8s;
7861 }
7862 else if (i.suffix == WORD_MNEM_SUFFIX)
7863 temp.bitfield.imm16 = overlap.bitfield.imm16;
7864 else if (i.suffix == QWORD_MNEM_SUFFIX)
7865 {
7866 temp.bitfield.imm64 = overlap.bitfield.imm64;
7867 temp.bitfield.imm32s = overlap.bitfield.imm32s;
7868 }
7869 else
7870 temp.bitfield.imm32 = overlap.bitfield.imm32;
7871 overlap = temp;
7872 }
7873 else if (operand_type_equal (&overlap, &imm16_32_32s)
7874 || operand_type_equal (&overlap, &imm16_32)
7875 || operand_type_equal (&overlap, &imm16_32s))
7876 {
7877 if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
7878 overlap = imm16;
7879 else
7880 overlap = imm32s;
7881 }
7882 else if (i.prefix[REX_PREFIX] & REX_W)
7883 overlap = operand_type_and (overlap, imm32s);
7884 else if (i.prefix[DATA_PREFIX])
7885 overlap = operand_type_and (overlap,
7886 flag_code != CODE_16BIT ? imm16 : imm32);
7887 if (overlap.bitfield.imm8
7888 + overlap.bitfield.imm8s
7889 + overlap.bitfield.imm16
7890 + overlap.bitfield.imm32
7891 + overlap.bitfield.imm32s
7892 + overlap.bitfield.imm64 != 1)
7893 {
7894 as_bad (_("no instruction mnemonic suffix given; "
7895 "can't determine immediate size"));
7896 return 0;
7897 }
7898 }
7899 i.types[j] = overlap;
7900
7901 return 1;
7902 }
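/* A rough worked example for update_imm (illustrative): for a no-suffix
   "push $0x1234" in 32-bit code the immediate still overlaps imm16 and
   imm32s after template matching; with no data prefix the 16-bit/32-bit
   toggle above settles on imm32s, leaving exactly one size bit set.  */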
7903
7904 static int
7905 finalize_imm (void)
7906 {
7907 unsigned int j, n;
7908
7909 /* Update the first 2 immediate operands. */
7910 n = i.operands > 2 ? 2 : i.operands;
7911 if (n)
7912 {
7913 for (j = 0; j < n; j++)
7914 if (update_imm (j) == 0)
7915 return 0;
7916
7917 /* The 3rd operand can't be an immediate operand. */
7918 gas_assert (operand_type_check (i.types[2], imm) == 0);
7919 }
7920
7921 return 1;
7922 }
7923
7924 static int
7925 process_operands (void)
7926 {
7927 /* Default segment register this instruction will use for memory
7928 accesses. 0 means unknown. This is only for optimizing out
7929 unnecessary segment overrides. */
7930 const reg_entry *default_seg = NULL;
7931
7932 /* We only need to check those implicit registers for instructions
7933 with 3 operands or fewer. */
7934 if (i.operands <= 3)
7935 for (unsigned int j = 0; j < i.operands; j++)
7936 if (i.types[j].bitfield.instance != InstanceNone)
7937 i.reg_operands--;
7938
7939 if (i.tm.opcode_modifier.sse2avx)
7940 {
7941 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
7942 need converting. */
7943 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
7944 i.prefix[REX_PREFIX] = 0;
7945 i.rex_encoding = 0;
7946 }
7947 /* ImmExt should be processed after SSE2AVX. */
7948 else if (i.tm.opcode_modifier.immext)
7949 process_immext ();
7950
7951 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
7952 {
7953 static const i386_operand_type regxmm = {
7954 .bitfield = { .class = RegSIMD, .xmmword = 1 }
7955 };
7956 unsigned int dupl = i.operands;
7957 unsigned int dest = dupl - 1;
7958 unsigned int j;
7959
7960 /* The destination must be an xmm register. */
7961 gas_assert (i.reg_operands
7962 && MAX_OPERANDS > dupl
7963 && operand_type_equal (&i.types[dest], &regxmm));
7964
7965 if (i.tm.operand_types[0].bitfield.instance == Accum
7966 && i.tm.operand_types[0].bitfield.xmmword)
7967 {
7968 gas_assert (i.tm.opcode_modifier.vexsources == VEX3SOURCES);
7969 /* Keep xmm0 for instructions with VEX prefix and 3
7970 sources. */
7971 i.tm.operand_types[0].bitfield.instance = InstanceNone;
7972 i.tm.operand_types[0].bitfield.class = RegSIMD;
7973 i.reg_operands++;
7974 goto duplicate;
7975 }
7976
7977 if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
7978 {
7979 gas_assert ((MAX_OPERANDS - 1) > dupl
7980 && (i.tm.opcode_modifier.vexsources
7981 == VEX3SOURCES));
7982
7983 /* Add the implicit xmm0 for instructions with VEX prefix
7984 and 3 sources. */
7985 for (j = i.operands; j > 0; j--)
7986 {
7987 i.op[j] = i.op[j - 1];
7988 i.types[j] = i.types[j - 1];
7989 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
7990 i.flags[j] = i.flags[j - 1];
7991 }
7992 i.op[0].regs
7993 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
7994 i.types[0] = regxmm;
7995 i.tm.operand_types[0] = regxmm;
7996
7997 i.operands += 2;
7998 i.reg_operands += 2;
7999 i.tm.operands += 2;
8000
8001 dupl++;
8002 dest++;
8003 i.op[dupl] = i.op[dest];
8004 i.types[dupl] = i.types[dest];
8005 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8006 i.flags[dupl] = i.flags[dest];
8007 }
8008 else
8009 {
8010 duplicate:
8011 i.operands++;
8012 i.reg_operands++;
8013 i.tm.operands++;
8014
8015 i.op[dupl] = i.op[dest];
8016 i.types[dupl] = i.types[dest];
8017 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8018 i.flags[dupl] = i.flags[dest];
8019 }
8020
8021 if (i.tm.opcode_modifier.immext)
8022 process_immext ();
8023 }
8024 else if (i.tm.operand_types[0].bitfield.instance == Accum
8025 && i.tm.opcode_modifier.modrm)
8026 {
8027 unsigned int j;
8028
8029 for (j = 1; j < i.operands; j++)
8030 {
8031 i.op[j - 1] = i.op[j];
8032 i.types[j - 1] = i.types[j];
8033
8034 /* We need to adjust fields in i.tm since they are used by
8035 build_modrm_byte. */
8036 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
8037
8038 i.flags[j - 1] = i.flags[j];
8039 }
8040
8041 /* No adjustment to i.reg_operands: This was already done at the top
8042 of the function. */
8043 i.operands--;
8044 i.tm.operands--;
8045 }
8046 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
8047 {
8048 unsigned int regnum, first_reg_in_group, last_reg_in_group;
8049
8050 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
8051 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
8052 regnum = register_number (i.op[1].regs);
8053 first_reg_in_group = regnum & ~3;
8054 last_reg_in_group = first_reg_in_group + 3;
8055 if (regnum != first_reg_in_group)
8056 as_warn (_("source register `%s%s' implicitly denotes"
8057 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
8058 register_prefix, i.op[1].regs->reg_name,
8059 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
8060 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
8061 insn_name (&i.tm));
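/* E.g. (illustrative) for the AVX512-4FMAPS/4VNNIW quad-group insns:
   naming %zmm5 where a group-of-4 source starts draws a warning that
   %zmm4 .. %zmm7 is the group actually used.  */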
8062 }
8063 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
8064 {
8065 /* The imul $imm, %reg instruction is converted into
8066 imul $imm, %reg, %reg, and the clr %reg instruction
8067 is converted into xor %reg, %reg. */
8068
8069 unsigned int first_reg_op;
8070
8071 if (operand_type_check (i.types[0], reg))
8072 first_reg_op = 0;
8073 else
8074 first_reg_op = 1;
8075 /* Pretend we saw the extra register operand. */
8076 gas_assert (i.reg_operands == 1
8077 && i.op[first_reg_op + 1].regs == 0);
8078 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
8079 i.types[first_reg_op + 1] = i.types[first_reg_op];
8080 i.operands++;
8081 i.reg_operands++;
8082 }
8083
8084 if (i.tm.opcode_modifier.modrm)
8085 {
8086 /* The opcode is completed (modulo i.tm.extension_opcode which
8087 must be put into the modrm byte). Now, we make the modrm and
8088 index base bytes based on all the info we've collected. */
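/* As a sketch, the ModRM byte being built is mod(2) | reg(3) | rm(3);
   e.g. "lea 0x10(%ebx), %eax" assembles to 8d 43 10, where 0x43 is
   mod=01 (disp8), reg=000 (%eax), rm=011 (%ebx).  */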
8089
8090 default_seg = build_modrm_byte ();
8091
8092 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8093 {
8094 /* Warn about some common errors, but press on regardless. */
8095 if (i.operands == 2)
8096 {
8097 /* Reversed arguments on faddp or fmulp. */
8098 as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
8099 register_prefix, i.op[!intel_syntax].regs->reg_name,
8100 register_prefix, i.op[intel_syntax].regs->reg_name);
8101 }
8102 else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
8103 {
8104 /* Extraneous `l' suffix on fp insn. */
8105 as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
8106 register_prefix, i.op[0].regs->reg_name);
8107 }
8108 }
8109 }
8110 else if (i.types[0].bitfield.class == SReg)
8111 {
8112 if (flag_code != CODE_64BIT
8113 ? i.tm.base_opcode == POP_SEG_SHORT
8114 && i.op[0].regs->reg_num == 1
8115 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
8116 && i.op[0].regs->reg_num < 4)
8117 {
8118 as_bad (_("you can't `%s %s%s'"),
8119 insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
8120 return 0;
8121 }
8122 if (i.op[0].regs->reg_num > 3
8123 && i.tm.opcode_modifier.opcodespace == SPACE_BASE)
8124 {
8125 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
8126 i.tm.opcode_modifier.opcodespace = SPACE_0F;
8127 }
8128 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
8129 }
8130 else if (i.tm.opcode_modifier.opcodespace == SPACE_BASE
8131 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
8132 {
8133 default_seg = reg_ds;
8134 }
8135 else if (i.tm.opcode_modifier.isstring)
8136 {
8137 /* For the string instructions that allow a segment override
8138 on one of their operands, the default segment is ds. */
8139 default_seg = reg_ds;
8140 }
8141 else if (i.short_form)
8142 {
8143 /* The register operand is in operand 0 or 1. */
8144 const reg_entry *r = i.op[0].regs;
8145
8146 if (i.imm_operands
8147 || (r->reg_type.bitfield.instance == Accum && i.op[1].regs))
8148 r = i.op[1].regs;
8149 /* Register goes in low 3 bits of opcode. */
8150 i.tm.base_opcode |= r->reg_num;
8151 if ((r->reg_flags & RegRex) != 0)
8152 i.rex |= REX_B;
8153 }
8154
8155 if ((i.seg[0] || i.prefix[SEG_PREFIX])
8156 && i.tm.base_opcode == 0x8d /* lea */
8157 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
8158 && !is_any_vex_encoding (&i.tm))
8159 {
8160 if (!quiet_warnings)
8161 as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
8162 if (optimize && !i.no_optimize)
8163 {
8164 i.seg[0] = NULL;
8165 i.prefix[SEG_PREFIX] = 0;
8166 }
8167 }
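/* E.g. "lea %gs:(%rax), %rdx": the override is ineffectual because LEA
   performs no memory access, so under -O the prefix is dropped, unless
   the insn carries the {nooptimize} pseudo prefix, which sets
   i.no_optimize and keeps the prefix exactly as written.  */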
8168
8169 /* If a segment was explicitly specified, and the specified segment
8170 is neither the default nor the one already recorded from a prefix,
8171 use an opcode prefix to select it. If we never figured out what
8172 the default segment is, then default_seg will be zero at this
8173 point, and the specified segment prefix will always be used. */
8174 if (i.seg[0]
8175 && i.seg[0] != default_seg
8176 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8177 {
8178 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8179 return 0;
8180 }
8181 return 1;
8182 }
8183
8184 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
8185 bool do_sse2avx)
8186 {
8187 if (r->reg_flags & RegRex)
8188 {
8189 if (i.rex & rex_bit)
8190 as_bad (_("same type of prefix used twice"));
8191 i.rex |= rex_bit;
8192 }
8193 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8194 {
8195 gas_assert (i.vex.register_specifier == r);
8196 i.vex.register_specifier += 8;
8197 }
8198
8199 if (r->reg_flags & RegVRex)
8200 i.vrex |= rex_bit;
8201 }
8202
8203 static const reg_entry *
8204 build_modrm_byte (void)
8205 {
8206 const reg_entry *default_seg = NULL;
8207 unsigned int source, dest;
8208 int vex_3_sources;
8209
8210 vex_3_sources = i.tm.opcode_modifier.vexsources == VEX3SOURCES;
8211 if (vex_3_sources)
8212 {
8213 unsigned int nds, reg_slot;
8214 expressionS *exp;
8215
8216 dest = i.operands - 1;
8217 nds = dest - 1;
8218
8219 /* There are 2 kinds of instructions:
8220 1. 5 operands: 4 register operands or 3 register operands
8221 plus 1 memory operand plus one Imm4 operand, VexXDS, and
8222 VexW0 or VexW1. The destination must be either XMM, YMM or
8223 ZMM register.
8224 2. 4 operands: 4 register operands or 3 register operands
8225 plus 1 memory operand, with VexXDS. */
8226 gas_assert ((i.reg_operands == 4
8227 || (i.reg_operands == 3 && i.mem_operands == 1))
8228 && i.tm.opcode_modifier.vexvvvv == VEXXDS
8229 && i.tm.opcode_modifier.vexw
8230 && i.tm.operand_types[dest].bitfield.class == RegSIMD);
8231
8232 /* If VexW1 is set, the first non-immediate operand is the source and
8233 the second non-immediate one is encoded in the immediate operand. */
8234 if (i.tm.opcode_modifier.vexw == VEXW1)
8235 {
8236 source = i.imm_operands;
8237 reg_slot = i.imm_operands + 1;
8238 }
8239 else
8240 {
8241 source = i.imm_operands + 1;
8242 reg_slot = i.imm_operands;
8243 }
8244
8245 if (i.imm_operands == 0)
8246 {
8247 /* When there is no immediate operand, generate an 8bit
8248 immediate operand to encode the first operand. */
8249 exp = &im_expressions[i.imm_operands++];
8250 i.op[i.operands].imms = exp;
8251 i.types[i.operands].bitfield.imm8 = 1;
8252 i.operands++;
8253
8254 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8255 exp->X_op = O_constant;
8256 exp->X_add_number = register_number (i.op[reg_slot].regs) << 4;
8257 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8258 }
8259 else
8260 {
8261 gas_assert (i.imm_operands == 1);
8262 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8263 gas_assert (!i.tm.opcode_modifier.immext);
8264
8265 /* Turn on Imm8 again so that output_imm will generate it. */
8266 i.types[0].bitfield.imm8 = 1;
8267
8268 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8269 i.op[0].imms->X_add_number
8270 |= register_number (i.op[reg_slot].regs) << 4;
8271 gas_assert ((i.op[reg_slot].regs->reg_flags & RegVRex) == 0);
8272 }
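/* Either way the register from REG_SLOT ends up in the high nibble of
   the 8-bit immediate: register N is encoded as N << 4, OR-ed with any
   user imm4 in the low bits; hence the fits_in_imm4 and !RegVRex
   assertions above.  */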
8273
8274 gas_assert (i.tm.operand_types[nds].bitfield.class == RegSIMD);
8275 i.vex.register_specifier = i.op[nds].regs;
8276 }
8277 else
8278 source = dest = 0;
8279
8280 /* i.reg_operands MUST be the number of real register operands;
8281 implicit registers do not count. If there are 3 register
8282 operands, it must be an instruction with VexNDS. For an
8283 instruction with VexNDD, the destination register is encoded
8284 in the VEX prefix. If there are 4 register operands, it must
8285 be an instruction with a VEX prefix and 3 sources. */
8286 if (i.mem_operands == 0
8287 && ((i.reg_operands == 2
8288 && i.tm.opcode_modifier.vexvvvv <= VEXXDS)
8289 || (i.reg_operands == 3
8290 && i.tm.opcode_modifier.vexvvvv == VEXXDS)
8291 || (i.reg_operands == 4 && vex_3_sources)))
8292 {
8293 switch (i.operands)
8294 {
8295 case 2:
8296 source = 0;
8297 break;
8298 case 3:
8299 /* When there are 3 operands, one of them may be immediate,
8300 which may be the first or the last operand. Otherwise,
8301 the first operand must be the shift count register (cl), or
8302 it is an instruction with VexNDS. */
8303 gas_assert (i.imm_operands == 1
8304 || (i.imm_operands == 0
8305 && (i.tm.opcode_modifier.vexvvvv == VEXXDS
8306 || (i.types[0].bitfield.instance == RegC
8307 && i.types[0].bitfield.byte))));
8308 if (operand_type_check (i.types[0], imm)
8309 || (i.types[0].bitfield.instance == RegC
8310 && i.types[0].bitfield.byte))
8311 source = 1;
8312 else
8313 source = 0;
8314 break;
8315 case 4:
8316 /* When there are 4 operands, the first two must be 8bit
8317 immediate operands. The source operand will be the 3rd
8318 one.
8319
8320 For instructions with VexNDS, if the first operand is
8321 an imm8, the source operand is the 2nd one. If the last
8322 operand is imm8, the source operand is the first one. */
8323 gas_assert ((i.imm_operands == 2
8324 && i.types[0].bitfield.imm8
8325 && i.types[1].bitfield.imm8)
8326 || (i.tm.opcode_modifier.vexvvvv == VEXXDS
8327 && i.imm_operands == 1
8328 && (i.types[0].bitfield.imm8
8329 || i.types[i.operands - 1].bitfield.imm8)));
8330 if (i.imm_operands == 2)
8331 source = 2;
8332 else
8333 {
8334 if (i.types[0].bitfield.imm8)
8335 source = 1;
8336 else
8337 source = 0;
8338 }
8339 break;
8340 case 5:
8341 gas_assert (!is_evex_encoding (&i.tm));
8342 gas_assert (i.imm_operands == 1 && vex_3_sources);
8343 break;
8344 default:
8345 abort ();
8346 }
8347
8348 if (!vex_3_sources)
8349 {
8350 dest = source + 1;
8351
8352 if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8353 {
8354 /* For instructions with VexNDS, the register-only source
8355 operand must be a 32/64bit integer, XMM, YMM, ZMM, or mask
8356 register. It is encoded in VEX prefix. */
8357
8358 i386_operand_type op;
8359 unsigned int vvvv;
8360
8361 /* Swap two source operands if needed. */
8362 if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES)
8363 {
8364 vvvv = source;
8365 source = dest;
8366 }
8367 else
8368 vvvv = dest;
8369
8370 op = i.tm.operand_types[vvvv];
8371 if ((dest + 1) >= i.operands
8372 || ((op.bitfield.class != Reg
8373 || (!op.bitfield.dword && !op.bitfield.qword))
8374 && op.bitfield.class != RegSIMD
8375 && op.bitfield.class != RegMask))
8376 abort ();
8377 i.vex.register_specifier = i.op[vvvv].regs;
8378 dest++;
8379 }
8380 }
8381
8382 i.rm.mode = 3;
8383 /* One of the register operands will be encoded in the i.rm.reg
8384 field, the other in the combined i.rm.mode and i.rm.regmem
8385 fields. If no form of this instruction supports a memory
8386 destination operand, then we assume the source operand may
8387 sometimes be a memory operand and so we need to store the
8388 destination in the i.rm.reg field. */
8389 if (!i.tm.opcode_modifier.regmem
8390 && operand_type_check (i.tm.operand_types[dest], anymem) == 0)
8391 {
8392 i.rm.reg = i.op[dest].regs->reg_num;
8393 i.rm.regmem = i.op[source].regs->reg_num;
8394 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8395 set_rex_vrex (i.op[source].regs, REX_B, false);
8396 }
8397 else
8398 {
8399 i.rm.reg = i.op[source].regs->reg_num;
8400 i.rm.regmem = i.op[dest].regs->reg_num;
8401 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8402 set_rex_vrex (i.op[source].regs, REX_R, false);
8403 }
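/* Outside of 64-bit mode there is no REX.R, yet %cr8 stays reachable:
   a LOCK prefix on a control-register move supplies the extra register
   bit, e.g. "mov %cr8, %eax" is encoded as f0 0f 20 c0.  */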
8404 if (flag_code != CODE_64BIT && (i.rex & REX_R))
8405 {
8406 if (i.types[!i.tm.opcode_modifier.regmem].bitfield.class != RegCR)
8407 abort ();
8408 i.rex &= ~REX_R;
8409 add_prefix (LOCK_PREFIX_OPCODE);
8410 }
8411 }
8412 else
8413 { /* If it's not 2 reg operands... */
8414 unsigned int mem;
8415
8416 if (i.mem_operands)
8417 {
8418 unsigned int fake_zero_displacement = 0;
8419 unsigned int op;
8420
8421 for (op = 0; op < i.operands; op++)
8422 if (i.flags[op] & Operand_Mem)
8423 break;
8424 gas_assert (op < i.operands);
8425
8426 if (i.tm.opcode_modifier.sib)
8427 {
8428 /* The index register of VSIB shouldn't be RegIZ. */
8429 if (i.tm.opcode_modifier.sib != SIBMEM
8430 && i.index_reg->reg_num == RegIZ)
8431 abort ();
8432
8433 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8434 if (!i.base_reg)
8435 {
8436 i.sib.base = NO_BASE_REGISTER;
8437 i.sib.scale = i.log2_scale_factor;
8438 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8439 i.types[op].bitfield.disp32 = 1;
8440 }
8441
8442 /* Since the mandatory SIB always has an index register, the
8443 code logic here remains unchanged. A non-mandatory SIB
8444 without an index register is allowed and will be handled
8445 later. */
8446 if (i.index_reg)
8447 {
8448 if (i.index_reg->reg_num == RegIZ)
8449 i.sib.index = NO_INDEX_REGISTER;
8450 else
8451 i.sib.index = i.index_reg->reg_num;
8452 set_rex_vrex (i.index_reg, REX_X, false);
8453 }
8454 }
8455
8456 default_seg = reg_ds;
8457
8458 if (i.base_reg == 0)
8459 {
8460 i.rm.mode = 0;
8461 if (!i.disp_operands)
8462 fake_zero_displacement = 1;
8463 if (i.index_reg == 0)
8464 {
8465 /* Both check for VSIB and mandatory non-vector SIB. */
8466 gas_assert (!i.tm.opcode_modifier.sib
8467 || i.tm.opcode_modifier.sib == SIBMEM);
8468 /* Operand is just <disp> */
8469 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8470 if (flag_code == CODE_64BIT)
8471 {
8472 /* In 64-bit mode, 32-bit absolute addressing is
8473 superseded by RIP-relative addressing, so plain
8474 absolute addressing must be encoded via one of
8475 the redundant SIB forms. */
8476 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8477 i.sib.base = NO_BASE_REGISTER;
8478 i.sib.index = NO_INDEX_REGISTER;
8479 i.types[op].bitfield.disp32 = 1;
8480 }
8481 else if ((flag_code == CODE_16BIT)
8482 ^ (i.prefix[ADDR_PREFIX] != 0))
8483 {
8484 i.rm.regmem = NO_BASE_REGISTER_16;
8485 i.types[op].bitfield.disp16 = 1;
8486 }
8487 else
8488 {
8489 i.rm.regmem = NO_BASE_REGISTER;
8490 i.types[op].bitfield.disp32 = 1;
8491 }
8492 }
8493 else if (!i.tm.opcode_modifier.sib)
8494 {
8495 /* !i.base_reg && i.index_reg */
8496 if (i.index_reg->reg_num == RegIZ)
8497 i.sib.index = NO_INDEX_REGISTER;
8498 else
8499 i.sib.index = i.index_reg->reg_num;
8500 i.sib.base = NO_BASE_REGISTER;
8501 i.sib.scale = i.log2_scale_factor;
8502 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8503 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8504 i.types[op].bitfield.disp32 = 1;
8505 if ((i.index_reg->reg_flags & RegRex) != 0)
8506 i.rex |= REX_X;
8507 }
8508 }
8509 /* RIP addressing for 64bit mode. */
8510 else if (i.base_reg->reg_num == RegIP)
8511 {
8512 gas_assert (!i.tm.opcode_modifier.sib);
8513 i.rm.regmem = NO_BASE_REGISTER;
8514 i.types[op].bitfield.disp8 = 0;
8515 i.types[op].bitfield.disp16 = 0;
8516 i.types[op].bitfield.disp32 = 1;
8517 i.types[op].bitfield.disp64 = 0;
8518 i.flags[op] |= Operand_PCrel;
8519 if (! i.disp_operands)
8520 fake_zero_displacement = 1;
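/* E.g. "mov foo(%rip), %rax": mod=00 with rm=101 selects disp32(%rip)
   in 64-bit mode, and a bare "(%rip)" operand gets the fake zero
   displacement materialized further below.  */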
8521 }
8522 else if (i.base_reg->reg_type.bitfield.word)
8523 {
8524 gas_assert (!i.tm.opcode_modifier.sib);
8525 switch (i.base_reg->reg_num)
8526 {
8527 case 3: /* (%bx) */
8528 if (i.index_reg == 0)
8529 i.rm.regmem = 7;
8530 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
8531 i.rm.regmem = i.index_reg->reg_num - 6;
8532 break;
8533 case 5: /* (%bp) */
8534 default_seg = reg_ss;
8535 if (i.index_reg == 0)
8536 {
8537 i.rm.regmem = 6;
8538 if (operand_type_check (i.types[op], disp) == 0)
8539 {
8540 /* fake (%bp) into 0(%bp) */
8541 if (i.disp_encoding == disp_encoding_16bit)
8542 i.types[op].bitfield.disp16 = 1;
8543 else
8544 i.types[op].bitfield.disp8 = 1;
8545 fake_zero_displacement = 1;
8546 }
8547 }
8548 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
8549 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8550 break;
8551 default: /* (%si) -> 4 or (%di) -> 5 */
8552 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8553 }
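/* For reference, the full 16-bit r/m table the cases above implement:
   0 (%bx,%si)  1 (%bx,%di)  2 (%bp,%si)  3 (%bp,%di)
   4 (%si)      5 (%di)      6 (%bp), or bare disp16  7 (%bx)  */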
8554 if (!fake_zero_displacement
8555 && !i.disp_operands
8556 && i.disp_encoding)
8557 {
8558 fake_zero_displacement = 1;
8559 if (i.disp_encoding == disp_encoding_8bit)
8560 i.types[op].bitfield.disp8 = 1;
8561 else
8562 i.types[op].bitfield.disp16 = 1;
8563 }
8564 i.rm.mode = mode_from_disp_size (i.types[op]);
8565 }
8566 else /* i.base_reg and 32/64 bit mode */
8567 {
8568 if (operand_type_check (i.types[op], disp))
8569 {
8570 i.types[op].bitfield.disp16 = 0;
8571 i.types[op].bitfield.disp64 = 0;
8572 i.types[op].bitfield.disp32 = 1;
8573 }
8574
8575 if (!i.tm.opcode_modifier.sib)
8576 i.rm.regmem = i.base_reg->reg_num;
8577 if ((i.base_reg->reg_flags & RegRex) != 0)
8578 i.rex |= REX_B;
8579 i.sib.base = i.base_reg->reg_num;
8580 /* x86-64 ignores REX prefix bit here to avoid decoder
8581 complications. */
8582 if (!(i.base_reg->reg_flags & RegRex)
8583 && (i.base_reg->reg_num == EBP_REG_NUM
8584 || i.base_reg->reg_num == ESP_REG_NUM))
8585 default_seg = reg_ss;
8586 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8587 {
8588 fake_zero_displacement = 1;
8589 if (i.disp_encoding == disp_encoding_32bit)
8590 i.types[op].bitfield.disp32 = 1;
8591 else
8592 i.types[op].bitfield.disp8 = 1;
8593 }
8594 i.sib.scale = i.log2_scale_factor;
8595 if (i.index_reg == 0)
8596 {
8597 /* Only check for VSIB. */
8598 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8599 && i.tm.opcode_modifier.sib != VECSIB256
8600 && i.tm.opcode_modifier.sib != VECSIB512);
8601
8602 /* <disp>(%esp) becomes two byte modrm with no index
8603 register. We've already stored the code for esp
8604 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8605 Any base register besides %esp will not use the
8606 extra modrm byte. */
8607 i.sib.index = NO_INDEX_REGISTER;
8608 }
8609 else if (!i.tm.opcode_modifier.sib)
8610 {
8611 if (i.index_reg->reg_num == RegIZ)
8612 i.sib.index = NO_INDEX_REGISTER;
8613 else
8614 i.sib.index = i.index_reg->reg_num;
8615 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8616 if ((i.index_reg->reg_flags & RegRex) != 0)
8617 i.rex |= REX_X;
8618 }
8619
8620 if (i.disp_operands
8621 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8622 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8623 i.rm.mode = 0;
8624 else
8625 {
8626 if (!fake_zero_displacement
8627 && !i.disp_operands
8628 && i.disp_encoding)
8629 {
8630 fake_zero_displacement = 1;
8631 if (i.disp_encoding == disp_encoding_8bit)
8632 i.types[op].bitfield.disp8 = 1;
8633 else
8634 i.types[op].bitfield.disp32 = 1;
8635 }
8636 i.rm.mode = mode_from_disp_size (i.types[op]);
8637 }
8638 }
8639
8640 if (fake_zero_displacement)
8641 {
8642 /* Fakes a zero displacement assuming that i.types[op]
8643 holds the correct displacement size. */
8644 expressionS *exp;
8645
8646 gas_assert (i.op[op].disps == 0);
8647 exp = &disp_expressions[i.disp_operands++];
8648 i.op[op].disps = exp;
8649 exp->X_op = O_constant;
8650 exp->X_add_number = 0;
8651 exp->X_add_symbol = (symbolS *) 0;
8652 exp->X_op_symbol = (symbolS *) 0;
8653 }
8654
8655 mem = op;
8656 }
8657 else
8658 mem = ~0;
8659
8660 if (i.tm.opcode_modifier.vexsources == XOP2SOURCES)
8661 {
8662 if (operand_type_check (i.types[0], imm))
8663 i.vex.register_specifier = NULL;
8664 else
8665 {
8666 /* VEX.vvvv encodes one of the sources when the first
8667 operand is not an immediate. */
8668 if (i.tm.opcode_modifier.vexw == VEXW0)
8669 i.vex.register_specifier = i.op[0].regs;
8670 else
8671 i.vex.register_specifier = i.op[1].regs;
8672 }
8673
8674 /* The destination is an XMM register encoded in the ModRM.reg
8675 and VEX.R bit. */
8676 i.rm.reg = i.op[2].regs->reg_num;
8677 if ((i.op[2].regs->reg_flags & RegRex) != 0)
8678 i.rex |= REX_R;
8679
8680 /* ModRM.rm and VEX.B encode the other source. */
8681 if (!i.mem_operands)
8682 {
8683 i.rm.mode = 3;
8684
8685 if (i.tm.opcode_modifier.vexw == VEXW0)
8686 i.rm.regmem = i.op[1].regs->reg_num;
8687 else
8688 i.rm.regmem = i.op[0].regs->reg_num;
8689
8690 if ((i.op[1].regs->reg_flags & RegRex) != 0)
8691 i.rex |= REX_B;
8692 }
8693 }
8694 else if (i.tm.opcode_modifier.vexvvvv == VEXLWP)
8695 {
8696 i.vex.register_specifier = i.op[2].regs;
8697 if (!i.mem_operands)
8698 {
8699 i.rm.mode = 3;
8700 i.rm.regmem = i.op[1].regs->reg_num;
8701 if ((i.op[1].regs->reg_flags & RegRex) != 0)
8702 i.rex |= REX_B;
8703 }
8704 }
8705 /* Fill in i.rm.reg or i.rm.regmem field with register operand
8706 (if any) based on i.tm.extension_opcode. Again, we must be
8707 careful to make sure that segment/control/debug/test/MMX
8708 registers are coded into the i.rm.reg field. */
8709 else if (i.reg_operands)
8710 {
8711 unsigned int op;
8712 unsigned int vex_reg = ~0;
8713
8714 for (op = 0; op < i.operands; op++)
8715 if (i.types[op].bitfield.class == Reg
8716 || i.types[op].bitfield.class == RegBND
8717 || i.types[op].bitfield.class == RegMask
8718 || i.types[op].bitfield.class == SReg
8719 || i.types[op].bitfield.class == RegCR
8720 || i.types[op].bitfield.class == RegDR
8721 || i.types[op].bitfield.class == RegTR
8722 || i.types[op].bitfield.class == RegSIMD
8723 || i.types[op].bitfield.class == RegMMX)
8724 break;
8725
8726 if (vex_3_sources)
8727 op = dest;
8728 else if (i.tm.opcode_modifier.vexvvvv == VEXXDS)
8729 {
8730 /* For instructions with VexNDS, the register-only
8731 source operand is encoded in VEX prefix. */
8732 gas_assert (mem != (unsigned int) ~0);
8733
8734 if (op > mem || i.tm.cpu_flags.bitfield.cpucmpccxadd)
8735 {
8736 vex_reg = op++;
8737 gas_assert (op < i.operands);
8738 }
8739 else
8740 {
8741 /* Check register-only source operand when two source
8742 operands are swapped. */
8743 if (!i.tm.operand_types[op].bitfield.baseindex
8744 && i.tm.operand_types[op + 1].bitfield.baseindex)
8745 {
8746 vex_reg = op;
8747 op += 2;
8748 gas_assert (mem == (vex_reg + 1)
8749 && op < i.operands);
8750 }
8751 else
8752 {
8753 vex_reg = op + 1;
8754 gas_assert (vex_reg < i.operands);
8755 }
8756 }
8757 }
8758 else if (i.tm.opcode_modifier.vexvvvv == VEXNDD)
8759 {
8760 /* For instructions with VexNDD, the register destination
8761 is encoded in VEX prefix. */
8762 if (i.mem_operands == 0)
8763 {
8764 /* There is no memory operand. */
8765 gas_assert ((op + 2) == i.operands);
8766 vex_reg = op + 1;
8767 }
8768 else
8769 {
8770 /* There are only 2 non-immediate operands. */
8771 gas_assert (op < i.imm_operands + 2
8772 && i.operands == i.imm_operands + 2);
8773 vex_reg = i.imm_operands + 1;
8774 }
8775 }
8776 else
8777 gas_assert (op < i.operands);
8778
8779 if (vex_reg != (unsigned int) ~0)
8780 {
8781 i386_operand_type *type = &i.tm.operand_types[vex_reg];
8782
8783 if ((type->bitfield.class != Reg
8784 || (!type->bitfield.dword && !type->bitfield.qword))
8785 && type->bitfield.class != RegSIMD
8786 && type->bitfield.class != RegMask)
8787 abort ();
8788
8789 i.vex.register_specifier = i.op[vex_reg].regs;
8790 }
8791
8792 /* Don't set OP operand twice. */
8793 if (vex_reg != op)
8794 {
8795 /* If there is an extension opcode to put here, the
8796 register number must be put into the regmem field. */
8797 if (i.tm.extension_opcode != None)
8798 {
8799 i.rm.regmem = i.op[op].regs->reg_num;
8800 set_rex_vrex (i.op[op].regs, REX_B,
8801 i.tm.opcode_modifier.sse2avx);
8802 }
8803 else
8804 {
8805 i.rm.reg = i.op[op].regs->reg_num;
8806 set_rex_vrex (i.op[op].regs, REX_R,
8807 i.tm.opcode_modifier.sse2avx);
8808 }
8809 }
8810
8811 /* Now, if no memory operand has set i.rm.mode to 0, 1, or 2,
8812 we must set it to 3 to indicate that this is a register
8813 operand in the regmem field. */
8814 if (!i.mem_operands)
8815 i.rm.mode = 3;
8816 }
8817
8818 /* Fill in i.rm.reg field with extension opcode (if any). */
8819 if (i.tm.extension_opcode != None)
8820 i.rm.reg = i.tm.extension_opcode;
8821 }
8822 return default_seg;
8823 }
8824
8825 static INLINE void
8826 frag_opcode_byte (unsigned char byte)
8827 {
8828 if (now_seg != absolute_section)
8829 FRAG_APPEND_1_CHAR (byte);
8830 else
8831 ++abs_section_offset;
8832 }
8833
8834 static unsigned int
8835 flip_code16 (unsigned int code16)
8836 {
8837 gas_assert (i.tm.operands == 1);
8838
8839 return !(i.prefix[REX_PREFIX] & REX_W)
8840 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8841 : i.tm.operand_types[0].bitfield.disp16)
8842 ? CODE16 : 0;
8843 }
8844
8845 static void
8846 output_branch (void)
8847 {
8848 char *p;
8849 int size;
8850 int code16;
8851 int prefix;
8852 relax_substateT subtype;
8853 symbolS *sym;
8854 offsetT off;
8855
8856 if (now_seg == absolute_section)
8857 {
8858 as_bad (_("relaxable branches not supported in absolute section"));
8859 return;
8860 }
8861
8862 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8863 size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8864
8865 prefix = 0;
8866 if (i.prefix[DATA_PREFIX] != 0)
8867 {
8868 prefix = 1;
8869 i.prefixes -= 1;
8870 code16 ^= flip_code16 (code16);
8871 }
8872 /* Pentium4 branch hints. */
8873 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8874 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8875 {
8876 prefix++;
8877 i.prefixes--;
8878 }
8879 if (i.prefix[REX_PREFIX] != 0)
8880 {
8881 prefix++;
8882 i.prefixes--;
8883 }
8884
8885 /* BND prefixed jump. */
8886 if (i.prefix[BND_PREFIX] != 0)
8887 {
8888 prefix++;
8889 i.prefixes--;
8890 }
8891
8892 if (i.prefixes != 0)
8893 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
8894
8895 /* It's always a symbol; end the frag & set up for relaxation.
8896 Make sure there is enough room in this frag for the largest
8897 instruction we may generate in md_convert_frag. This is 2
8898 bytes for the opcode and room for the prefix and largest
8899 displacement. */
8900 frag_grow (prefix + 2 + 4);
8901 /* Prefix and 1 opcode byte go in fr_fix. */
8902 p = frag_more (prefix + 1);
8903 if (i.prefix[DATA_PREFIX] != 0)
8904 *p++ = DATA_PREFIX_OPCODE;
8905 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
8906 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
8907 *p++ = i.prefix[SEG_PREFIX];
8908 if (i.prefix[BND_PREFIX] != 0)
8909 *p++ = BND_PREFIX_OPCODE;
8910 if (i.prefix[REX_PREFIX] != 0)
8911 *p++ = i.prefix[REX_PREFIX];
8912 *p = i.tm.base_opcode;
8913
8914 if ((unsigned char) *p == JUMP_PC_RELATIVE)
8915 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
8916 else if (cpu_arch_flags.bitfield.cpui386)
8917 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
8918 else
8919 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
8920 subtype |= code16;
8921
8922 sym = i.op[0].disps->X_add_symbol;
8923 off = i.op[0].disps->X_add_number;
8924
8925 if (i.op[0].disps->X_op != O_constant
8926 && i.op[0].disps->X_op != O_symbol)
8927 {
8928 /* Handle complex expressions. */
8929 sym = make_expr_symbol (i.op[0].disps);
8930 off = 0;
8931 }
8932
8933 frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
8934
8935 /* 1 possible extra opcode + 4 byte displacement go in var part.
8936 Pass reloc in fr_var. */
8937 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
8938 }
8939
8940 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
8941 /* Return TRUE iff PLT32 relocation should be used for branching to
8942 symbol S. */
8943
8944 static bool
8945 need_plt32_p (symbolS *s)
8946 {
8947 /* PLT32 relocation is ELF only. */
8948 if (!IS_ELF)
8949 return false;
8950
8951 #ifdef TE_SOLARIS
8952 /* Don't emit PLT32 relocation on Solaris: neither native linker nor
8953 krtld support it. */
8954 return false;
8955 #endif
8956
8957 /* Since there is no need to prepare for a PLT branch on x86-64, we
8958 can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
8959 be used as a marker for 32-bit PC-relative branches. */
8960 if (!object_64bit)
8961 return false;
8962
8963 if (s == NULL)
8964 return false;
8965
8966 /* Weak or undefined symbols need a PLT32 relocation. */
8967 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
8968 return true;
8969
8970 /* A non-global symbol doesn't need a PLT32 relocation. */
8971 if (! S_IS_EXTERNAL (s))
8972 return false;
8973
8974 /* Other global symbols need a PLT32 relocation. NB: Symbols with
8975 non-default visibility are treated as normal global symbols, so
8976 that the PLT32 relocation can be used as a marker for 32-bit
8977 PC-relative branches. This is useful for linker relaxation. */
8978 return true;
8979 }
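/* E.g. (illustrative): "call foo" in 64-bit code with foo undefined gets
   an R_X86_64_PLT32 relocation instead of R_X86_64_PC32; if foo turns
   out to be local, the linker resolves it just like the PC32 form.  */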
8980 #endif
8981
8982 static void
8983 output_jump (void)
8984 {
8985 char *p;
8986 int size;
8987 fixS *fixP;
8988 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
8989
8990 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
8991 {
8992 /* This is a loop or jecxz type instruction. */
8993 size = 1;
8994 if (i.prefix[ADDR_PREFIX] != 0)
8995 {
8996 frag_opcode_byte (ADDR_PREFIX_OPCODE);
8997 i.prefixes -= 1;
8998 }
8999 /* Pentium4 branch hints. */
9000 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
9001 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
9002 {
9003 frag_opcode_byte (i.prefix[SEG_PREFIX]);
9004 i.prefixes--;
9005 }
9006 }
9007 else
9008 {
9009 int code16;
9010
9011 code16 = 0;
9012 if (flag_code == CODE_16BIT)
9013 code16 = CODE16;
9014
9015 if (i.prefix[DATA_PREFIX] != 0)
9016 {
9017 frag_opcode_byte (DATA_PREFIX_OPCODE);
9018 i.prefixes -= 1;
9019 code16 ^= flip_code16 (code16);
9020 }
9021
9022 size = 4;
9023 if (code16)
9024 size = 2;
9025 }
9026
9027 /* BND prefixed jump. */
9028 if (i.prefix[BND_PREFIX] != 0)
9029 {
9030 frag_opcode_byte (i.prefix[BND_PREFIX]);
9031 i.prefixes -= 1;
9032 }
9033
9034 if (i.prefix[REX_PREFIX] != 0)
9035 {
9036 frag_opcode_byte (i.prefix[REX_PREFIX]);
9037 i.prefixes -= 1;
9038 }
9039
9040 if (i.prefixes != 0)
9041 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9042
9043 if (now_seg == absolute_section)
9044 {
9045 abs_section_offset += i.opcode_length + size;
9046 return;
9047 }
9048
9049 p = frag_more (i.opcode_length + size);
9050 switch (i.opcode_length)
9051 {
9052 case 2:
9053 *p++ = i.tm.base_opcode >> 8;
9054 /* Fall through. */
9055 case 1:
9056 *p++ = i.tm.base_opcode;
9057 break;
9058 default:
9059 abort ();
9060 }
9061
9062 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9063 if (flag_code == CODE_64BIT && size == 4
9064 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
9065 && need_plt32_p (i.op[0].disps->X_add_symbol))
9066 jump_reloc = BFD_RELOC_X86_64_PLT32;
9067 #endif
9068
9069 jump_reloc = reloc (size, 1, 1, jump_reloc);
9070
9071 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9072 i.op[0].disps, 1, jump_reloc);
9073
9074 /* All jumps handled here are signed, but don't unconditionally use a
9075 signed limit check for 32 and 16 bit jumps as we want to allow wrap
9076 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
9077 respectively. */
9078 switch (size)
9079 {
9080 case 1:
9081 fixP->fx_signed = 1;
9082 break;
9083
9084 case 2:
9085 if (i.tm.base_opcode == 0xc7f8)
9086 fixP->fx_signed = 1;
9087 break;
9088
9089 case 4:
9090 if (flag_code == CODE_64BIT)
9091 fixP->fx_signed = 1;
9092 break;
9093 }
9094 }
9095
9096 static void
9097 output_interseg_jump (void)
9098 {
9099 char *p;
9100 int size;
9101 int prefix;
9102 int code16;
9103
9104 code16 = 0;
9105 if (flag_code == CODE_16BIT)
9106 code16 = CODE16;
9107
9108 prefix = 0;
9109 if (i.prefix[DATA_PREFIX] != 0)
9110 {
9111 prefix = 1;
9112 i.prefixes -= 1;
9113 code16 ^= CODE16;
9114 }
9115
9116 gas_assert (!i.prefix[REX_PREFIX]);
9117
9118 size = 4;
9119 if (code16)
9120 size = 2;
9121
9122 if (i.prefixes != 0)
9123 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9124
9125 if (now_seg == absolute_section)
9126 {
9127 abs_section_offset += prefix + 1 + 2 + size;
9128 return;
9129 }
9130
9131 /* 1 opcode; 2 segment; offset */
9132 p = frag_more (prefix + 1 + 2 + size);
9133
9134 if (i.prefix[DATA_PREFIX] != 0)
9135 *p++ = DATA_PREFIX_OPCODE;
9136
9137 if (i.prefix[REX_PREFIX] != 0)
9138 *p++ = i.prefix[REX_PREFIX];
9139
9140 *p++ = i.tm.base_opcode;
9141 if (i.op[1].imms->X_op == O_constant)
9142 {
9143 offsetT n = i.op[1].imms->X_add_number;
9144
9145 if (size == 2
9146 && !fits_in_unsigned_word (n)
9147 && !fits_in_signed_word (n))
9148 {
9149 as_bad (_("16-bit jump out of range"));
9150 return;
9151 }
9152 md_number_to_chars (p, n, size);
9153 }
9154 else
9155 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9156 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9157
9158 p += size;
9159 if (i.op[0].imms->X_op == O_constant)
9160 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9161 else
9162 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9163 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9164 }
9165
9166 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9167 void
9168 x86_cleanup (void)
9169 {
9170 char *p;
9171 asection *seg = now_seg;
9172 subsegT subseg = now_subseg;
9173 asection *sec;
9174 unsigned int alignment, align_size_1;
9175 unsigned int isa_1_descsz, feature_2_descsz, descsz;
9176 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9177 unsigned int padding;
9178
9179 if (!IS_ELF || !x86_used_note)
9180 return;
9181
9182 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9183
9184 /* The .note.gnu.property section layout:
9185
9186 Field Length Contents
9187 ---- ---- ----
9188 n_namsz 4 4
9189 n_descsz 4 The note descriptor size
9190 n_type 4 NT_GNU_PROPERTY_TYPE_0
9191 n_name 4 "GNU"
9192 n_desc n_descsz The program property array
9193 .... .... ....
9194 */
9195
9196 /* Create the .note.gnu.property section. */
9197 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9198 bfd_set_section_flags (sec,
9199 (SEC_ALLOC
9200 | SEC_LOAD
9201 | SEC_DATA
9202 | SEC_HAS_CONTENTS
9203 | SEC_READONLY));
9204
9205 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9206 {
9207 align_size_1 = 7;
9208 alignment = 3;
9209 }
9210 else
9211 {
9212 align_size_1 = 3;
9213 alignment = 2;
9214 }
9215
9216 bfd_set_section_alignment (sec, alignment);
9217 elf_section_type (sec) = SHT_NOTE;
9218
9219 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9220 + 4-byte data */
9221 isa_1_descsz_raw = 4 + 4 + 4;
9222 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
9223 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9224
9225 feature_2_descsz_raw = isa_1_descsz;
9226 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9227 + 4-byte data */
9228 feature_2_descsz_raw += 4 + 4 + 4;
9229 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
9230 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9231 & ~align_size_1);
9232
9233 descsz = feature_2_descsz;
9234 /* Section size: the n_namsz + n_descsz + n_type + n_name fields plus descsz bytes of descriptor. */
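/* Worked sizes for ELFCLASS64 (8-byte alignment, align_size_1 == 7),
   as an illustration: isa_1_descsz_raw = 12 rounds up to 16,
   feature_2_descsz_raw = 16 + 12 = 28 rounds up to 32, so the frag
   below is 4 * 4 + 32 = 48 bytes.  */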
9235 p = frag_more (4 + 4 + 4 + 4 + descsz);
9236
9237 /* Write n_namsz. */
9238 md_number_to_chars (p, (valueT) 4, 4);
9239
9240 /* Write n_descsz. */
9241 md_number_to_chars (p + 4, (valueT) descsz, 4);
9242
9243 /* Write n_type. */
9244 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9245
9246 /* Write n_name. */
9247 memcpy (p + 4 * 3, "GNU", 4);
9248
9249 /* Write 4-byte type. */
9250 md_number_to_chars (p + 4 * 4,
9251 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9252
9253 /* Write 4-byte data size. */
9254 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9255
9256 /* Write 4-byte data. */
9257 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9258
9259 /* Zero out the padding. */
9260 padding = isa_1_descsz - isa_1_descsz_raw;
9261 if (padding)
9262 memset (p + 4 * 7, 0, padding);
9263
9264 /* Write 4-byte type. */
9265 md_number_to_chars (p + isa_1_descsz + 4 * 4,
9266 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9267
9268 /* Write 4-byte data size. */
9269 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9270
9271 /* Write 4-byte data. */
9272 md_number_to_chars (p + isa_1_descsz + 4 * 6,
9273 (valueT) x86_feature_2_used, 4);
9274
9275 /* Zero out the padding. */
9276 padding = feature_2_descsz - feature_2_descsz_raw;
9277 if (padding)
9278 memset (p + isa_1_descsz + 4 * 7, 0, padding);
9279
9280 /* We probably can't restore the current segment, for there likely
9281 isn't one yet... */
9282 if (seg && subseg)
9283 subseg_set (seg, subseg);
9284 }
9285
9286 bool
9287 x86_support_sframe_p (void)
9288 {
9289 /* At this time, SFrame stack trace is supported for AMD64 ABI only. */
9290 return (x86_elf_abi == X86_64_ABI);
9291 }
9292
9293 bool
9294 x86_sframe_ra_tracking_p (void)
9295 {
9296 /* In AMD64, the return address is always stored on the stack at a fixed
9297 offset from the CFA (provided via x86_sframe_cfa_ra_offset ()), so do
9298 not track it explicitly via an SFrame Frame Row Entry. */
9299 return false;
9300 }
9301
9302 offsetT
9303 x86_sframe_cfa_ra_offset (void)
9304 {
9305 gas_assert (x86_elf_abi == X86_64_ABI);
9306 return (offsetT) -8;
9307 }
9308
9309 unsigned char
9310 x86_sframe_get_abi_arch (void)
9311 {
9312 unsigned char sframe_abi_arch = 0;
9313
9314 if (x86_support_sframe_p ())
9315 {
9316 gas_assert (!target_big_endian);
9317 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9318 }
9319
9320 return sframe_abi_arch;
9321 }
9322
9323 #endif
9324
9325 static unsigned int
9326 encoding_length (const fragS *start_frag, offsetT start_off,
9327 const char *frag_now_ptr)
9328 {
9329 unsigned int len = 0;
9330
9331 if (start_frag != frag_now)
9332 {
9333 const fragS *fr = start_frag;
9334
9335 do {
9336 len += fr->fr_fix;
9337 fr = fr->fr_next;
9338 } while (fr && fr != frag_now);
9339 }
9340
9341 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9342 }
9343
9344 /* Return 1 for test, and, cmp, add, sub, inc and dec, which may
9345 be macro-fused with conditional jumps.
9346 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP-relative address,
9347 or takes one of the following forms:
9348
9349 cmp m, imm
9350 add m, imm
9351 sub m, imm
9352 test m, imm
9353 and m, imm
9354 inc m
9355 dec m
9356
9357 it cannot be fused. */
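/* E.g. "cmp %rax, %rbx; jne 1f" is a fusion candidate, while
   "cmpl $1, (%rdi); jne 1f" (cmp m, imm) and any RIP-relative form
   are not.  */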
9358
9359 static int
9360 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9361 {
9362 /* No RIP address. */
9363 if (i.base_reg && i.base_reg->reg_num == RegIP)
9364 return 0;
9365
9366 /* No opcodes outside of base encoding space. */
9367 if (i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9368 return 0;
9369
9370 /* add, sub without add/sub m, imm. */
9371 if (i.tm.base_opcode <= 5
9372 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9373 || ((i.tm.base_opcode | 3) == 0x83
9374 && (i.tm.extension_opcode == 0x5
9375 || i.tm.extension_opcode == 0x0)))
9376 {
9377 *mf_cmp_p = mf_cmp_alu_cmp;
9378 return !(i.mem_operands && i.imm_operands);
9379 }
9380
9381 /* and without and m, imm. */
9382 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9383 || ((i.tm.base_opcode | 3) == 0x83
9384 && i.tm.extension_opcode == 0x4))
9385 {
9386 *mf_cmp_p = mf_cmp_test_and;
9387 return !(i.mem_operands && i.imm_operands);
9388 }
9389
9390 /* test without test m imm. */
9391 if ((i.tm.base_opcode | 1) == 0x85
9392 || (i.tm.base_opcode | 1) == 0xa9
9393 || ((i.tm.base_opcode | 1) == 0xf7
9394 && i.tm.extension_opcode == 0))
9395 {
9396 *mf_cmp_p = mf_cmp_test_and;
9397 return !(i.mem_operands && i.imm_operands);
9398 }
9399
9400 /* cmp without cmp m, imm. */
9401 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9402 || ((i.tm.base_opcode | 3) == 0x83
9403 && (i.tm.extension_opcode == 0x7)))
9404 {
9405 *mf_cmp_p = mf_cmp_alu_cmp;
9406 return !(i.mem_operands && i.imm_operands);
9407 }
9408
9409 /* inc, dec without inc/dec m. */
9410 if ((i.tm.cpu_flags.bitfield.cpuno64
9411 && (i.tm.base_opcode | 0xf) == 0x4f)
9412 || ((i.tm.base_opcode | 1) == 0xff
9413 && i.tm.extension_opcode <= 0x1))
9414 {
9415 *mf_cmp_p = mf_cmp_incdec;
9416 return !i.mem_operands;
9417 }
9418
9419 return 0;
9420 }
9421
9422 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9423
9424 static int
9425 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9426 {
9427 /* NB: Doesn't work with COND_JUMP86 without i386. */
9428 if (!align_branch_power
9429 || now_seg == absolute_section
9430 || !cpu_arch_flags.bitfield.cpui386
9431 || !(align_branch & align_branch_fused_bit))
9432 return 0;
9433
9434 if (maybe_fused_with_jcc_p (mf_cmp_p))
9435 {
9436 if (last_insn.kind == last_insn_other
9437 || last_insn.seg != now_seg)
9438 return 1;
9439 if (flag_debug)
9440 as_warn_where (last_insn.file, last_insn.line,
9441 _("`%s` skips -malign-branch-boundary on `%s`"),
9442 last_insn.name, insn_name (&i.tm));
9443 }
9444
9445 return 0;
9446 }
9447
9448 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9449
9450 static int
9451 add_branch_prefix_frag_p (void)
9452 {
9453 /* NB: Doesn't work with COND_JUMP86 without i386. Don't add a prefix
9454 to PadLock instructions, since they include prefixes in the opcode. */
9455 if (!align_branch_power
9456 || !align_branch_prefix_size
9457 || now_seg == absolute_section
9458 || i.tm.cpu_flags.bitfield.cpupadlock
9459 || !cpu_arch_flags.bitfield.cpui386)
9460 return 0;
9461
9462 /* Don't add a prefix if the insn is itself a prefix, or if there
9463 is no operand, in which case a segment prefix may be special. */
9464 if (!i.operands || i.tm.opcode_modifier.isprefix)
9465 return 0;
9466
9467 if (last_insn.kind == last_insn_other
9468 || last_insn.seg != now_seg)
9469 return 1;
9470
9471 if (flag_debug)
9472 as_warn_where (last_insn.file, last_insn.line,
9473 _("`%s` skips -malign-branch-boundary on `%s`"),
9474 last_insn.name, insn_name (&i.tm));
9475
9476 return 0;
9477 }
9478
9479 /* Return 1 if a BRANCH_PADDING frag should be generated. */
9480
9481 static int
9482 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9483 enum mf_jcc_kind *mf_jcc_p)
9484 {
9485 int add_padding;
9486
9487 /* NB: Doesn't work with COND_JUMP86 without i386. */
9488 if (!align_branch_power
9489 || now_seg == absolute_section
9490 || !cpu_arch_flags.bitfield.cpui386
9491 || i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9492 return 0;
9493
9494 add_padding = 0;
9495
9496 /* Check for jcc and direct jmp. */
9497 if (i.tm.opcode_modifier.jump == JUMP)
9498 {
9499 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9500 {
9501 *branch_p = align_branch_jmp;
9502 add_padding = align_branch & align_branch_jmp_bit;
9503 }
9504 else
9505 {
9506 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
9507 table, ignore the lowest bit. */
9508 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9509 *branch_p = align_branch_jcc;
9510 if ((align_branch & align_branch_jcc_bit))
9511 add_padding = 1;
9512 }
9513 }
9514 else if ((i.tm.base_opcode | 1) == 0xc3)
9515 {
9516 /* Near ret. */
9517 *branch_p = align_branch_ret;
9518 if ((align_branch & align_branch_ret_bit))
9519 add_padding = 1;
9520 }
9521 else
9522 {
9523 /* Check for indirect jmp, direct and indirect calls. */
9524 if (i.tm.base_opcode == 0xe8)
9525 {
9526 /* Direct call. */
9527 *branch_p = align_branch_call;
9528 if ((align_branch & align_branch_call_bit))
9529 add_padding = 1;
9530 }
9531 else if (i.tm.base_opcode == 0xff
9532 && (i.tm.extension_opcode == 2
9533 || i.tm.extension_opcode == 4))
9534 {
9535 /* Indirect call and jmp. */
9536 *branch_p = align_branch_indirect;
9537 if ((align_branch & align_branch_indirect_bit))
9538 add_padding = 1;
9539 }
9540
9541 if (add_padding
9542 && i.disp_operands
9543 && tls_get_addr
9544 && (i.op[0].disps->X_op == O_symbol
9545 || (i.op[0].disps->X_op == O_subtract
9546 && i.op[0].disps->X_op_symbol == GOT_symbol)))
9547 {
9548 symbolS *s = i.op[0].disps->X_add_symbol;
9549 /* No padding for a call to a global or undefined tls_get_addr. */
9550 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9551 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9552 return 0;
9553 }
9554 }
9555
9556 if (add_padding
9557 && last_insn.kind != last_insn_other
9558 && last_insn.seg == now_seg)
9559 {
9560 if (flag_debug)
9561 as_warn_where (last_insn.file, last_insn.line,
9562 _("`%s` skips -malign-branch-boundary on `%s`"),
9563 last_insn.name, insn_name (&i.tm));
9564 return 0;
9565 }
9566
9567 return add_padding;
9568 }
9569
9570 static void
9571 output_insn (void)
9572 {
9573 fragS *insn_start_frag;
9574 offsetT insn_start_off;
9575 fragS *fragP = NULL;
9576 enum align_branch_kind branch = align_branch_none;
9577 /* The initializer is arbitrary, just to avoid an uninitialized-variable
9578 error. It's actually either assigned in add_branch_padding_frag_p
9579 or never used. */
9580 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9581
9582 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9583 if (IS_ELF && x86_used_note && now_seg != absolute_section)
9584 {
9585 if ((i.xstate & xstate_tmm) == xstate_tmm
9586 || i.tm.cpu_flags.bitfield.cpuamx_tile)
9587 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9588
9589 if (i.tm.cpu_flags.bitfield.cpu8087
9590 || i.tm.cpu_flags.bitfield.cpu287
9591 || i.tm.cpu_flags.bitfield.cpu387
9592 || i.tm.cpu_flags.bitfield.cpu687
9593 || i.tm.cpu_flags.bitfield.cpufisttp)
9594 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9595
9596 if ((i.xstate & xstate_mmx)
9597 || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9598 && !is_any_vex_encoding (&i.tm)
9599 && (i.tm.base_opcode == 0x77 /* emms */
9600 || i.tm.base_opcode == 0x0e /* femms */)))
9601 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9602
9603 if (i.index_reg)
9604 {
9605 if (i.index_reg->reg_type.bitfield.zmmword)
9606 i.xstate |= xstate_zmm;
9607 else if (i.index_reg->reg_type.bitfield.ymmword)
9608 i.xstate |= xstate_ymm;
9609 else if (i.index_reg->reg_type.bitfield.xmmword)
9610 i.xstate |= xstate_xmm;
9611 }
9612
9613 /* vzeroall / vzeroupper */
9614 if (i.tm.base_opcode == 0x77 && i.tm.cpu_flags.bitfield.cpuavx)
9615 i.xstate |= xstate_ymm;
9616
9617 if ((i.xstate & xstate_xmm)
9618 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9619 || (i.tm.base_opcode == 0xae
9620 && (i.tm.cpu_flags.bitfield.cpusse
9621 || i.tm.cpu_flags.bitfield.cpuavx))
9622 || i.tm.cpu_flags.bitfield.cpuwidekl
9623 || i.tm.cpu_flags.bitfield.cpukl)
9624 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9625
9626 if ((i.xstate & xstate_ymm) == xstate_ymm)
9627 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9628 if ((i.xstate & xstate_zmm) == xstate_zmm)
9629 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9630 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9631 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9632 if (i.tm.cpu_flags.bitfield.cpufxsr)
9633 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9634 if (i.tm.cpu_flags.bitfield.cpuxsave)
9635 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9636 if (i.tm.cpu_flags.bitfield.cpuxsaveopt)
9637 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9638 if (i.tm.cpu_flags.bitfield.cpuxsavec)
9639 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9640
9641 if (x86_feature_2_used
9642 || i.tm.cpu_flags.bitfield.cpucmov
9643 || i.tm.cpu_flags.bitfield.cpusyscall
9644 || (i.tm.opcode_modifier.opcodespace == SPACE_0F
9645 && i.tm.base_opcode == 0xc7
9646 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
9647 && i.tm.extension_opcode == 1) /* cmpxchg8b */)
9648 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9649 if (i.tm.cpu_flags.bitfield.cpusse3
9650 || i.tm.cpu_flags.bitfield.cpussse3
9651 || i.tm.cpu_flags.bitfield.cpusse4_1
9652 || i.tm.cpu_flags.bitfield.cpusse4_2
9653 || i.tm.cpu_flags.bitfield.cpucx16
9654 || i.tm.cpu_flags.bitfield.cpupopcnt
9655 /* LAHF-SAHF insns in 64-bit mode. */
9656 || (flag_code == CODE_64BIT
9657 && (i.tm.base_opcode | 1) == 0x9f
9658 && i.tm.opcode_modifier.opcodespace == SPACE_BASE))
9659 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9660 if (i.tm.cpu_flags.bitfield.cpuavx
9661 || i.tm.cpu_flags.bitfield.cpuavx2
9662 /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
9663 XOP, FMA4, LWP, TBM, and AMX. */
9664 || (i.tm.opcode_modifier.vex
9665 && !i.tm.cpu_flags.bitfield.cpuavx512f
9666 && !i.tm.cpu_flags.bitfield.cpuavx512bw
9667 && !i.tm.cpu_flags.bitfield.cpuavx512dq
9668 && !i.tm.cpu_flags.bitfield.cpuxop
9669 && !i.tm.cpu_flags.bitfield.cpufma4
9670 && !i.tm.cpu_flags.bitfield.cpulwp
9671 && !i.tm.cpu_flags.bitfield.cputbm
9672 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9673 || i.tm.cpu_flags.bitfield.cpuf16c
9674 || i.tm.cpu_flags.bitfield.cpufma
9675 || i.tm.cpu_flags.bitfield.cpulzcnt
9676 || i.tm.cpu_flags.bitfield.cpumovbe
9677 || i.tm.cpu_flags.bitfield.cpuxsaves
9678 || (x86_feature_2_used
9679 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9680 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9681 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9682 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9683 if (i.tm.cpu_flags.bitfield.cpuavx512f
9684 || i.tm.cpu_flags.bitfield.cpuavx512bw
9685 || i.tm.cpu_flags.bitfield.cpuavx512dq
9686 || i.tm.cpu_flags.bitfield.cpuavx512vl
9687 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9688 AVX512-4FMAPS, and AVX512-4VNNIW. */
9689 || (i.tm.opcode_modifier.evex
9690 && !i.tm.cpu_flags.bitfield.cpuavx512er
9691 && !i.tm.cpu_flags.bitfield.cpuavx512pf
9692 && !i.tm.cpu_flags.bitfield.cpuavx512_4fmaps
9693 && !i.tm.cpu_flags.bitfield.cpuavx512_4vnniw))
9694 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9695 }
9696 #endif
9697
9698 /* Tie dwarf2 debug info to the address at the start of the insn.
9699 We can't do this after the insn has been output as the current
9700 frag may have been closed off, e.g. by frag_var. */
9701 dwarf2_emit_insn (0);
9702
9703 insn_start_frag = frag_now;
9704 insn_start_off = frag_now_fix ();
9705
9706 if (add_branch_padding_frag_p (&branch, &mf_jcc))
9707 {
9708 char *p;
9709 /* Branch can be 8 bytes. Leave some room for prefixes. */
9710 unsigned int max_branch_padding_size = 14;
9711
9712 /* Align section to boundary. */
9713 record_alignment (now_seg, align_branch_power);
9714
9715 /* Make room for padding. */
9716 frag_grow (max_branch_padding_size);
9717
9718 /* Start of the padding. */
9719 p = frag_more (0);
9720
9721 fragP = frag_now;
9722
9723 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9724 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9725 NULL, 0, p);
9726
9727 fragP->tc_frag_data.mf_type = mf_jcc;
9728 fragP->tc_frag_data.branch_type = branch;
9729 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9730 }
9731
9732 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9733 && !pre_386_16bit_warned)
9734 {
9735 as_warn (_("use .code16 to ensure correct addressing mode"));
9736 pre_386_16bit_warned = true;
9737 }
9738
9739 /* Output jumps. */
9740 if (i.tm.opcode_modifier.jump == JUMP)
9741 output_branch ();
9742 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9743 || i.tm.opcode_modifier.jump == JUMP_DWORD)
9744 output_jump ();
9745 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9746 output_interseg_jump ();
9747 else
9748 {
9749 /* Output normal instructions here. */
9750 char *p;
9751 unsigned char *q;
9752 unsigned int j;
9753 enum mf_cmp_kind mf_cmp;
9754
9755 if (avoid_fence
9756 && (i.tm.base_opcode == 0xaee8
9757 || i.tm.base_opcode == 0xaef0
9758 || i.tm.base_opcode == 0xaef8))
9759 {
9760 /* Encode lfence, mfence, and sfence as
9761 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
9762 if (flag_code == CODE_16BIT)
9763 as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
9764 else if (omit_lock_prefix)
9765 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9766 insn_name (&i.tm));
9767 else if (now_seg != absolute_section)
9768 {
9769 offsetT val = 0x240483f0ULL;
9770
9771 p = frag_more (5);
9772 md_number_to_chars (p, val, 5);
9773 }
9774 else
9775 abs_section_offset += 5;
9776 return;
9777 }
9778
9779 /* Some processors fail on the LOCK prefix. This option makes the
9780 assembler ignore the LOCK prefix and serves as a workaround. */
9781 if (omit_lock_prefix)
9782 {
9783 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9784 && i.tm.opcode_modifier.isprefix)
9785 return;
9786 i.prefix[LOCK_PREFIX] = 0;
9787 }
9788
9789 if (branch)
9790 /* Skip if this is a branch. */
9791 ;
9792 else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9793 {
9794 /* Make room for padding. */
9795 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9796 p = frag_more (0);
9797
9798 fragP = frag_now;
9799
9800 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9801 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9802 NULL, 0, p);
9803
9804 fragP->tc_frag_data.mf_type = mf_cmp;
9805 fragP->tc_frag_data.branch_type = align_branch_fused;
9806 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9807 }
9808 else if (add_branch_prefix_frag_p ())
9809 {
9810 unsigned int max_prefix_size = align_branch_prefix_size;
9811
9812 /* Make room for padding. */
9813 frag_grow (max_prefix_size);
9814 p = frag_more (0);
9815
9816 fragP = frag_now;
9817
9818 frag_var (rs_machine_dependent, max_prefix_size, 0,
9819 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9820 NULL, 0, p);
9821
9822 fragP->tc_frag_data.max_bytes = max_prefix_size;
9823 }
9824
9825 /* Since the VEX/EVEX prefix contains the implicit prefix, we
9826 don't need the explicit prefix. */
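/* For instance (illustrative, not tied to the code below): the SSE2
   form `addsd' needs an explicit F2 prefix byte, while its AVX
   counterpart `vaddsd' carries the same F2 in the VEX prefix's pp
   field, so no separate prefix byte is emitted for it. */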
9827 if (!is_any_vex_encoding (&i.tm))
9828 {
9829 switch (i.tm.opcode_modifier.opcodeprefix)
9830 {
9831 case PREFIX_0X66:
9832 add_prefix (0x66);
9833 break;
9834 case PREFIX_0XF2:
9835 add_prefix (0xf2);
9836 break;
9837 case PREFIX_0XF3:
9838 if (!i.tm.cpu_flags.bitfield.cpupadlock
9839 || (i.prefix[REP_PREFIX] != 0xf3))
9840 add_prefix (0xf3);
9841 break;
9842 case PREFIX_NONE:
9843 switch (i.opcode_length)
9844 {
9845 case 2:
9846 break;
9847 case 1:
9848 /* Check for pseudo prefixes. */
9849 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9850 break;
9851 as_bad_where (insn_start_frag->fr_file,
9852 insn_start_frag->fr_line,
9853 _("pseudo prefix without instruction"));
9854 return;
9855 default:
9856 abort ();
9857 }
9858 break;
9859 default:
9860 abort ();
9861 }
9862
9863 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9864 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9865 R_X86_64_GOTTPOFF relocation so that linker can safely
9866 perform IE->LE optimization. A dummy REX_OPCODE prefix
9867 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9868 relocation for GDesc -> IE/LE optimization. */
9869 if (x86_elf_abi == X86_64_X32_ABI
9870 && i.operands == 2
9871 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9872 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9873 && i.prefix[REX_PREFIX] == 0)
9874 add_prefix (REX_OPCODE);
9875 #endif
9876
9877 /* The prefix bytes. */
9878 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9879 if (*q)
9880 frag_opcode_byte (*q);
9881 }
9882 else
9883 {
9884 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
9885 if (*q)
9886 switch (j)
9887 {
9888 case SEG_PREFIX:
9889 case ADDR_PREFIX:
9890 frag_opcode_byte (*q);
9891 break;
9892 default:
9893 /* There should be no other prefixes for instructions
9894 with VEX prefix. */
9895 abort ();
9896 }
9897
9898 /* For EVEX instructions i.vrex should become 0 after
9899 build_evex_prefix. For VEX instructions upper 16 registers
9900 aren't available, so VREX should be 0. */
9901 if (i.vrex)
9902 abort ();
9903 /* Now the VEX prefix. */
9904 if (now_seg != absolute_section)
9905 {
9906 p = frag_more (i.vex.length);
9907 for (j = 0; j < i.vex.length; j++)
9908 p[j] = i.vex.bytes[j];
9909 }
9910 else
9911 abs_section_offset += i.vex.length;
9912 }
9913
9914 /* Now the opcode; be careful about word order here! */
9915 j = i.opcode_length;
9916 if (!i.vex.length)
9917 switch (i.tm.opcode_modifier.opcodespace)
9918 {
9919 case SPACE_BASE:
9920 break;
9921 case SPACE_0F:
9922 ++j;
9923 break;
9924 case SPACE_0F38:
9925 case SPACE_0F3A:
9926 j += 2;
9927 break;
9928 default:
9929 abort ();
9930 }
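/* Illustrative example: a SPACE_0F38 instruction such as
   `pshufb %xmm1, %xmm0' gets the escape bytes 0f 38 emitted ahead
   of its base opcode 00, yielding 66 0f 38 00 c1 once the data
   prefix and the ModRM byte are accounted for. */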
9931
9932 if (now_seg == absolute_section)
9933 abs_section_offset += j;
9934 else if (j == 1)
9935 {
9936 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
9937 }
9938 else
9939 {
9940 p = frag_more (j);
9941 if (!i.vex.length
9942 && i.tm.opcode_modifier.opcodespace != SPACE_BASE)
9943 {
9944 *p++ = 0x0f;
9945 if (i.tm.opcode_modifier.opcodespace != SPACE_0F)
9946 *p++ = i.tm.opcode_modifier.opcodespace == SPACE_0F38
9947 ? 0x38 : 0x3a;
9948 }
9949
9950 switch (i.opcode_length)
9951 {
9952 case 2:
9953 /* Put out high byte first: can't use md_number_to_chars! */
9954 *p++ = (i.tm.base_opcode >> 8) & 0xff;
9955 /* Fall through. */
9956 case 1:
9957 *p = i.tm.base_opcode & 0xff;
9958 break;
9959 default:
9960 abort ();
9961 break;
9962 }
9963
9964 }
9965
9966 /* Now the modrm byte and sib byte (if present). */
9967 if (i.tm.opcode_modifier.modrm)
9968 {
9969 frag_opcode_byte ((i.rm.regmem << 0)
9970 | (i.rm.reg << 3)
9971 | (i.rm.mode << 6));
9972 /* If i.rm.regmem == ESP (4)
9973 && i.rm.mode != (Register mode)
9974 && not 16 bit
9975 ==> need second modrm byte. */
9976 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
9977 && i.rm.mode != 3
9978 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
9979 frag_opcode_byte ((i.sib.base << 0)
9980 | (i.sib.index << 3)
9981 | (i.sib.scale << 6));
9982 }
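/* Worked example (illustrative): `movl %eax, (%esp)' encodes as
   89 04 24 - ModRM 0x04 is mode 0 with regmem 4 (the SIB escape),
   and SIB 0x24 selects base ESP with no index. */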
9983
9984 if (i.disp_operands)
9985 output_disp (insn_start_frag, insn_start_off);
9986
9987 if (i.imm_operands)
9988 output_imm (insn_start_frag, insn_start_off);
9989
9990 /*
9991 * frag_now_fix () returning plain abs_section_offset when we're in the
9992 * absolute section, and abs_section_offset not getting updated as data
9993 * gets added to the frag, breaks the logic below.
9994 */
9995 if (now_seg != absolute_section)
9996 {
9997 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
9998 if (j > 15)
9999 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
10000 j);
10001 else if (fragP)
10002 {
10003 /* NB: Don't add prefix with GOTPC relocation since
10004 output_disp() above depends on the fixed encoding
10005 length. Can't add prefix with TLS relocation since
10006 it breaks TLS linker optimization. */
10007 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
10008 /* Prefix count on the current instruction. */
10009 unsigned int count = i.vex.length;
10010 unsigned int k;
10011 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
10012 /* REX byte is encoded in VEX/EVEX prefix. */
10013 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
10014 count++;
10015
10016 /* Count prefixes for extended opcode maps. */
10017 if (!i.vex.length)
10018 switch (i.tm.opcode_modifier.opcodespace)
10019 {
10020 case SPACE_BASE:
10021 break;
10022 case SPACE_0F:
10023 count++;
10024 break;
10025 case SPACE_0F38:
10026 case SPACE_0F3A:
10027 count += 2;
10028 break;
10029 default:
10030 abort ();
10031 }
10032
10033 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
10034 == BRANCH_PREFIX)
10035 {
10036 /* Set the maximum prefix size in BRANCH_PREFIX
10037 frag. */
10038 if (fragP->tc_frag_data.max_bytes > max)
10039 fragP->tc_frag_data.max_bytes = max;
10040 if (fragP->tc_frag_data.max_bytes > count)
10041 fragP->tc_frag_data.max_bytes -= count;
10042 else
10043 fragP->tc_frag_data.max_bytes = 0;
10044 }
10045 else
10046 {
10047 /* Remember the maximum prefix size in FUSED_JCC_PADDING
10048 frag. */
10049 unsigned int max_prefix_size;
10050 if (align_branch_prefix_size > max)
10051 max_prefix_size = max;
10052 else
10053 max_prefix_size = align_branch_prefix_size;
10054 if (max_prefix_size > count)
10055 fragP->tc_frag_data.max_prefix_length
10056 = max_prefix_size - count;
10057 }
10058
10059 /* Use existing segment prefix if possible. Use CS
10060 segment prefix in 64-bit mode. In 32-bit mode, use SS
10061 segment prefix with ESP/EBP base register and use DS
10062 segment prefix without ESP/EBP base register. */
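/* E.g. (illustrative) for padding inserted in 64-bit mode the
   default becomes 0x2e (CS), which has no effect on addressing
   there, so repeating it is a safe way to lengthen the insn. */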
10063 if (i.prefix[SEG_PREFIX])
10064 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
10065 else if (flag_code == CODE_64BIT)
10066 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
10067 else if (i.base_reg
10068 && (i.base_reg->reg_num == 4
10069 || i.base_reg->reg_num == 5))
10070 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
10071 else
10072 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
10073 }
10074 }
10075 }
10076
10077 /* NB: Don't work with COND_JUMP86 without i386. */
10078 if (align_branch_power
10079 && now_seg != absolute_section
10080 && cpu_arch_flags.bitfield.cpui386)
10081 {
10082 /* Terminate each frag so that we can add prefix and check for
10083 fused jcc. */
10084 frag_wane (frag_now);
10085 frag_new (0);
10086 }
10087
10088 #ifdef DEBUG386
10089 if (flag_debug)
10090 {
10091 pi ("" /*line*/, &i);
10092 }
10093 #endif /* DEBUG386 */
10094 }
10095
10096 /* Return the size of the displacement operand N. */
10097
10098 static int
10099 disp_size (unsigned int n)
10100 {
10101 int size = 4;
10102
10103 if (i.types[n].bitfield.disp64)
10104 size = 8;
10105 else if (i.types[n].bitfield.disp8)
10106 size = 1;
10107 else if (i.types[n].bitfield.disp16)
10108 size = 2;
10109 return size;
10110 }
10111
10112 /* Return the size of the immediate operand N. */
10113
10114 static int
10115 imm_size (unsigned int n)
10116 {
10117 int size = 4;
10118 if (i.types[n].bitfield.imm64)
10119 size = 8;
10120 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
10121 size = 1;
10122 else if (i.types[n].bitfield.imm16)
10123 size = 2;
10124 return size;
10125 }
10126
10127 static void
10128 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
10129 {
10130 char *p;
10131 unsigned int n;
10132
10133 for (n = 0; n < i.operands; n++)
10134 {
10135 if (operand_type_check (i.types[n], disp))
10136 {
10137 int size = disp_size (n);
10138
10139 if (now_seg == absolute_section)
10140 abs_section_offset += size;
10141 else if (i.op[n].disps->X_op == O_constant)
10142 {
10143 offsetT val = i.op[n].disps->X_add_number;
10144
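/* A one-byte displacement may be an EVEX disp8*N compressed value,
   hence the scaling by i.memshift below. Illustrative example:
   `vmovaps 64(%rax), %zmm0' stores disp8 = 64 >> 6 = 1. */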
10145 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10146 size);
10147 p = frag_more (size);
10148 md_number_to_chars (p, val, size);
10149 }
10150 else
10151 {
10152 enum bfd_reloc_code_real reloc_type;
10153 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10154 bool sign = (flag_code == CODE_64BIT && size == 4
10155 && (!want_disp32 (&i.tm)
10156 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10157 && !i.types[n].bitfield.baseindex)))
10158 || pcrel;
10159 fixS *fixP;
10160
10161 /* We can't have 8 bit displacement here. */
10162 gas_assert (!i.types[n].bitfield.disp8);
10163
10164 /* The PC relative address is computed relative
10165 to the instruction boundary, so in case immediate
10166 fields follow, we need to adjust the value. */
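/* Illustrative example: in `testl $0x12345678, foo(%rip)' the CPU
   forms the %rip-relative address from the end of the insn, which
   lies 4 immediate bytes beyond the displacement field, so 4 is
   subtracted from the addend here. */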
10167 if (pcrel && i.imm_operands)
10168 {
10169 unsigned int n1;
10170 int sz = 0;
10171
10172 for (n1 = 0; n1 < i.operands; n1++)
10173 if (operand_type_check (i.types[n1], imm))
10174 {
10175 /* Only one immediate is allowed for PC
10176 relative address. */
10177 gas_assert (sz == 0);
10178 sz = imm_size (n1);
10179 i.op[n].disps->X_add_number -= sz;
10180 }
10181 /* We should find the immediate. */
10182 gas_assert (sz != 0);
10183 }
10184
10185 p = frag_more (size);
10186 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10187 if (GOT_symbol
10188 && GOT_symbol == i.op[n].disps->X_add_symbol
10189 && (((reloc_type == BFD_RELOC_32
10190 || reloc_type == BFD_RELOC_X86_64_32S
10191 || (reloc_type == BFD_RELOC_64
10192 && object_64bit))
10193 && (i.op[n].disps->X_op == O_symbol
10194 || (i.op[n].disps->X_op == O_add
10195 && ((symbol_get_value_expression
10196 (i.op[n].disps->X_op_symbol)->X_op)
10197 == O_subtract))))
10198 || reloc_type == BFD_RELOC_32_PCREL))
10199 {
10200 if (!object_64bit)
10201 {
10202 reloc_type = BFD_RELOC_386_GOTPC;
10203 i.has_gotpc_tls_reloc = true;
10204 i.op[n].disps->X_add_number +=
10205 encoding_length (insn_start_frag, insn_start_off, p);
10206 }
10207 else if (reloc_type == BFD_RELOC_64)
10208 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10209 else
10210 /* Don't do the adjustment for x86-64, as there
10211 the pcrel addressing is relative to the _next_
10212 insn, and that is taken care of in other code. */
10213 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10214 }
10215 else if (align_branch_power)
10216 {
10217 switch (reloc_type)
10218 {
10219 case BFD_RELOC_386_TLS_GD:
10220 case BFD_RELOC_386_TLS_LDM:
10221 case BFD_RELOC_386_TLS_IE:
10222 case BFD_RELOC_386_TLS_IE_32:
10223 case BFD_RELOC_386_TLS_GOTIE:
10224 case BFD_RELOC_386_TLS_GOTDESC:
10225 case BFD_RELOC_386_TLS_DESC_CALL:
10226 case BFD_RELOC_X86_64_TLSGD:
10227 case BFD_RELOC_X86_64_TLSLD:
10228 case BFD_RELOC_X86_64_GOTTPOFF:
10229 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10230 case BFD_RELOC_X86_64_TLSDESC_CALL:
10231 i.has_gotpc_tls_reloc = true;
10232 default:
10233 break;
10234 }
10235 }
10236 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10237 size, i.op[n].disps, pcrel,
10238 reloc_type);
10239
10240 if (flag_code == CODE_64BIT && size == 4 && pcrel
10241 && !i.prefix[ADDR_PREFIX])
10242 fixP->fx_signed = 1;
10243
10244 /* Check for "call/jmp *mem", "mov mem, %reg",
10245 "test %reg, mem" and "binop mem, %reg" where binop
10246 is one of adc, add, and, cmp, or, sbb, sub, xor
10247 instructions without data prefix. Always generate
10248 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
10249 if (i.prefix[DATA_PREFIX] == 0
10250 && (generate_relax_relocations
10251 || (!object_64bit
10252 && i.rm.mode == 0
10253 && i.rm.regmem == 5))
10254 && (i.rm.mode == 2
10255 || (i.rm.mode == 0 && i.rm.regmem == 5))
10256 && i.tm.opcode_modifier.opcodespace == SPACE_BASE
10257 && ((i.operands == 1
10258 && i.tm.base_opcode == 0xff
10259 && (i.rm.reg == 2 || i.rm.reg == 4))
10260 || (i.operands == 2
10261 && (i.tm.base_opcode == 0x8b
10262 || i.tm.base_opcode == 0x85
10263 || (i.tm.base_opcode & ~0x38) == 0x03))))
10264 {
10265 if (object_64bit)
10266 {
10267 fixP->fx_tcbit = i.rex != 0;
10268 if (i.base_reg
10269 && (i.base_reg->reg_num == RegIP))
10270 fixP->fx_tcbit2 = 1;
10271 }
10272 else
10273 fixP->fx_tcbit2 = 1;
10274 }
10275 }
10276 }
10277 }
10278 }
10279
10280 static void
10281 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10282 {
10283 char *p;
10284 unsigned int n;
10285
10286 for (n = 0; n < i.operands; n++)
10287 {
10288 if (operand_type_check (i.types[n], imm))
10289 {
10290 int size = imm_size (n);
10291
10292 if (now_seg == absolute_section)
10293 abs_section_offset += size;
10294 else if (i.op[n].imms->X_op == O_constant)
10295 {
10296 offsetT val;
10297
10298 val = offset_in_range (i.op[n].imms->X_add_number,
10299 size);
10300 p = frag_more (size);
10301 md_number_to_chars (p, val, size);
10302 }
10303 else
10304 {
10305 /* Not absolute_section.
10306 Need a 32-bit fixup (don't support 8bit
10307 non-absolute imms). Try to support other
10308 sizes ... */
10309 enum bfd_reloc_code_real reloc_type;
10310 int sign;
10311
10312 if (i.types[n].bitfield.imm32s
10313 && (i.suffix == QWORD_MNEM_SUFFIX
10314 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)))
10315 sign = 1;
10316 else
10317 sign = 0;
10318
10319 p = frag_more (size);
10320 reloc_type = reloc (size, 0, sign, i.reloc[n]);
10321
10322 /* This is tough to explain. We end up with this one if we
10323 * have operands that look like
10324 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
10325 * obtain the absolute address of the GOT, and it is strongly
10326 * preferable from a performance point of view to avoid using
10327 * a runtime relocation for this. The actual sequence of
10328 * instructions often looks something like:
10329 *
10330 * call .L66
10331 * .L66:
10332 * popl %ebx
10333 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10334 *
10335 * The call and pop essentially return the absolute address
10336 * of the label .L66 and store it in %ebx. The linker itself
10337 * will ultimately change the first operand of the addl so
10338 * that %ebx points to the GOT, but to keep things simple, the
10339 * .o file must have this operand set so that it generates not
10340 * the absolute address of .L66, but the absolute address of
10341 * itself. This allows the linker to simply treat a GOTPC
10342 * relocation as asking for a pcrel offset to the GOT to be
10343 * added in, and the addend of the relocation is stored in the
10344 * operand field for the instruction itself.
10345 *
10346 * Our job here is to fix the operand so that it would add
10347 * the correct offset so that %ebx would point to itself. The
10348 * thing that is tricky is that .-.L66 will point to the
10349 * beginning of the instruction, so we need to further modify
10350 * the operand so that it will point to itself. There are
10351 * other cases where you have something like:
10352 *
10353 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10354 *
10355 * and here no correction would be required. Internally in
10356 * the assembler we treat operands of this form as not being
10357 * pcrel since the '.' is explicitly mentioned, and I wonder
10358 * whether it would simplify matters to do it this way. Who
10359 * knows. In earlier versions of the PIC patches, the
10360 * pcrel_adjust field was used to store the correction, but
10361 * since the expression is not pcrel, I felt it would be
10362 * confusing to do it this way. */
10363
10364 if ((reloc_type == BFD_RELOC_32
10365 || reloc_type == BFD_RELOC_X86_64_32S
10366 || reloc_type == BFD_RELOC_64)
10367 && GOT_symbol
10368 && GOT_symbol == i.op[n].imms->X_add_symbol
10369 && (i.op[n].imms->X_op == O_symbol
10370 || (i.op[n].imms->X_op == O_add
10371 && ((symbol_get_value_expression
10372 (i.op[n].imms->X_op_symbol)->X_op)
10373 == O_subtract))))
10374 {
10375 if (!object_64bit)
10376 reloc_type = BFD_RELOC_386_GOTPC;
10377 else if (size == 4)
10378 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10379 else if (size == 8)
10380 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10381 i.has_gotpc_tls_reloc = true;
10382 i.op[n].imms->X_add_number +=
10383 encoding_length (insn_start_frag, insn_start_off, p);
10384 }
10385 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10386 i.op[n].imms, 0, reloc_type);
10387 }
10388 }
10389 }
10390 }
10391 \f
10392 /* x86_cons_fix_new is called via the expression parsing code when a
10393 reloc is needed. We use this hook to get the correct .got reloc. */
10394 static int cons_sign = -1;
10395
10396 void
10397 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10398 expressionS *exp, bfd_reloc_code_real_type r)
10399 {
10400 r = reloc (len, 0, cons_sign, r);
10401
10402 #ifdef TE_PE
10403 if (exp->X_op == O_secrel)
10404 {
10405 exp->X_op = O_symbol;
10406 r = BFD_RELOC_32_SECREL;
10407 }
10408 else if (exp->X_op == O_secidx)
10409 r = BFD_RELOC_16_SECIDX;
10410 #endif
10411
10412 fix_new_exp (frag, off, len, exp, 0, r);
10413 }
10414
10415 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10416 purpose of the `.dc.a' internal pseudo-op. */
10417
10418 int
10419 x86_address_bytes (void)
10420 {
10421 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10422 return 4;
10423 return stdoutput->arch_info->bits_per_address / 8;
10424 }
10425
10426 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10427 || defined (LEX_AT)) && !defined (TE_PE)
10428 # define lex_got(reloc, adjust, types) NULL
10429 #else
10430 /* Parse operands of the form
10431 <symbol>@GOTOFF+<nnn>
10432 and similar .plt or .got references.
10433
10434 If we find one, set up the correct relocation in RELOC and copy the
10435 input string, minus the `@GOTOFF' into a malloc'd buffer for
10436 parsing by the calling routine. Return this buffer, and if ADJUST
10437 is non-null set it to the length of the string we removed from the
10438 input line. Otherwise return NULL. */
10439 static char *
10440 lex_got (enum bfd_reloc_code_real *rel,
10441 int *adjust,
10442 i386_operand_type *types)
10443 {
10444 /* Some of the relocations depend on the size of the field to be
10445 relocated. But in our callers i386_immediate and i386_displacement
10446 we don't yet know the operand size (this will be set by insn
10447 matching). Hence we record the word32 relocation here,
10448 and adjust the reloc according to the real size in reloc(). */
10449 static const struct
10450 {
10451 const char *str;
10452 int len;
10453 const enum bfd_reloc_code_real rel[2];
10454 const i386_operand_type types64;
10455 bool need_GOT_symbol;
10456 }
10457 gotrel[] =
10458 {
10459
10460 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
10461 { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
10462 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
10463 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
10464 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
10465 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
10466 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
10467 { .imm64 = 1, .disp64 = 1 } }
10468
10469 #ifndef TE_PE
10470 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10471 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
10472 BFD_RELOC_SIZE32 },
10473 { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
10474 #endif
10475 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
10476 BFD_RELOC_X86_64_PLTOFF64 },
10477 { .bitfield = { .imm64 = 1 } }, true },
10478 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
10479 BFD_RELOC_X86_64_PLT32 },
10480 OPERAND_TYPE_IMM32_32S_DISP32, false },
10481 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
10482 BFD_RELOC_X86_64_GOTPLT64 },
10483 OPERAND_TYPE_IMM64_DISP64, true },
10484 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
10485 BFD_RELOC_X86_64_GOTOFF64 },
10486 OPERAND_TYPE_IMM64_DISP64, true },
10487 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10488 BFD_RELOC_X86_64_GOTPCREL },
10489 OPERAND_TYPE_IMM32_32S_DISP32, true },
10490 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
10491 BFD_RELOC_X86_64_TLSGD },
10492 OPERAND_TYPE_IMM32_32S_DISP32, true },
10493 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
10494 _dummy_first_bfd_reloc_code_real },
10495 OPERAND_TYPE_NONE, true },
10496 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
10497 BFD_RELOC_X86_64_TLSLD },
10498 OPERAND_TYPE_IMM32_32S_DISP32, true },
10499 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10500 BFD_RELOC_X86_64_GOTTPOFF },
10501 OPERAND_TYPE_IMM32_32S_DISP32, true },
10502 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
10503 BFD_RELOC_X86_64_TPOFF32 },
10504 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10505 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
10506 _dummy_first_bfd_reloc_code_real },
10507 OPERAND_TYPE_NONE, true },
10508 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
10509 BFD_RELOC_X86_64_DTPOFF32 },
10510 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10511 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10512 _dummy_first_bfd_reloc_code_real },
10513 OPERAND_TYPE_NONE, true },
10514 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10515 _dummy_first_bfd_reloc_code_real },
10516 OPERAND_TYPE_NONE, true },
10517 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
10518 BFD_RELOC_X86_64_GOT32 },
10519 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10520 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
10521 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10522 OPERAND_TYPE_IMM32_32S_DISP32, true },
10523 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
10524 BFD_RELOC_X86_64_TLSDESC_CALL },
10525 OPERAND_TYPE_IMM32_32S_DISP32, true },
10526 #else /* TE_PE */
10527 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10528 BFD_RELOC_32_SECREL },
10529 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10530 #endif
10531
10532 #undef OPERAND_TYPE_IMM32_32S_DISP32
10533 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
10534 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
10535 #undef OPERAND_TYPE_IMM64_DISP64
10536
10537 };
10538 char *cp;
10539 unsigned int j;
10540
10541 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10542 if (!IS_ELF)
10543 return NULL;
10544 #endif
10545
10546 for (cp = input_line_pointer; *cp != '@'; cp++)
10547 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10548 return NULL;
10549
10550 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10551 {
10552 int len = gotrel[j].len;
10553 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10554 {
10555 if (gotrel[j].rel[object_64bit] != 0)
10556 {
10557 int first, second;
10558 char *tmpbuf, *past_reloc;
10559
10560 *rel = gotrel[j].rel[object_64bit];
10561
10562 if (types)
10563 {
10564 if (flag_code != CODE_64BIT)
10565 {
10566 types->bitfield.imm32 = 1;
10567 types->bitfield.disp32 = 1;
10568 }
10569 else
10570 *types = gotrel[j].types64;
10571 }
10572
10573 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10574 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10575
10576 /* The length of the first part of our input line. */
10577 first = cp - input_line_pointer;
10578
10579 /* The second part goes from after the reloc token until
10580 (and including) an end_of_line char or comma. */
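/* Illustrative example: for the operand `foo@GOTOFF+4', FIRST
   covers `foo', the token is overwritten with a single space, and
   SECOND picks up `+4', so the buffer handed back for re-parsing
   reads `foo +4'. */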
10581 past_reloc = cp + 1 + len;
10582 cp = past_reloc;
10583 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10584 ++cp;
10585 second = cp + 1 - past_reloc;
10586
10587 /* Allocate and copy string. The trailing NUL shouldn't
10588 be necessary, but be safe. */
10589 tmpbuf = XNEWVEC (char, first + second + 2);
10590 memcpy (tmpbuf, input_line_pointer, first);
10591 if (second != 0 && *past_reloc != ' ')
10592 /* Replace the relocation token with ' ', so that
10593 errors like foo@GOTOFF1 will be detected. */
10594 tmpbuf[first++] = ' ';
10595 else
10596 /* Increment length by 1 if the relocation token is
10597 removed. */
10598 len++;
10599 if (adjust)
10600 *adjust = len;
10601 memcpy (tmpbuf + first, past_reloc, second);
10602 tmpbuf[first + second] = '\0';
10603 return tmpbuf;
10604 }
10605
10606 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10607 gotrel[j].str, 1 << (5 + object_64bit));
10608 return NULL;
10609 }
10610 }
10611
10612 /* Might be a symbol version string. Don't as_bad here. */
10613 return NULL;
10614 }
10615 #endif
10616
10617 bfd_reloc_code_real_type
10618 x86_cons (expressionS *exp, int size)
10619 {
10620 bfd_reloc_code_real_type got_reloc = NO_RELOC;
10621
10622 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10623 && !defined (LEX_AT)) \
10624 || defined (TE_PE)
10625 intel_syntax = -intel_syntax;
10626
10627 exp->X_md = 0;
10628 if (size == 4 || (object_64bit && size == 8))
10629 {
10630 /* Handle @GOTOFF and the like in an expression. */
10631 char *save;
10632 char *gotfree_input_line;
10633 int adjust = 0;
10634
10635 save = input_line_pointer;
10636 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10637 if (gotfree_input_line)
10638 input_line_pointer = gotfree_input_line;
10639
10640 expression (exp);
10641
10642 if (gotfree_input_line)
10643 {
10644 /* expression () has merrily parsed up to the end of line,
10645 or a comma - in the wrong buffer. Transfer how far
10646 input_line_pointer has moved to the right buffer. */
10647 input_line_pointer = (save
10648 + (input_line_pointer - gotfree_input_line)
10649 + adjust);
10650 free (gotfree_input_line);
10651 if (exp->X_op == O_constant
10652 || exp->X_op == O_absent
10653 || exp->X_op == O_illegal
10654 || exp->X_op == O_register
10655 || exp->X_op == O_big)
10656 {
10657 char c = *input_line_pointer;
10658 *input_line_pointer = 0;
10659 as_bad (_("missing or invalid expression `%s'"), save);
10660 *input_line_pointer = c;
10661 }
10662 else if ((got_reloc == BFD_RELOC_386_PLT32
10663 || got_reloc == BFD_RELOC_X86_64_PLT32)
10664 && exp->X_op != O_symbol)
10665 {
10666 char c = *input_line_pointer;
10667 *input_line_pointer = 0;
10668 as_bad (_("invalid PLT expression `%s'"), save);
10669 *input_line_pointer = c;
10670 }
10671 }
10672 }
10673 else
10674 expression (exp);
10675
10676 intel_syntax = -intel_syntax;
10677
10678 if (intel_syntax)
10679 i386_intel_simplify (exp);
10680 #else
10681 expression (exp);
10682 #endif
10683
10684 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
10685 if (size == 4 && exp->X_op == O_constant && !object_64bit)
10686 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10687
10688 return got_reloc;
10689 }
10690
10691 static void
10692 signed_cons (int size)
10693 {
10694 if (object_64bit)
10695 cons_sign = 1;
10696 cons (size);
10697 cons_sign = -1;
10698 }
10699
10700 #ifdef TE_PE
10701 static void
10702 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
10703 {
10704 expressionS exp;
10705
10706 do
10707 {
10708 expression (&exp);
10709 if (exp.X_op == O_symbol)
10710 exp.X_op = O_secrel;
10711
10712 emit_expr (&exp, 4);
10713 }
10714 while (*input_line_pointer++ == ',');
10715
10716 input_line_pointer--;
10717 demand_empty_rest_of_line ();
10718 }
10719
10720 static void
10721 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
10722 {
10723 expressionS exp;
10724
10725 do
10726 {
10727 expression (&exp);
10728 if (exp.X_op == O_symbol)
10729 exp.X_op = O_secidx;
10730
10731 emit_expr (&exp, 2);
10732 }
10733 while (*input_line_pointer++ == ',');
10734
10735 input_line_pointer--;
10736 demand_empty_rest_of_line ();
10737 }
10738 #endif
10739
10740 /* Handle Rounding Control / SAE specifiers. */
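/* These are the EVEX {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} and
   {sae} modifiers, as in e.g. (Intel syntax, illustrative)
   `vaddps zmm0, zmm1, zmm2, {rd-sae}'. */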
10741
10742 static char *
10743 RC_SAE_specifier (const char *pstr)
10744 {
10745 unsigned int j;
10746
10747 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
10748 {
10749 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
10750 {
10751 if (i.rounding.type != rc_none)
10752 {
10753 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
10754 return NULL;
10755 }
10756
10757 i.rounding.type = RC_NamesTable[j].type;
10758
10759 return (char *)(pstr + RC_NamesTable[j].len);
10760 }
10761 }
10762
10763 return NULL;
10764 }
10765
10766 /* Handle Vector operations. */
10767
10768 static char *
10769 check_VecOperations (char *op_string)
10770 {
10771 const reg_entry *mask;
10772 const char *saved;
10773 char *end_op;
10774
10775 while (*op_string)
10776 {
10777 saved = op_string;
10778 if (*op_string == '{')
10779 {
10780 op_string++;
10781
10782 /* Check broadcasts. */
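/* e.g. (illustrative) the memory operand in
   `vaddps (%rax){1to16}, %zmm1, %zmm2' replicates one 32-bit
   element sixteen times. */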
10783 if (startswith (op_string, "1to"))
10784 {
10785 unsigned int bcst_type;
10786
10787 if (i.broadcast.type)
10788 goto duplicated_vec_op;
10789
10790 op_string += 3;
10791 if (*op_string == '8')
10792 bcst_type = 8;
10793 else if (*op_string == '4')
10794 bcst_type = 4;
10795 else if (*op_string == '2')
10796 bcst_type = 2;
10797 else if (*op_string == '1'
10798 && *(op_string+1) == '6')
10799 {
10800 bcst_type = 16;
10801 op_string++;
10802 }
10803 else if (*op_string == '3'
10804 && *(op_string+1) == '2')
10805 {
10806 bcst_type = 32;
10807 op_string++;
10808 }
10809 else
10810 {
10811 as_bad (_("Unsupported broadcast: `%s'"), saved);
10812 return NULL;
10813 }
10814 op_string++;
10815
10816 i.broadcast.type = bcst_type;
10817 i.broadcast.operand = this_operand;
10818 }
10819 /* Check masking operation. */
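/* e.g. (illustrative) `vaddps %zmm1, %zmm2, %zmm0{%k1}{z}':
   %k1 is recorded as the write mask here, while the trailing
   {z} is handled by the zeroing branch below. */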
10820 else if ((mask = parse_register (op_string, &end_op)) != NULL)
10821 {
10822 if (mask == &bad_reg)
10823 return NULL;
10824
10825 /* k0 can't be used for write mask. */
10826 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
10827 {
10828 as_bad (_("`%s%s' can't be used for write mask"),
10829 register_prefix, mask->reg_name);
10830 return NULL;
10831 }
10832
10833 if (!i.mask.reg)
10834 {
10835 i.mask.reg = mask;
10836 i.mask.operand = this_operand;
10837 }
10838 else if (i.mask.reg->reg_num)
10839 goto duplicated_vec_op;
10840 else
10841 {
10842 i.mask.reg = mask;
10843
10844 /* Only "{z}" is allowed here. No need to check
10845 zeroing mask explicitly. */
10846 if (i.mask.operand != (unsigned int) this_operand)
10847 {
10848 as_bad (_("invalid write mask `%s'"), saved);
10849 return NULL;
10850 }
10851 }
10852
10853 op_string = end_op;
10854 }
10855 /* Check zeroing-flag for masking operation. */
10856 else if (*op_string == 'z')
10857 {
10858 if (!i.mask.reg)
10859 {
10860 i.mask.reg = reg_k0;
10861 i.mask.zeroing = 1;
10862 i.mask.operand = this_operand;
10863 }
10864 else
10865 {
10866 if (i.mask.zeroing)
10867 {
10868 duplicated_vec_op:
10869 as_bad (_("duplicated `%s'"), saved);
10870 return NULL;
10871 }
10872
10873 i.mask.zeroing = 1;
10874
10875 /* Only "{%k}" is allowed here. No need to check mask
10876 register explicitly. */
10877 if (i.mask.operand != (unsigned int) this_operand)
10878 {
10879 as_bad (_("invalid zeroing-masking `%s'"),
10880 saved);
10881 return NULL;
10882 }
10883 }
10884
10885 op_string++;
10886 }
10887 else if (intel_syntax
10888 && (op_string = RC_SAE_specifier (op_string)) != NULL)
10889 i.rounding.modifier = true;
10890 else
10891 goto unknown_vec_op;
10892
10893 if (*op_string != '}')
10894 {
10895 as_bad (_("missing `}' in `%s'"), saved);
10896 return NULL;
10897 }
10898 op_string++;
10899
10900 /* Strip whitespace since the addition of pseudo prefixes
10901 changed how the scrubber treats '{'. */
10902 if (is_space_char (*op_string))
10903 ++op_string;
10904
10905 continue;
10906 }
10907 unknown_vec_op:
10908 /* We don't know this one. */
10909 as_bad (_("unknown vector operation: `%s'"), saved);
10910 return NULL;
10911 }
10912
10913 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
10914 {
10915 as_bad (_("zeroing-masking only allowed with write mask"));
10916 return NULL;
10917 }
10918
10919 return op_string;
10920 }
10921
10922 static int
10923 i386_immediate (char *imm_start)
10924 {
10925 char *save_input_line_pointer;
10926 char *gotfree_input_line;
10927 segT exp_seg = 0;
10928 expressionS *exp;
10929 i386_operand_type types;
10930
10931 operand_type_set (&types, ~0);
10932
10933 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
10934 {
10935 as_bad (_("at most %d immediate operands are allowed"),
10936 MAX_IMMEDIATE_OPERANDS);
10937 return 0;
10938 }
10939
10940 exp = &im_expressions[i.imm_operands++];
10941 i.op[this_operand].imms = exp;
10942
10943 if (is_space_char (*imm_start))
10944 ++imm_start;
10945
10946 save_input_line_pointer = input_line_pointer;
10947 input_line_pointer = imm_start;
10948
10949 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
10950 if (gotfree_input_line)
10951 input_line_pointer = gotfree_input_line;
10952
10953 exp_seg = expression (exp);
10954
10955 SKIP_WHITESPACE ();
10956 if (*input_line_pointer)
10957 as_bad (_("junk `%s' after expression"), input_line_pointer);
10958
10959 input_line_pointer = save_input_line_pointer;
10960 if (gotfree_input_line)
10961 {
10962 free (gotfree_input_line);
10963
10964 if (exp->X_op == O_constant)
10965 exp->X_op = O_illegal;
10966 }
10967
10968 if (exp_seg == reg_section)
10969 {
10970 as_bad (_("illegal immediate register operand %s"), imm_start);
10971 return 0;
10972 }
10973
10974 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
10975 }
10976
10977 static int
10978 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
10979 i386_operand_type types, const char *imm_start)
10980 {
10981 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
10982 {
10983 if (imm_start)
10984 as_bad (_("missing or invalid immediate expression `%s'"),
10985 imm_start);
10986 return 0;
10987 }
10988 else if (exp->X_op == O_constant)
10989 {
10990 /* Size it properly later. */
10991 i.types[this_operand].bitfield.imm64 = 1;
10992
10993 /* If not 64bit, sign/zero extend val, to account for wraparound
10994 when !BFD64. */
10995 if (flag_code != CODE_64BIT)
10996 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10997 }
10998 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
10999 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11000 && exp_seg != absolute_section
11001 && exp_seg != text_section
11002 && exp_seg != data_section
11003 && exp_seg != bss_section
11004 && exp_seg != undefined_section
11005 && !bfd_is_com_section (exp_seg))
11006 {
11007 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11008 return 0;
11009 }
11010 #endif
11011 else
11012 {
11013 /* This is an address. The size of the address will be
11014 determined later, depending on destination register,
11015 suffix, or the default for the section. */
11016 i.types[this_operand].bitfield.imm8 = 1;
11017 i.types[this_operand].bitfield.imm16 = 1;
11018 i.types[this_operand].bitfield.imm32 = 1;
11019 i.types[this_operand].bitfield.imm32s = 1;
11020 i.types[this_operand].bitfield.imm64 = 1;
11021 i.types[this_operand] = operand_type_and (i.types[this_operand],
11022 types);
11023 }
11024
11025 return 1;
11026 }
11027
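/* Parse the scale factor of a base/index memory expression and record
   its log2 value, e.g. (illustrative) the `4' in `(%eax,%ebx,4)'
   yields i.log2_scale_factor == 2. Returns a pointer past the parsed
   scale, or NULL on error. */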
11028 static char *
11029 i386_scale (char *scale)
11030 {
11031 offsetT val;
11032 char *save = input_line_pointer;
11033
11034 input_line_pointer = scale;
11035 val = get_absolute_expression ();
11036
11037 switch (val)
11038 {
11039 case 1:
11040 i.log2_scale_factor = 0;
11041 break;
11042 case 2:
11043 i.log2_scale_factor = 1;
11044 break;
11045 case 4:
11046 i.log2_scale_factor = 2;
11047 break;
11048 case 8:
11049 i.log2_scale_factor = 3;
11050 break;
11051 default:
11052 {
11053 char sep = *input_line_pointer;
11054
11055 *input_line_pointer = '\0';
11056 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
11057 scale);
11058 *input_line_pointer = sep;
11059 input_line_pointer = save;
11060 return NULL;
11061 }
11062 }
11063 if (i.log2_scale_factor != 0 && i.index_reg == 0)
11064 {
11065 as_warn (_("scale factor of %d without an index register"),
11066 1 << i.log2_scale_factor);
11067 i.log2_scale_factor = 0;
11068 }
11069 scale = input_line_pointer;
11070 input_line_pointer = save;
11071 return scale;
11072 }
11073
11074 static int
11075 i386_displacement (char *disp_start, char *disp_end)
11076 {
11077 expressionS *exp;
11078 segT exp_seg = 0;
11079 char *save_input_line_pointer;
11080 char *gotfree_input_line;
11081 int override;
11082 i386_operand_type bigdisp, types = anydisp;
11083 int ret;
11084
11085 if (i.disp_operands == MAX_MEMORY_OPERANDS)
11086 {
11087 as_bad (_("at most %d displacement operands are allowed"),
11088 MAX_MEMORY_OPERANDS);
11089 return 0;
11090 }
11091
11092 operand_type_set (&bigdisp, 0);
11093 if (i.jumpabsolute
11094 || i.types[this_operand].bitfield.baseindex
11095 || (current_templates->start->opcode_modifier.jump != JUMP
11096 && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
11097 {
11098 i386_addressing_mode ();
11099 override = (i.prefix[ADDR_PREFIX] != 0);
11100 if (flag_code == CODE_64BIT)
11101 {
11102 bigdisp.bitfield.disp32 = 1;
11103 if (!override)
11104 bigdisp.bitfield.disp64 = 1;
11105 }
11106 else if ((flag_code == CODE_16BIT) ^ override)
11107 bigdisp.bitfield.disp16 = 1;
11108 else
11109 bigdisp.bitfield.disp32 = 1;
11110 }
11111 else
11112 {
11113 /* For PC-relative branches, the width of the displacement may be
11114 dependent upon data size, but is never dependent upon address size.
11115 Also make sure to not unintentionally match against a non-PC-relative
11116 branch template. */
11117 static templates aux_templates;
11118 const insn_template *t = current_templates->start;
11119 bool has_intel64 = false;
11120
11121 aux_templates.start = t;
11122 while (++t < current_templates->end)
11123 {
11124 if (t->opcode_modifier.jump
11125 != current_templates->start->opcode_modifier.jump)
11126 break;
11127 if ((t->opcode_modifier.isa64 >= INTEL64))
11128 has_intel64 = true;
11129 }
11130 if (t < current_templates->end)
11131 {
11132 aux_templates.end = t;
11133 current_templates = &aux_templates;
11134 }
11135
11136 override = (i.prefix[DATA_PREFIX] != 0);
11137 if (flag_code == CODE_64BIT)
11138 {
11139 if ((override || i.suffix == WORD_MNEM_SUFFIX)
11140 && (!intel64 || !has_intel64))
11141 bigdisp.bitfield.disp16 = 1;
11142 else
11143 bigdisp.bitfield.disp32 = 1;
11144 }
11145 else
11146 {
11147 if (!override)
11148 override = (i.suffix == (flag_code != CODE_16BIT
11149 ? WORD_MNEM_SUFFIX
11150 : LONG_MNEM_SUFFIX));
11151 bigdisp.bitfield.disp32 = 1;
11152 if ((flag_code == CODE_16BIT) ^ override)
11153 {
11154 bigdisp.bitfield.disp32 = 0;
11155 bigdisp.bitfield.disp16 = 1;
11156 }
11157 }
11158 }
11159 i.types[this_operand] = operand_type_or (i.types[this_operand],
11160 bigdisp);
11161
11162 exp = &disp_expressions[i.disp_operands];
11163 i.op[this_operand].disps = exp;
11164 i.disp_operands++;
11165 save_input_line_pointer = input_line_pointer;
11166 input_line_pointer = disp_start;
11167 END_STRING_AND_SAVE (disp_end);
11168
11169 #ifndef GCC_ASM_O_HACK
11170 #define GCC_ASM_O_HACK 0
11171 #endif
11172 #if GCC_ASM_O_HACK
11173 END_STRING_AND_SAVE (disp_end + 1);
11174 if (i.types[this_operand].bitfield.baseindex
11175 && disp_end[-1] == '+')
11176 {
11177 /* This hack is to avoid a warning when using the "o"
11178 constraint within gcc asm statements.
11179 For instance:
11180
11181 #define _set_tssldt_desc(n,addr,limit,type) \
11182 __asm__ __volatile__ ( \
11183 "movw %w2,%0\n\t" \
11184 "movw %w1,2+%0\n\t" \
11185 "rorl $16,%1\n\t" \
11186 "movb %b1,4+%0\n\t" \
11187 "movb %4,5+%0\n\t" \
11188 "movb $0,6+%0\n\t" \
11189 "movb %h1,7+%0\n\t" \
11190 "rorl $16,%1" \
11191 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
11192
11193 This works great except that the output assembler ends
11194 up looking a bit weird if it turns out that there is
11195 no offset. You end up producing code that looks like:
11196
11197 #APP
11198 movw $235,(%eax)
11199 movw %dx,2+(%eax)
11200 rorl $16,%edx
11201 movb %dl,4+(%eax)
11202 movb $137,5+(%eax)
11203 movb $0,6+(%eax)
11204 movb %dh,7+(%eax)
11205 rorl $16,%edx
11206 #NO_APP
11207
11208 So here we provide the missing zero. */
11209
11210 *disp_end = '0';
11211 }
11212 #endif
11213 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11214 if (gotfree_input_line)
11215 input_line_pointer = gotfree_input_line;
11216
11217 exp_seg = expression (exp);
11218
11219 SKIP_WHITESPACE ();
11220 if (*input_line_pointer)
11221 as_bad (_("junk `%s' after expression"), input_line_pointer);
11222 #if GCC_ASM_O_HACK
11223 RESTORE_END_STRING (disp_end + 1);
11224 #endif
11225 input_line_pointer = save_input_line_pointer;
11226 if (gotfree_input_line)
11227 {
11228 free (gotfree_input_line);
11229
11230 if (exp->X_op == O_constant || exp->X_op == O_register)
11231 exp->X_op = O_illegal;
11232 }
11233
11234 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
11235
11236 RESTORE_END_STRING (disp_end);
11237
11238 return ret;
11239 }
11240
11241 static int
11242 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11243 i386_operand_type types, const char *disp_start)
11244 {
11245 int ret = 1;
11246
11247 /* We do this to make sure that the section symbol is in
11248 the symbol table. We will ultimately change the relocation
11249 to be relative to the beginning of the section. */
11250 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
11251 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
11252 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11253 {
11254 if (exp->X_op != O_symbol)
11255 goto inv_disp;
11256
11257 if (S_IS_LOCAL (exp->X_add_symbol)
11258 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
11259 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
11260 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
11261 exp->X_op = O_subtract;
11262 exp->X_op_symbol = GOT_symbol;
11263 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
11264 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
11265 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
11266 i.reloc[this_operand] = BFD_RELOC_64;
11267 else
11268 i.reloc[this_operand] = BFD_RELOC_32;
11269 }
11270
11271 else if (exp->X_op == O_absent
11272 || exp->X_op == O_illegal
11273 || exp->X_op == O_big)
11274 {
11275 inv_disp:
11276 as_bad (_("missing or invalid displacement expression `%s'"),
11277 disp_start);
11278 ret = 0;
11279 }
11280
11281 else if (exp->X_op == O_constant)
11282 {
11283 /* Sizing gets taken care of by optimize_disp().
11284
11285 If not 64bit, sign/zero extend val, to account for wraparound
11286 when !BFD64. */
11287 if (flag_code != CODE_64BIT)
11288 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11289 }
11290
11291 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11292 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11293 && exp_seg != absolute_section
11294 && exp_seg != text_section
11295 && exp_seg != data_section
11296 && exp_seg != bss_section
11297 && exp_seg != undefined_section
11298 && !bfd_is_com_section (exp_seg))
11299 {
11300 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11301 ret = 0;
11302 }
11303 #endif
11304
11305 else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
11306 i.types[this_operand].bitfield.disp8 = 1;
11307
11308 /* Check if this is a displacement only operand. */
11309 if (!i.types[this_operand].bitfield.baseindex)
11310 i.types[this_operand] =
11311 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
11312 operand_type_and (i.types[this_operand], types));
11313
11314 return ret;
11315 }
11316
11317 /* Return the active addressing mode, taking address override and
11318 registers forming the address into consideration. Update the
11319 address override prefix if necessary. */
11320
11321 static enum flag_code
11322 i386_addressing_mode (void)
11323 {
11324 enum flag_code addr_mode;
11325
11326 if (i.prefix[ADDR_PREFIX])
11327 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
11328 else if (flag_code == CODE_16BIT
11329 && current_templates->start->cpu_flags.bitfield.cpumpx
11330 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
11331 from md_assemble() by "is not a valid base/index expression"
11332 when there is a base and/or index. */
11333 && !i.types[this_operand].bitfield.baseindex)
11334 {
11335 /* MPX insn memory operands with neither base nor index must be forced
11336 to use 32-bit addressing in 16-bit mode. */
11337 addr_mode = CODE_32BIT;
11338 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11339 ++i.prefixes;
11340 gas_assert (!i.types[this_operand].bitfield.disp16);
11341 gas_assert (!i.types[this_operand].bitfield.disp32);
11342 }
11343 else
11344 {
11345 addr_mode = flag_code;
11346
11347 #if INFER_ADDR_PREFIX
11348 if (i.mem_operands == 0)
11349 {
11350 /* Infer address prefix from the first memory operand. */
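/* e.g. (illustrative) `mov (%si), %ax' assembled in 32-bit mode
   gets a 0x67 address size prefix, %si being a 16-bit address
   register. */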
11351 const reg_entry *addr_reg = i.base_reg;
11352
11353 if (addr_reg == NULL)
11354 addr_reg = i.index_reg;
11355
11356 if (addr_reg)
11357 {
11358 if (addr_reg->reg_type.bitfield.dword)
11359 addr_mode = CODE_32BIT;
11360 else if (flag_code != CODE_64BIT
11361 && addr_reg->reg_type.bitfield.word)
11362 addr_mode = CODE_16BIT;
11363
11364 if (addr_mode != flag_code)
11365 {
11366 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
11367 i.prefixes += 1;
11368 /* Change the size of any displacement too. At most one
11369 of Disp16 or Disp32 is set.
11370 FIXME. There doesn't seem to be any real need for
11371 separate Disp16 and Disp32 flags. The same goes for
11372 Imm16 and Imm32. Removing them would probably clean
11373 up the code quite a lot. */
11374 if (flag_code != CODE_64BIT
11375 && (i.types[this_operand].bitfield.disp16
11376 || i.types[this_operand].bitfield.disp32))
11377 {
11378 static const i386_operand_type disp16_32 = {
11379 .bitfield = { .disp16 = 1, .disp32 = 1 }
11380 };
11381
11382 i.types[this_operand]
11383 = operand_type_xor (i.types[this_operand], disp16_32);
11384 }
11385 }
11386 }
11387 }
11388 #endif
11389 }
11390
11391 return addr_mode;
11392 }
11393
11394 /* Make sure the memory operand we've been dealt is valid.
11395 Return 1 on success, 0 on failure. */
11396
11397 static int
11398 i386_index_check (const char *operand_string)
11399 {
11400 const char *kind = "base/index";
11401 enum flag_code addr_mode = i386_addressing_mode ();
11402 const insn_template *t = current_templates->end - 1;
11403
11404 if (t->opcode_modifier.isstring)
11405 {
11406 /* Memory operands of string insns are special in that they only allow
11407 a single register (rDI, rSI, or rBX) as their memory address. */
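/* e.g. (illustrative) `scas %es:(%edi)' and `xlat (%ebx)' are
   accepted in 32-bit mode, while `scas (%eax)' draws a diagnostic
   below. */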
11408 const reg_entry *expected_reg;
11409 static const char *di_si[][2] =
11410 {
11411 { "esi", "edi" },
11412 { "si", "di" },
11413 { "rsi", "rdi" }
11414 };
11415 static const char *bx[] = { "ebx", "bx", "rbx" };
11416
11417 kind = "string address";
11418
11419 if (t->opcode_modifier.prefixok == PrefixRep)
11420 {
11421 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
11422 int op = 0;
11423
11424 if (!t->operand_types[0].bitfield.baseindex
11425 || ((!i.mem_operands != !intel_syntax)
11426 && t->operand_types[1].bitfield.baseindex))
11427 op = 1;
11428 expected_reg
11429 = (const reg_entry *) str_hash_find (reg_hash,
11430 di_si[addr_mode][op == es_op]);
11431 }
11432 else
11433 expected_reg
11434 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
11435
11436 if (i.base_reg != expected_reg
11437 || i.index_reg
11438 || operand_type_check (i.types[this_operand], disp))
11439 {
11440 /* The second memory operand must have the same size as
11441 the first one. */
11442 if (i.mem_operands
11443 && i.base_reg
11444 && !((addr_mode == CODE_64BIT
11445 && i.base_reg->reg_type.bitfield.qword)
11446 || (addr_mode == CODE_32BIT
11447 ? i.base_reg->reg_type.bitfield.dword
11448 : i.base_reg->reg_type.bitfield.word)))
11449 goto bad_address;
11450
11451 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
11452 operand_string,
11453 intel_syntax ? '[' : '(',
11454 register_prefix,
11455 expected_reg->reg_name,
11456 intel_syntax ? ']' : ')');
11457 return 1;
11458 }
11459 else
11460 return 1;
11461
11462 bad_address:
11463 as_bad (_("`%s' is not a valid %s expression"),
11464 operand_string, kind);
11465 return 0;
11466 }
11467 else
11468 {
11469 t = current_templates->start;
11470
11471 if (addr_mode != CODE_16BIT)
11472 {
11473 /* 32-bit/64-bit checks. */
11474 if (i.disp_encoding == disp_encoding_16bit)
11475 {
11476 bad_disp:
11477 as_bad (_("invalid `%s' prefix"),
11478 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
11479 return 0;
11480 }
11481
11482 if ((i.base_reg
11483 && ((addr_mode == CODE_64BIT
11484 ? !i.base_reg->reg_type.bitfield.qword
11485 : !i.base_reg->reg_type.bitfield.dword)
11486 || (i.index_reg && i.base_reg->reg_num == RegIP)
11487 || i.base_reg->reg_num == RegIZ))
11488 || (i.index_reg
11489 && !i.index_reg->reg_type.bitfield.xmmword
11490 && !i.index_reg->reg_type.bitfield.ymmword
11491 && !i.index_reg->reg_type.bitfield.zmmword
11492 && ((addr_mode == CODE_64BIT
11493 ? !i.index_reg->reg_type.bitfield.qword
11494 : !i.index_reg->reg_type.bitfield.dword)
11495 || !i.index_reg->reg_type.bitfield.baseindex)))
11496 goto bad_address;
11497
11498 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
11499 if ((t->opcode_modifier.opcodeprefix == PREFIX_0XF3
11500 && t->opcode_modifier.opcodespace == SPACE_0F
11501 && t->base_opcode == 0x1b)
11502 || (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11503 && t->opcode_modifier.opcodespace == SPACE_0F
11504 && (t->base_opcode & ~1) == 0x1a)
11505 || t->opcode_modifier.sib == SIBMEM)
11506 {
11507 /* They cannot use RIP-relative addressing. */
11508 if (i.base_reg && i.base_reg->reg_num == RegIP)
11509 {
11510 as_bad (_("`%s' cannot be used here"), operand_string);
11511 return 0;
11512 }
11513
11514 /* bndldx and bndstx ignore their scale factor. */
11515 if (t->opcode_modifier.opcodeprefix == PREFIX_NONE
11516 && t->opcode_modifier.opcodespace == SPACE_0F
11517 && (t->base_opcode & ~1) == 0x1a
11518 && i.log2_scale_factor)
11519 as_warn (_("register scaling is being ignored here"));
11520 }
11521 }
11522 else
11523 {
11524 /* 16-bit checks. */
11525 if (i.disp_encoding == disp_encoding_32bit)
11526 goto bad_disp;
11527
11528 if ((i.base_reg
11529 && (!i.base_reg->reg_type.bitfield.word
11530 || !i.base_reg->reg_type.bitfield.baseindex))
11531 || (i.index_reg
11532 && (!i.index_reg->reg_type.bitfield.word
11533 || !i.index_reg->reg_type.bitfield.baseindex
11534 || !(i.base_reg
11535 && i.base_reg->reg_num < 6
11536 && i.index_reg->reg_num >= 6
11537 && i.log2_scale_factor == 0))))
11538 goto bad_address;
11539 }
11540 }
11541 return 1;
11542 }
11543
11544 /* Handle vector immediates. */
11545
11546 static int
11547 RC_SAE_immediate (const char *imm_start)
11548 {
11549 const char *pstr = imm_start;
11550
11551 if (*pstr != '{')
11552 return 0;
11553
11554 pstr = RC_SAE_specifier (pstr + 1);
11555 if (pstr == NULL)
11556 return 0;
11557
11558 if (*pstr++ != '}')
11559 {
11560 as_bad (_("Missing '}': '%s'"), imm_start);
11561 return 0;
11562 }
11563 /* RC/SAE immediate string should contain nothing more. */
11564 if (*pstr != 0)
11565 {
11566 as_bad (_("Junk after '}': '%s'"), imm_start);
11567 return 0;
11568 }
11569
11570 /* Internally this doesn't count as an operand. */
11571 --i.operands;
11572
11573 return 1;
11574 }
11575
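/* A sketch of the RC/SAE operand forms parsed above (AT&T syntax;
   illustrative operands only):

     vaddps {rz-sae}, %zmm1, %zmm2, %zmm3
     vucomiss {sae}, %xmm1, %xmm2

   RC_SAE_specifier consumes the braced specifier; anything after the
   closing '}' is diagnosed as junk above.  */
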
11576 static INLINE bool starts_memory_operand (char c)
11577 {
11578 return ISDIGIT (c)
11579 || is_identifier_char (c)
11580 || strchr ("([\"+-!~", c);
11581 }
11582
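/* For example, "-4(%ebp)", "(%eax)", "8+foo" and "label" all begin
   with a character accepted above (a non-exhaustive sketch).  */
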
11583 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
11584 on error. */
11585
11586 static int
11587 i386_att_operand (char *operand_string)
11588 {
11589 const reg_entry *r;
11590 char *end_op;
11591 char *op_string = operand_string;
11592
11593 if (is_space_char (*op_string))
11594 ++op_string;
11595
11596 /* We check for an absolute prefix (differentiating,
11597 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
11598 if (*op_string == ABSOLUTE_PREFIX)
11599 {
11600 ++op_string;
11601 if (is_space_char (*op_string))
11602 ++op_string;
11603 i.jumpabsolute = true;
11604 }
11605
11606 /* Check if operand is a register. */
11607 if ((r = parse_register (op_string, &end_op)) != NULL)
11608 {
11609 i386_operand_type temp;
11610
11611 if (r == &bad_reg)
11612 return 0;
11613
11614 /* Check for a segment override by searching for ':' after a
11615 segment register. */
11616 op_string = end_op;
11617 if (is_space_char (*op_string))
11618 ++op_string;
11619 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
11620 {
11621 i.seg[i.mem_operands] = r;
11622
11623 /* Skip the ':' and whitespace. */
11624 ++op_string;
11625 if (is_space_char (*op_string))
11626 ++op_string;
11627
11628 /* Handle case of %es:*foo. */
11629 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX)
11630 {
11631 ++op_string;
11632 if (is_space_char (*op_string))
11633 ++op_string;
11634 i.jumpabsolute = true;
11635 }
11636
11637 if (!starts_memory_operand (*op_string))
11638 {
11639 as_bad (_("bad memory operand `%s'"), op_string);
11640 return 0;
11641 }
11642 goto do_memory_reference;
11643 }
11644
11645 /* Handle vector operations. */
11646 if (*op_string == '{')
11647 {
11648 op_string = check_VecOperations (op_string);
11649 if (op_string == NULL)
11650 return 0;
11651 }
11652
11653 if (*op_string)
11654 {
11655 as_bad (_("junk `%s' after register"), op_string);
11656 return 0;
11657 }
11658 temp = r->reg_type;
11659 temp.bitfield.baseindex = 0;
11660 i.types[this_operand] = operand_type_or (i.types[this_operand],
11661 temp);
11662 i.types[this_operand].bitfield.unspecified = 0;
11663 i.op[this_operand].regs = r;
11664 i.reg_operands++;
11665
11666 /* A GPR may follow an RC or SAE immediate only if a (vector) register
11667 operand was also present earlier on. */
11668 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
11669 && i.reg_operands == 1)
11670 {
11671 unsigned int j;
11672
11673 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
11674 if (i.rounding.type == RC_NamesTable[j].type)
11675 break;
11676 as_bad (_("`%s': misplaced `{%s}'"),
11677 insn_name (current_templates->start), RC_NamesTable[j].name);
11678 return 0;
11679 }
11680 }
11681 else if (*op_string == REGISTER_PREFIX)
11682 {
11683 as_bad (_("bad register name `%s'"), op_string);
11684 return 0;
11685 }
11686 else if (*op_string == IMMEDIATE_PREFIX)
11687 {
11688 ++op_string;
11689 if (i.jumpabsolute)
11690 {
11691 as_bad (_("immediate operand illegal with absolute jump"));
11692 return 0;
11693 }
11694 if (!i386_immediate (op_string))
11695 return 0;
11696 if (i.rounding.type != rc_none)
11697 {
11698 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
11699 insn_name (current_templates->start));
11700 return 0;
11701 }
11702 }
11703 else if (RC_SAE_immediate (operand_string))
11704 {
11705 /* If it is a RC or SAE immediate, do the necessary placement check:
11706 Only another immediate or a GPR may precede it. */
11707 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
11708 || (i.reg_operands == 1
11709 && i.op[0].regs->reg_type.bitfield.class != Reg))
11710 {
11711 as_bad (_("`%s': misplaced `%s'"),
11712 insn_name (current_templates->start), operand_string);
11713 return 0;
11714 }
11715 }
11716 else if (starts_memory_operand (*op_string))
11717 {
11718 /* This is a memory reference of some sort. */
11719 char *base_string;
11720
11721 /* Start and end of displacement string expression (if found). */
11722 char *displacement_string_start;
11723 char *displacement_string_end;
11724
11725 do_memory_reference:
11726 /* Check for base index form. We detect the base index form by
11727 looking for an ')' at the end of the operand, searching
11728 for the '(' matching it, and finding a REGISTER_PREFIX or ','
11729 after the '('. */
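/* For instance (illustrative):

     movl -8(%ebp, %esi, 4), %eax

   has displacement "-8", base register %ebp, index register %esi and
   scale factor 4; the scan below finds the '(' matching the final
   ')'.  */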
11730 base_string = op_string + strlen (op_string);
11731
11732 /* Handle vector operations. */
11733 --base_string;
11734 if (is_space_char (*base_string))
11735 --base_string;
11736
11737 if (*base_string == '}')
11738 {
11739 char *vop_start = NULL;
11740
11741 while (base_string-- > op_string)
11742 {
11743 if (*base_string == '"')
11744 break;
11745 if (*base_string != '{')
11746 continue;
11747
11748 vop_start = base_string;
11749
11750 --base_string;
11751 if (is_space_char (*base_string))
11752 --base_string;
11753
11754 if (*base_string != '}')
11755 break;
11756
11757 vop_start = NULL;
11758 }
11759
11760 if (!vop_start)
11761 {
11762 as_bad (_("unbalanced curly braces"));
11763 return 0;
11764 }
11765
11766 if (check_VecOperations (vop_start) == NULL)
11767 return 0;
11768 }
11769
11770 /* If we only have a displacement, set-up for it to be parsed later. */
11771 displacement_string_start = op_string;
11772 displacement_string_end = base_string + 1;
11773
11774 if (*base_string == ')')
11775 {
11776 char *temp_string;
11777 unsigned int parens_not_balanced = 0;
11778 bool in_quotes = false;
11779
11780 /* We've already checked that the numbers of left & right ()'s are
11781 equal, and that there's a matching set of double quotes. */
11782 end_op = base_string;
11783 for (temp_string = op_string; temp_string < end_op; temp_string++)
11784 {
11785 if (*temp_string == '\\' && temp_string[1] == '"')
11786 ++temp_string;
11787 else if (*temp_string == '"')
11788 in_quotes = !in_quotes;
11789 else if (!in_quotes)
11790 {
11791 if (*temp_string == '(' && !parens_not_balanced++)
11792 base_string = temp_string;
11793 if (*temp_string == ')')
11794 --parens_not_balanced;
11795 }
11796 }
11797
11798 temp_string = base_string;
11799
11800 /* Skip past '(' and whitespace. */
11801 gas_assert (*base_string == '(');
11802 ++base_string;
11803 if (is_space_char (*base_string))
11804 ++base_string;
11805
11806 if (*base_string == ','
11807 || ((i.base_reg = parse_register (base_string, &end_op))
11808 != NULL))
11809 {
11810 displacement_string_end = temp_string;
11811
11812 i.types[this_operand].bitfield.baseindex = 1;
11813
11814 if (i.base_reg)
11815 {
11816 if (i.base_reg == &bad_reg)
11817 return 0;
11818 base_string = end_op;
11819 if (is_space_char (*base_string))
11820 ++base_string;
11821 }
11822
11823 /* There may be an index reg or scale factor here. */
11824 if (*base_string == ',')
11825 {
11826 ++base_string;
11827 if (is_space_char (*base_string))
11828 ++base_string;
11829
11830 if ((i.index_reg = parse_register (base_string, &end_op))
11831 != NULL)
11832 {
11833 if (i.index_reg == &bad_reg)
11834 return 0;
11835 base_string = end_op;
11836 if (is_space_char (*base_string))
11837 ++base_string;
11838 if (*base_string == ',')
11839 {
11840 ++base_string;
11841 if (is_space_char (*base_string))
11842 ++base_string;
11843 }
11844 else if (*base_string != ')')
11845 {
11846 as_bad (_("expecting `,' or `)' "
11847 "after index register in `%s'"),
11848 operand_string);
11849 return 0;
11850 }
11851 }
11852 else if (*base_string == REGISTER_PREFIX)
11853 {
11854 end_op = strchr (base_string, ',');
11855 if (end_op)
11856 *end_op = '\0';
11857 as_bad (_("bad register name `%s'"), base_string);
11858 return 0;
11859 }
11860
11861 /* Check for scale factor. */
11862 if (*base_string != ')')
11863 {
11864 char *end_scale = i386_scale (base_string);
11865
11866 if (!end_scale)
11867 return 0;
11868
11869 base_string = end_scale;
11870 if (is_space_char (*base_string))
11871 ++base_string;
11872 if (*base_string != ')')
11873 {
11874 as_bad (_("expecting `)' "
11875 "after scale factor in `%s'"),
11876 operand_string);
11877 return 0;
11878 }
11879 }
11880 else if (!i.index_reg)
11881 {
11882 as_bad (_("expecting index register or scale factor "
11883 "after `,'; got '%c'"),
11884 *base_string);
11885 return 0;
11886 }
11887 }
11888 else if (*base_string != ')')
11889 {
11890 as_bad (_("expecting `,' or `)' "
11891 "after base register in `%s'"),
11892 operand_string);
11893 return 0;
11894 }
11895 }
11896 else if (*base_string == REGISTER_PREFIX)
11897 {
11898 end_op = strchr (base_string, ',');
11899 if (end_op)
11900 *end_op = '\0';
11901 as_bad (_("bad register name `%s'"), base_string);
11902 return 0;
11903 }
11904 }
11905
11906 /* If there's an expression beginning the operand, parse it,
11907 assuming displacement_string_start and
11908 displacement_string_end are meaningful. */
11909 if (displacement_string_start != displacement_string_end)
11910 {
11911 if (!i386_displacement (displacement_string_start,
11912 displacement_string_end))
11913 return 0;
11914 }
11915
11916 /* Special case for (%dx) while doing input/output op. */
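/* e.g. "outsb (%esi), (%dx)" or "insb (%dx), %es:(%edi)" -- the
   "(%dx)" operand names the I/O port in register %dx, not a memory
   reference (illustrative).  */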
11917 if (i.base_reg
11918 && i.base_reg->reg_type.bitfield.instance == RegD
11919 && i.base_reg->reg_type.bitfield.word
11920 && i.index_reg == 0
11921 && i.log2_scale_factor == 0
11922 && i.seg[i.mem_operands] == 0
11923 && !operand_type_check (i.types[this_operand], disp))
11924 {
11925 i.types[this_operand] = i.base_reg->reg_type;
11926 i.input_output_operand = true;
11927 return 1;
11928 }
11929
11930 if (i386_index_check (operand_string) == 0)
11931 return 0;
11932 i.flags[this_operand] |= Operand_Mem;
11933 i.mem_operands++;
11934 }
11935 else
11936 {
11937 /* It's not a memory operand; argh! */
11938 as_bad (_("invalid char %s beginning operand %d `%s'"),
11939 output_invalid (*op_string),
11940 this_operand + 1,
11941 op_string);
11942 return 0;
11943 }
11944 return 1; /* Normal return. */
11945 }
11946 \f
11947 /* Calculate the maximum variable size (i.e., excluding fr_fix)
11948 that an rs_machine_dependent frag may reach. */
11949
11950 unsigned int
11951 i386_frag_max_var (fragS *frag)
11952 {
11953 /* The only relaxable frags are for jumps.
11954 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
11955 gas_assert (frag->fr_type == rs_machine_dependent);
11956 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
11957 }
11958
11959 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
11960 static int
11961 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
11962 {
11963 /* STT_GNU_IFUNC symbol must go through PLT. */
11964 if ((symbol_get_bfdsym (fr_symbol)->flags
11965 & BSF_GNU_INDIRECT_FUNCTION) != 0)
11966 return 0;
11967
11968 if (!S_IS_EXTERNAL (fr_symbol))
11969 /* Symbol may be weak or local. */
11970 return !S_IS_WEAK (fr_symbol);
11971
11972 /* Global symbols with non-default visibility can't be preempted. */
11973 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
11974 return 1;
11975
11976 if (fr_var != NO_RELOC)
11977 switch ((enum bfd_reloc_code_real) fr_var)
11978 {
11979 case BFD_RELOC_386_PLT32:
11980 case BFD_RELOC_X86_64_PLT32:
11981 /* Symbol with PLT relocation may be preempted. */
11982 return 0;
11983 default:
11984 abort ();
11985 }
11986
11987 /* Global symbols with default visibility in a shared library may be
11988 preempted by another definition. */
11989 return !shared;
11990 }
11991 #endif
11992
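/* A sketch of the cases above (illustrative assembly):

     .globl foo
     .hidden foo        <- non-default visibility: resolved locally
     .globl bar         <- default visibility: with -mshared, another
                           definition may preempt bar, so branches to
                           it must not be relaxed.  */
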
11993 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
11994 Note: this also works for Skylake and Cascade Lake.
11995 ---------------------------------------------------------------------
11996 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
11997 | ------ | ----------- | ------- | -------- |
11998 | Jo | N | N | Y |
11999 | Jno | N | N | Y |
12000 | Jc/Jb | Y | N | Y |
12001 | Jae/Jnb | Y | N | Y |
12002 | Je/Jz | Y | Y | Y |
12003 | Jne/Jnz | Y | Y | Y |
12004 | Jna/Jbe | Y | N | Y |
12005 | Ja/Jnbe | Y | N | Y |
12006 | Js | N | N | Y |
12007 | Jns | N | N | Y |
12008 | Jp/Jpe | N | N | Y |
12009 | Jnp/Jpo | N | N | Y |
12010 | Jl/Jnge | Y | Y | Y |
12011 | Jge/Jnl | Y | Y | Y |
12012 | Jle/Jng | Y | Y | Y |
12013 | Jg/Jnle | Y | Y | Y |
12014 --------------------------------------------------------------------- */
12015 static int
12016 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
12017 {
12018 if (mf_cmp == mf_cmp_alu_cmp)
12019 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
12020 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
12021 if (mf_cmp == mf_cmp_incdec)
12022 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
12023 || mf_jcc == mf_jcc_jle);
12024 if (mf_cmp == mf_cmp_test_and)
12025 return 1;
12026 return 0;
12027 }
12028
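/* For example, per the table above, "cmp %eax, %ebx; jle label" and
   "dec %ecx; jnz label" are fusible, while "inc %eax; jc label" is
   not (INC/DEC do not write CF).  */
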
12029 /* Return the next non-empty frag. */
12030
12031 static fragS *
12032 i386_next_non_empty_frag (fragS *fragP)
12033 {
12034 /* There may be a frag with a ".fill 0" when there is no room in
12035 the current frag for frag_grow in output_insn. */
12036 for (fragP = fragP->fr_next;
12037 (fragP != NULL
12038 && fragP->fr_type == rs_fill
12039 && fragP->fr_fix == 0);
12040 fragP = fragP->fr_next)
12041 ;
12042 return fragP;
12043 }
12044
12045 /* Return the next jcc frag after BRANCH_PADDING. */
12046
12047 static fragS *
12048 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
12049 {
12050 fragS *branch_fragP;
12051 if (!pad_fragP)
12052 return NULL;
12053
12054 if (pad_fragP->fr_type == rs_machine_dependent
12055 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
12056 == BRANCH_PADDING))
12057 {
12058 branch_fragP = i386_next_non_empty_frag (pad_fragP);
12059 if (branch_fragP->fr_type != rs_machine_dependent)
12060 return NULL;
12061 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
12062 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
12063 pad_fragP->tc_frag_data.mf_type))
12064 return branch_fragP;
12065 }
12066
12067 return NULL;
12068 }
12069
12070 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
12071
12072 static void
12073 i386_classify_machine_dependent_frag (fragS *fragP)
12074 {
12075 fragS *cmp_fragP;
12076 fragS *pad_fragP;
12077 fragS *branch_fragP;
12078 fragS *next_fragP;
12079 unsigned int max_prefix_length;
12080
12081 if (fragP->tc_frag_data.classified)
12082 return;
12083
12084 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
12085 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
12086 for (next_fragP = fragP;
12087 next_fragP != NULL;
12088 next_fragP = next_fragP->fr_next)
12089 {
12090 next_fragP->tc_frag_data.classified = 1;
12091 if (next_fragP->fr_type == rs_machine_dependent)
12092 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
12093 {
12094 case BRANCH_PADDING:
12095 /* The BRANCH_PADDING frag must be followed by a branch
12096 frag. */
12097 branch_fragP = i386_next_non_empty_frag (next_fragP);
12098 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12099 break;
12100 case FUSED_JCC_PADDING:
12101 /* Check if this is a fused jcc:
12102 FUSED_JCC_PADDING
12103 CMP like instruction
12104 BRANCH_PADDING
12105 COND_JUMP
12106 */
12107 cmp_fragP = i386_next_non_empty_frag (next_fragP);
12108 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
12109 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
12110 if (branch_fragP)
12111 {
12112 /* The BRANCH_PADDING frag is merged with the
12113 FUSED_JCC_PADDING frag. */
12114 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
12115 /* CMP like instruction size. */
12116 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
12117 frag_wane (pad_fragP);
12118 /* Skip to branch_fragP. */
12119 next_fragP = branch_fragP;
12120 }
12121 else if (next_fragP->tc_frag_data.max_prefix_length)
12122 {
12123 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
12124 a fused jcc. */
12125 next_fragP->fr_subtype
12126 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
12127 next_fragP->tc_frag_data.max_bytes
12128 = next_fragP->tc_frag_data.max_prefix_length;
12129 /* This will be updated in the BRANCH_PREFIX scan. */
12130 next_fragP->tc_frag_data.max_prefix_length = 0;
12131 }
12132 else
12133 frag_wane (next_fragP);
12134 break;
12135 }
12136 }
12137
12138 /* Stop if there is no BRANCH_PREFIX. */
12139 if (!align_branch_prefix_size)
12140 return;
12141
12142 /* Scan for BRANCH_PREFIX. */
12143 for (; fragP != NULL; fragP = fragP->fr_next)
12144 {
12145 if (fragP->fr_type != rs_machine_dependent
12146 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12147 != BRANCH_PREFIX))
12148 continue;
12149
12150 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
12151 COND_JUMP_PREFIX. */
12152 max_prefix_length = 0;
12153 for (next_fragP = fragP;
12154 next_fragP != NULL;
12155 next_fragP = next_fragP->fr_next)
12156 {
12157 if (next_fragP->fr_type == rs_fill)
12158 /* Skip rs_fill frags. */
12159 continue;
12160 else if (next_fragP->fr_type != rs_machine_dependent)
12161 /* Stop for all other frags. */
12162 break;
12163
12164 /* rs_machine_dependent frags. */
12165 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12166 == BRANCH_PREFIX)
12167 {
12168 /* Count BRANCH_PREFIX frags. */
12169 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
12170 {
12171 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
12172 frag_wane (next_fragP);
12173 }
12174 else
12175 max_prefix_length
12176 += next_fragP->tc_frag_data.max_bytes;
12177 }
12178 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12179 == BRANCH_PADDING)
12180 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12181 == FUSED_JCC_PADDING))
12182 {
12183 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
12184 fragP->tc_frag_data.u.padding_fragP = next_fragP;
12185 break;
12186 }
12187 else
12188 /* Stop for other rs_machine_dependent frags. */
12189 break;
12190 }
12191
12192 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
12193
12194 /* Skip to the next frag. */
12195 fragP = next_fragP;
12196 }
12197 }
12198
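/* Illustrative frag chains handled above (a sketch):

     FUSED_JCC_PADDING -> cmp insn -> BRANCH_PADDING -> jcc
       merges the BRANCH_PADDING into the FUSED_JCC_PADDING frag,
       recording the cmp size, while a lone

     BRANCH_PADDING -> jmp/jcc
       merely records the branch frag it pads for.  */
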
12199 /* Compute padding size for
12200
12201 FUSED_JCC_PADDING
12202 CMP like instruction
12203 BRANCH_PADDING
12204 COND_JUMP/UNCOND_JUMP
12205
12206 or
12207
12208 BRANCH_PADDING
12209 COND_JUMP/UNCOND_JUMP
12210 */
12211
12212 static int
12213 i386_branch_padding_size (fragS *fragP, offsetT address)
12214 {
12215 unsigned int offset, size, padding_size;
12216 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
12217
12218 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
12219 if (!address)
12220 address = fragP->fr_address;
12221 address += fragP->fr_fix;
12222
12223 /* CMP like instruction size. */
12224 size = fragP->tc_frag_data.cmp_size;
12225
12226 /* The base size of the branch frag. */
12227 size += branch_fragP->fr_fix;
12228
12229 /* Add opcode and displacement bytes for the rs_machine_dependent
12230 branch frag. */
12231 if (branch_fragP->fr_type == rs_machine_dependent)
12232 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
12233
12234 /* Check if branch is within boundary and doesn't end at the last
12235 byte. */
12236 offset = address & ((1U << align_branch_power) - 1);
12237 if ((offset + size) >= (1U << align_branch_power))
12238 /* Padding needed to avoid crossing boundary. */
12239 padding_size = (1U << align_branch_power) - offset;
12240 else
12241 /* No padding needed. */
12242 padding_size = 0;
12243
12244 /* The return value may be saved in tc_frag_data.length, which is
12245 an unsigned byte. */
12246 if (!fits_in_unsigned_byte (padding_size))
12247 abort ();
12248
12249 return padding_size;
12250 }
12251
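/* A worked example, assuming align_branch_power == 5 (32-byte
   boundaries): with address 0x401c and size 9, offset = 0x401c & 31
   = 28, and 28 + 9 >= 32, so padding_size = 32 - 28 = 4 bytes moves
   the branch onto the next 32-byte boundary.  */
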
12252 /* i386_generic_table_relax_frag()
12253
12254 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
12255 grow/shrink padding to align branch frags. Hand others to
12256 relax_frag(). */
12257
12258 long
12259 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
12260 {
12261 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12262 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12263 {
12264 long padding_size = i386_branch_padding_size (fragP, 0);
12265 long grow = padding_size - fragP->tc_frag_data.length;
12266
12267 /* When the BRANCH_PREFIX frag is used, the computed address
12268 must match the actual address and there should be no padding. */
12269 if (fragP->tc_frag_data.padding_address
12270 && (fragP->tc_frag_data.padding_address != fragP->fr_address
12271 || padding_size))
12272 abort ();
12273
12274 /* Update the padding size. */
12275 if (grow)
12276 fragP->tc_frag_data.length = padding_size;
12277
12278 return grow;
12279 }
12280 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12281 {
12282 fragS *padding_fragP, *next_fragP;
12283 long padding_size, left_size, last_size;
12284
12285 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12286 if (!padding_fragP)
12287 /* Use the padding set by the leading BRANCH_PREFIX frag. */
12288 return (fragP->tc_frag_data.length
12289 - fragP->tc_frag_data.last_length);
12290
12291 /* Compute the relative address of the padding frag the very first
12292 time, when the BRANCH_PREFIX frag sizes are all zero. */
12293 if (!fragP->tc_frag_data.padding_address)
12294 fragP->tc_frag_data.padding_address
12295 = padding_fragP->fr_address - (fragP->fr_address - stretch);
12296
12297 /* First update the last length from the previous iteration. */
12298 left_size = fragP->tc_frag_data.prefix_length;
12299 for (next_fragP = fragP;
12300 next_fragP != padding_fragP;
12301 next_fragP = next_fragP->fr_next)
12302 if (next_fragP->fr_type == rs_machine_dependent
12303 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12304 == BRANCH_PREFIX))
12305 {
12306 if (left_size)
12307 {
12308 int max = next_fragP->tc_frag_data.max_bytes;
12309 if (max)
12310 {
12311 int size;
12312 if (max > left_size)
12313 size = left_size;
12314 else
12315 size = max;
12316 left_size -= size;
12317 next_fragP->tc_frag_data.last_length = size;
12318 }
12319 }
12320 else
12321 next_fragP->tc_frag_data.last_length = 0;
12322 }
12323
12324 /* Check the padding size for the padding frag. */
12325 padding_size = i386_branch_padding_size
12326 (padding_fragP, (fragP->fr_address
12327 + fragP->tc_frag_data.padding_address));
12328
12329 last_size = fragP->tc_frag_data.prefix_length;
12330 /* Check if there is a change from the last iteration. */
12331 if (padding_size == last_size)
12332 {
12333 /* Update the expected address of the padding frag. */
12334 padding_fragP->tc_frag_data.padding_address
12335 = (fragP->fr_address + padding_size
12336 + fragP->tc_frag_data.padding_address);
12337 return 0;
12338 }
12339
12340 if (padding_size > fragP->tc_frag_data.max_prefix_length)
12341 {
12342 /* No padding if there is insufficient room. Clear the
12343 expected address of the padding frag. */
12344 padding_fragP->tc_frag_data.padding_address = 0;
12345 padding_size = 0;
12346 }
12347 else
12348 /* Store the expected address of the padding frag. */
12349 padding_fragP->tc_frag_data.padding_address
12350 = (fragP->fr_address + padding_size
12351 + fragP->tc_frag_data.padding_address);
12352
12353 fragP->tc_frag_data.prefix_length = padding_size;
12354
12355 /* Update the length for the current iteration. */
12356 left_size = padding_size;
12357 for (next_fragP = fragP;
12358 next_fragP != padding_fragP;
12359 next_fragP = next_fragP->fr_next)
12360 if (next_fragP->fr_type == rs_machine_dependent
12361 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
12362 == BRANCH_PREFIX))
12363 {
12364 if (left_size)
12365 {
12366 int max = next_fragP->tc_frag_data.max_bytes;
12367 if (max)
12368 {
12369 int size;
12370 if (max > left_size)
12371 size = left_size;
12372 else
12373 size = max;
12374 left_size -= size;
12375 next_fragP->tc_frag_data.length = size;
12376 }
12377 }
12378 else
12379 next_fragP->tc_frag_data.length = 0;
12380 }
12381
12382 return (fragP->tc_frag_data.length
12383 - fragP->tc_frag_data.last_length);
12384 }
12385 return relax_frag (segment, fragP, stretch);
12386 }
12387
12388 /* md_estimate_size_before_relax()
12389
12390 Called just before relax() for rs_machine_dependent frags. The x86
12391 assembler uses these frags to handle variable size jump
12392 instructions.
12393
12394 Any symbol that is now undefined will not become defined.
12395 Return the correct fr_subtype in the frag.
12396 Return the initial "guess for variable size of frag" to caller.
12397 The guess is actually the growth beyond the fixed part. Whatever
12398 we do to grow the fixed or variable part contributes to our
12399 returned value. */
12400
12401 int
12402 md_estimate_size_before_relax (fragS *fragP, segT segment)
12403 {
12404 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12405 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
12406 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
12407 {
12408 i386_classify_machine_dependent_frag (fragP);
12409 return fragP->tc_frag_data.length;
12410 }
12411
12412 /* We've already got fragP->fr_subtype right; all we have to do is
12413 check for un-relaxable symbols. On an ELF system, we can't relax
12414 an externally visible symbol, because it may be overridden by a
12415 shared library. */
12416 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
12417 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12418 || (IS_ELF
12419 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
12420 fragP->fr_var))
12421 #endif
12422 #if defined (OBJ_COFF) && defined (TE_PE)
12423 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
12424 && S_IS_WEAK (fragP->fr_symbol))
12425 #endif
12426 )
12427 {
12428 /* Symbol is undefined in this segment, or we need to keep a
12429 reloc so that weak symbols can be overridden. */
12430 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
12431 enum bfd_reloc_code_real reloc_type;
12432 unsigned char *opcode;
12433 int old_fr_fix;
12434 fixS *fixP = NULL;
12435
12436 if (fragP->fr_var != NO_RELOC)
12437 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
12438 else if (size == 2)
12439 reloc_type = BFD_RELOC_16_PCREL;
12440 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12441 else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
12442 && need_plt32_p (fragP->fr_symbol))
12443 reloc_type = BFD_RELOC_X86_64_PLT32;
12444 #endif
12445 else
12446 reloc_type = BFD_RELOC_32_PCREL;
12447
12448 old_fr_fix = fragP->fr_fix;
12449 opcode = (unsigned char *) fragP->fr_opcode;
12450
12451 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
12452 {
12453 case UNCOND_JUMP:
12454 /* Make jmp (0xeb) a (d)word displacement jump. */
12455 opcode[0] = 0xe9;
12456 fragP->fr_fix += size;
12457 fixP = fix_new (fragP, old_fr_fix, size,
12458 fragP->fr_symbol,
12459 fragP->fr_offset, 1,
12460 reloc_type);
12461 break;
12462
12463 case COND_JUMP86:
12464 if (size == 2
12465 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
12466 {
12467 /* Negate the condition, and branch past an
12468 unconditional jump. */
12469 opcode[0] ^= 1;
12470 opcode[1] = 3;
12471 /* Insert an unconditional jump. */
12472 opcode[2] = 0xe9;
12473 /* We added two extra opcode bytes, and have a two byte
12474 offset. */
12475 fragP->fr_fix += 2 + 2;
12476 fix_new (fragP, old_fr_fix + 2, 2,
12477 fragP->fr_symbol,
12478 fragP->fr_offset, 1,
12479 reloc_type);
12480 break;
12481 }
12482 /* Fall through. */
12483
12484 case COND_JUMP:
12485 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
12486 {
12487 fragP->fr_fix += 1;
12488 fixP = fix_new (fragP, old_fr_fix, 1,
12489 fragP->fr_symbol,
12490 fragP->fr_offset, 1,
12491 BFD_RELOC_8_PCREL);
12492 fixP->fx_signed = 1;
12493 break;
12494 }
12495
12496 /* This changes the byte-displacement jump 0x7N
12497 to the (d)word-displacement jump 0x0f,0x8N. */
12498 opcode[1] = opcode[0] + 0x10;
12499 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12500 /* We've added an opcode byte. */
12501 fragP->fr_fix += 1 + size;
12502 fixP = fix_new (fragP, old_fr_fix + 1, size,
12503 fragP->fr_symbol,
12504 fragP->fr_offset, 1,
12505 reloc_type);
12506 break;
12507
12508 default:
12509 BAD_CASE (fragP->fr_subtype);
12510 break;
12511 }
12512
12513 /* All jumps handled here are signed, but don't unconditionally use a
12514 signed limit check for 32 and 16 bit jumps as we want to allow wrap
12515 around at 4G (outside of 64-bit mode) and 64k. */
12516 if (size == 4 && flag_code == CODE_64BIT)
12517 fixP->fx_signed = 1;
12518
12519 frag_wane (fragP);
12520 return fragP->fr_fix - old_fr_fix;
12521 }
12522
12523 /* Guess size depending on current relax state. Initially the relax
12524 state will correspond to a short jump and we return 1, because
12525 the variable part of the frag (the branch offset) is one byte
12526 long. However, we can relax a section more than once and in that
12527 case we must either set fr_subtype back to the unrelaxed state,
12528 or return the value for the appropriate branch. */
12529 return md_relax_table[fragP->fr_subtype].rlx_length;
12530 }
12531
12532 /* Called after relax() is finished.
12533
12534 In: Address of frag.
12535 fr_type == rs_machine_dependent.
12536 fr_subtype is what the address relaxed to.
12537
12538 Out: Any fixSs and constants are set up.
12539 Caller will turn frag into a ".space 0". */
12540
12541 void
12542 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
12543 fragS *fragP)
12544 {
12545 unsigned char *opcode;
12546 unsigned char *where_to_put_displacement = NULL;
12547 offsetT target_address;
12548 offsetT opcode_address;
12549 unsigned int extension = 0;
12550 offsetT displacement_from_opcode_start;
12551
12552 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
12553 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
12554 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12555 {
12556 /* Generate nop padding. */
12557 unsigned int size = fragP->tc_frag_data.length;
12558 if (size)
12559 {
12560 if (size > fragP->tc_frag_data.max_bytes)
12561 abort ();
12562
12563 if (flag_debug)
12564 {
12565 const char *msg;
12566 const char *branch = "branch";
12567 const char *prefix = "";
12568 fragS *padding_fragP;
12569 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
12570 == BRANCH_PREFIX)
12571 {
12572 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
12573 switch (fragP->tc_frag_data.default_prefix)
12574 {
12575 default:
12576 abort ();
12577 break;
12578 case CS_PREFIX_OPCODE:
12579 prefix = " cs";
12580 break;
12581 case DS_PREFIX_OPCODE:
12582 prefix = " ds";
12583 break;
12584 case ES_PREFIX_OPCODE:
12585 prefix = " es";
12586 break;
12587 case FS_PREFIX_OPCODE:
12588 prefix = " fs";
12589 break;
12590 case GS_PREFIX_OPCODE:
12591 prefix = " gs";
12592 break;
12593 case SS_PREFIX_OPCODE:
12594 prefix = " ss";
12595 break;
12596 }
12597 if (padding_fragP)
12598 msg = _("%s:%u: add %d%s at 0x%llx to align "
12599 "%s within %d-byte boundary\n");
12600 else
12601 msg = _("%s:%u: add additional %d%s at 0x%llx to "
12602 "align %s within %d-byte boundary\n");
12603 }
12604 else
12605 {
12606 padding_fragP = fragP;
12607 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
12608 "%s within %d-byte boundary\n");
12609 }
12610
12611 if (padding_fragP)
12612 switch (padding_fragP->tc_frag_data.branch_type)
12613 {
12614 case align_branch_jcc:
12615 branch = "jcc";
12616 break;
12617 case align_branch_fused:
12618 branch = "fused jcc";
12619 break;
12620 case align_branch_jmp:
12621 branch = "jmp";
12622 break;
12623 case align_branch_call:
12624 branch = "call";
12625 break;
12626 case align_branch_indirect:
12627 branch = "indirect branch";
12628 break;
12629 case align_branch_ret:
12630 branch = "ret";
12631 break;
12632 default:
12633 break;
12634 }
12635
12636 fprintf (stdout, msg,
12637 fragP->fr_file, fragP->fr_line, size, prefix,
12638 (long long) fragP->fr_address, branch,
12639 1 << align_branch_power);
12640 }
12641 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
12642 memset (fragP->fr_opcode,
12643 fragP->tc_frag_data.default_prefix, size);
12644 else
12645 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
12646 size, 0);
12647 fragP->fr_fix += size;
12648 }
12649 return;
12650 }
12651
12652 opcode = (unsigned char *) fragP->fr_opcode;
12653
12654 /* Address we want to reach in file space. */
12655 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
12656
12657 /* Address opcode resides at in file space. */
12658 opcode_address = fragP->fr_address + fragP->fr_fix;
12659
12660 /* Displacement from opcode start to fill into instruction. */
12661 displacement_from_opcode_start = target_address - opcode_address;
12662
12663 if ((fragP->fr_subtype & BIG) == 0)
12664 {
12665 /* Don't have to change opcode. */
12666 extension = 1; /* 1 opcode + 1 displacement */
12667 where_to_put_displacement = &opcode[1];
12668 }
12669 else
12670 {
12671 if (no_cond_jump_promotion
12672 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
12673 as_warn_where (fragP->fr_file, fragP->fr_line,
12674 _("long jump required"));
12675
12676 switch (fragP->fr_subtype)
12677 {
12678 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
12679 extension = 4; /* 1 opcode + 4 displacement */
12680 opcode[0] = 0xe9;
12681 where_to_put_displacement = &opcode[1];
12682 break;
12683
12684 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
12685 extension = 2; /* 1 opcode + 2 displacement */
12686 opcode[0] = 0xe9;
12687 where_to_put_displacement = &opcode[1];
12688 break;
12689
12690 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
12691 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
12692 extension = 5; /* 2 opcode + 4 displacement */
12693 opcode[1] = opcode[0] + 0x10;
12694 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12695 where_to_put_displacement = &opcode[2];
12696 break;
12697
12698 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
12699 extension = 3; /* 2 opcode + 2 displacement */
12700 opcode[1] = opcode[0] + 0x10;
12701 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
12702 where_to_put_displacement = &opcode[2];
12703 break;
12704
12705 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
12706 extension = 4;
12707 opcode[0] ^= 1;
12708 opcode[1] = 3;
12709 opcode[2] = 0xe9;
12710 where_to_put_displacement = &opcode[3];
12711 break;
12712
12713 default:
12714 BAD_CASE (fragP->fr_subtype);
12715 break;
12716 }
12717 }
12718
12719 /* If the size is less than four, we are sure that the operand fits,
12720 but if it's 4, then the displacement could be larger
12721 than +/- 2GB. */
12722 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
12723 && object_64bit
12724 && ((addressT) (displacement_from_opcode_start - extension
12725 + ((addressT) 1 << 31))
12726 > (((addressT) 2 << 31) - 1)))
12727 {
12728 as_bad_where (fragP->fr_file, fragP->fr_line,
12729 _("jump target out of range"));
12730 /* Make us emit 0. */
12731 displacement_from_opcode_start = extension;
12732 }
12733 /* Now put displacement after opcode. */
12734 md_number_to_chars ((char *) where_to_put_displacement,
12735 (valueT) (displacement_from_opcode_start - extension),
12736 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
12737 fragP->fr_fix += extension;
12738 }
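
/* Illustrative encodings for the relaxations above (a sketch):

     74 xx                je  disp8   (byte form)
     0f 84 xx xx xx xx    je  disp32  (opcode[0] + 0x10 behind the
                                       0x0f escape)
     eb xx                jmp disp8
     e9 xx xx xx xx       jmp disp32  */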
12739 \f
12740 /* Apply a fixup (fixP) to segment data, once it has been determined
12741 by our caller that we have all the info we need to fix it up.
12742
12743 Parameter valP is the pointer to the value of the bits.
12744
12745 On the 386, immediates, displacements, and data pointers are all in
12746 the same (little-endian) format, so we don't need to care about which
12747 we are handling. */
12748
12749 void
12750 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
12751 {
12752 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
12753 valueT value = *valP;
12754
12755 #if !defined (TE_Mach)
12756 if (fixP->fx_pcrel)
12757 {
12758 switch (fixP->fx_r_type)
12759 {
12760 default:
12761 break;
12762
12763 case BFD_RELOC_64:
12764 fixP->fx_r_type = BFD_RELOC_64_PCREL;
12765 break;
12766 case BFD_RELOC_32:
12767 case BFD_RELOC_X86_64_32S:
12768 fixP->fx_r_type = BFD_RELOC_32_PCREL;
12769 break;
12770 case BFD_RELOC_16:
12771 fixP->fx_r_type = BFD_RELOC_16_PCREL;
12772 break;
12773 case BFD_RELOC_8:
12774 fixP->fx_r_type = BFD_RELOC_8_PCREL;
12775 break;
12776 }
12777 }
12778
12779 if (fixP->fx_addsy != NULL
12780 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
12781 || fixP->fx_r_type == BFD_RELOC_64_PCREL
12782 || fixP->fx_r_type == BFD_RELOC_16_PCREL
12783 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
12784 && !use_rela_relocations)
12785 {
12786 /* This is a hack. There should be a better way to handle this.
12787 This covers for the fact that bfd_install_relocation will
12788 subtract the current location (for partial_inplace, PC relative
12789 relocations); see more below. */
12790 #ifndef OBJ_AOUT
12791 if (IS_ELF
12792 #ifdef TE_PE
12793 || OUTPUT_FLAVOR == bfd_target_coff_flavour
12794 #endif
12795 )
12796 value += fixP->fx_where + fixP->fx_frag->fr_address;
12797 #endif
12798 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12799 if (IS_ELF)
12800 {
12801 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
12802
12803 if ((sym_seg == seg
12804 || (symbol_section_p (fixP->fx_addsy)
12805 && sym_seg != absolute_section))
12806 && !generic_force_reloc (fixP))
12807 {
12808 /* Yes, we add the values in twice. This is because
12809 bfd_install_relocation subtracts them out again. I think
12810 bfd_install_relocation is broken, but I don't dare change
12811 it. FIXME. */
12812 value += fixP->fx_where + fixP->fx_frag->fr_address;
12813 }
12814 }
12815 #endif
12816 #if defined (OBJ_COFF) && defined (TE_PE)
12817 /* For some reason, the PE format does not store a
12818 section address offset for a PC relative symbol. */
12819 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
12820 || S_IS_WEAK (fixP->fx_addsy))
12821 value += md_pcrel_from (fixP);
12822 #endif
12823 }
12824 #if defined (OBJ_COFF) && defined (TE_PE)
12825 if (fixP->fx_addsy != NULL
12826 && S_IS_WEAK (fixP->fx_addsy)
12827 /* PR 16858: Do not modify weak function references. */
12828 && ! fixP->fx_pcrel)
12829 {
12830 #if !defined (TE_PEP)
12831 /* For x86 PE weak function symbols are neither PC-relative
12832 nor do they set S_IS_FUNCTION. So the only reliable way
12833 to detect them is to check the flags of their containing
12834 section. */
12835 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
12836 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
12837 ;
12838 else
12839 #endif
12840 value -= S_GET_VALUE (fixP->fx_addsy);
12841 }
12842 #endif
12843
12844 /* Fix a few things - the dynamic linker expects certain values here,
12845 and we must not disappoint it. */
12846 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12847 if (IS_ELF && fixP->fx_addsy)
12848 switch (fixP->fx_r_type)
12849 {
12850 case BFD_RELOC_386_PLT32:
12851 case BFD_RELOC_X86_64_PLT32:
12852 /* Make the jump instruction point to the address of the operand.
12853 At runtime we merely add the offset to the actual PLT entry.
12854 NB: Subtract the offset size only for jump instructions. */
12855 if (fixP->fx_pcrel)
12856 value = -4;
12857 break;
12858
12859 case BFD_RELOC_386_TLS_GD:
12860 case BFD_RELOC_386_TLS_LDM:
12861 case BFD_RELOC_386_TLS_IE_32:
12862 case BFD_RELOC_386_TLS_IE:
12863 case BFD_RELOC_386_TLS_GOTIE:
12864 case BFD_RELOC_386_TLS_GOTDESC:
12865 case BFD_RELOC_X86_64_TLSGD:
12866 case BFD_RELOC_X86_64_TLSLD:
12867 case BFD_RELOC_X86_64_GOTTPOFF:
12868 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
12869 value = 0; /* Fully resolved at runtime. No addend. */
12870 /* Fallthrough */
12871 case BFD_RELOC_386_TLS_LE:
12872 case BFD_RELOC_386_TLS_LDO_32:
12873 case BFD_RELOC_386_TLS_LE_32:
12874 case BFD_RELOC_X86_64_DTPOFF32:
12875 case BFD_RELOC_X86_64_DTPOFF64:
12876 case BFD_RELOC_X86_64_TPOFF32:
12877 case BFD_RELOC_X86_64_TPOFF64:
12878 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12879 break;
12880
12881 case BFD_RELOC_386_TLS_DESC_CALL:
12882 case BFD_RELOC_X86_64_TLSDESC_CALL:
12883 value = 0; /* Fully resolved at runtime. No addend. */
12884 S_SET_THREAD_LOCAL (fixP->fx_addsy);
12885 fixP->fx_done = 0;
12886 return;
12887
12888 case BFD_RELOC_VTABLE_INHERIT:
12889 case BFD_RELOC_VTABLE_ENTRY:
12890 fixP->fx_done = 0;
12891 return;
12892
12893 default:
12894 break;
12895 }
12896 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
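
/* For instance, "call foo@PLT" is e8 followed by a 4-byte field; the
   -4 stored above makes that field point back at the operand itself,
   which is what the runtime PLT arithmetic expects (a sketch).  */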
12897
12898 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
12899 if (!object_64bit)
12900 value = extend_to_32bit_address (value);
12901
12902 *valP = value;
12903 #endif /* !defined (TE_Mach) */
12904
12905 /* Are we finished with this relocation now? */
12906 if (fixP->fx_addsy == NULL)
12907 {
12908 fixP->fx_done = 1;
12909 switch (fixP->fx_r_type)
12910 {
12911 case BFD_RELOC_X86_64_32S:
12912 fixP->fx_signed = 1;
12913 break;
12914
12915 default:
12916 break;
12917 }
12918 }
12919 #if defined (OBJ_COFF) && defined (TE_PE)
12920 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
12921 {
12922 fixP->fx_done = 0;
12923 /* Remember value for tc_gen_reloc. */
12924 fixP->fx_addnumber = value;
12925 /* Clear out the frag for now. */
12926 value = 0;
12927 }
12928 #endif
12929 else if (use_rela_relocations)
12930 {
12931 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
12932 fixP->fx_no_overflow = 1;
12933 /* Remember value for tc_gen_reloc. */
12934 fixP->fx_addnumber = value;
12935 value = 0;
12936 }
12937
12938 md_number_to_chars (p, value, fixP->fx_size);
12939 }
12940 \f
12941 const char *
12942 md_atof (int type, char *litP, int *sizeP)
12943 {
12944 /* This outputs the LITTLENUMs in REVERSE order,
12945 as required by the little-endian 386. */
12946 return ieee_md_atof (type, litP, sizeP, false);
12947 }
12948 \f
12949 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
12950
12951 static char *
12952 output_invalid (int c)
12953 {
12954 if (ISPRINT (c))
12955 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12956 "'%c'", c);
12957 else
12958 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
12959 "(0x%x)", (unsigned char) c);
12960 return output_invalid_buf;
12961 }
12962
12963 /* Verify that @r can be used in the current context. */
12964
12965 static bool check_register (const reg_entry *r)
12966 {
12967 if (allow_pseudo_reg)
12968 return true;
12969
12970 if (operand_type_all_zero (&r->reg_type))
12971 return false;
12972
12973 if ((r->reg_type.bitfield.dword
12974 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
12975 || r->reg_type.bitfield.class == RegCR
12976 || r->reg_type.bitfield.class == RegDR)
12977 && !cpu_arch_flags.bitfield.cpui386)
12978 return false;
12979
12980 if (r->reg_type.bitfield.class == RegTR
12981 && (flag_code == CODE_64BIT
12982 || !cpu_arch_flags.bitfield.cpui386
12983 || cpu_arch_isa_flags.bitfield.cpui586
12984 || cpu_arch_isa_flags.bitfield.cpui686))
12985 return false;
12986
12987 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
12988 return false;
12989
12990 if (!cpu_arch_flags.bitfield.cpuavx512f)
12991 {
12992 if (r->reg_type.bitfield.zmmword
12993 || r->reg_type.bitfield.class == RegMask)
12994 return false;
12995
12996 if (!cpu_arch_flags.bitfield.cpuavx)
12997 {
12998 if (r->reg_type.bitfield.ymmword)
12999 return false;
13000
13001 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
13002 return false;
13003 }
13004 }
13005
13006 if (r->reg_type.bitfield.tmmword
13007 && (!cpu_arch_flags.bitfield.cpuamx_tile
13008 || flag_code != CODE_64BIT))
13009 return false;
13010
13011 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
13012 return false;
13013
13014 /* Don't allow the fake index register unless allow_index_reg is non-zero. */
13015 if (!allow_index_reg && r->reg_num == RegIZ)
13016 return false;
13017
13018 /* Upper 16 vector registers are only available with VREX in 64bit
13019 mode, and require EVEX encoding. */
13020 if (r->reg_flags & RegVRex)
13021 {
13022 if (!cpu_arch_flags.bitfield.cpuavx512f
13023 || flag_code != CODE_64BIT)
13024 return false;
13025
13026 if (i.vec_encoding == vex_encoding_default)
13027 i.vec_encoding = vex_encoding_evex;
13028 else if (i.vec_encoding != vex_encoding_evex)
13029 i.vec_encoding = vex_encoding_error;
13030 }
13031
13032 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
13033 && (!cpu_arch_flags.bitfield.cpulm || r->reg_type.bitfield.class != RegCR)
13034 && flag_code != CODE_64BIT)
13035 return false;
13036
13037 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
13038 && !intel_syntax)
13039 return false;
13040
13041 return true;
13042 }
13043
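/* For example (a sketch): with ".arch i286", %eax and %cr0 are
   rejected above; %ymm0 is rejected unless AVX is enabled; %r8 and
   %rax are rejected outside 64-bit mode.  */
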
13044 /* REG_STRING starts *before* REGISTER_PREFIX. */
13045
13046 static const reg_entry *
13047 parse_real_register (char *reg_string, char **end_op)
13048 {
13049 char *s = reg_string;
13050 char *p;
13051 char reg_name_given[MAX_REG_NAME_SIZE + 1];
13052 const reg_entry *r;
13053
13054 /* Skip possible REGISTER_PREFIX and possible whitespace. */
13055 if (*s == REGISTER_PREFIX)
13056 ++s;
13057
13058 if (is_space_char (*s))
13059 ++s;
13060
13061 p = reg_name_given;
13062 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
13063 {
13064 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
13065 return (const reg_entry *) NULL;
13066 s++;
13067 }
13068
13069 /* For naked regs, make sure that we are not dealing with an identifier.
13070 This prevents confusing an identifier like `eax_var' with register
13071 `eax'. */
13072 if (allow_naked_reg && identifier_chars[(unsigned char) *s])
13073 return (const reg_entry *) NULL;
13074
13075 *end_op = s;
13076
13077 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
13078
13079 /* Handle floating point regs, allowing spaces in the (i) part. */
13080 if (r == reg_st0)
13081 {
13082 if (!cpu_arch_flags.bitfield.cpu8087
13083 && !cpu_arch_flags.bitfield.cpu287
13084 && !cpu_arch_flags.bitfield.cpu387
13085 && !allow_pseudo_reg)
13086 return (const reg_entry *) NULL;
13087
13088 if (is_space_char (*s))
13089 ++s;
13090 if (*s == '(')
13091 {
13092 ++s;
13093 if (is_space_char (*s))
13094 ++s;
13095 if (*s >= '0' && *s <= '7')
13096 {
13097 int fpr = *s - '0';
13098 ++s;
13099 if (is_space_char (*s))
13100 ++s;
13101 if (*s == ')')
13102 {
13103 *end_op = s + 1;
13104 know (r[fpr].reg_num == fpr);
13105 return r + fpr;
13106 }
13107 }
13108 /* We have "%st(" then garbage. */
13109 return (const reg_entry *) NULL;
13110 }
13111 }
13112
13113 return r && check_register (r) ? r : NULL;
13114 }
13115
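/* For example, "%st", "%st(0)" and even "% st ( 3 )" (with single
   spaces) all parse via the code above, while "%st(8)" or a "%st("
   followed by garbage yield NULL.  */
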
13116 /* REG_STRING starts *before* REGISTER_PREFIX. */
13117
13118 static const reg_entry *
13119 parse_register (char *reg_string, char **end_op)
13120 {
13121 const reg_entry *r;
13122
13123 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
13124 r = parse_real_register (reg_string, end_op);
13125 else
13126 r = NULL;
13127 if (!r)
13128 {
13129 char *save = input_line_pointer;
13130 char c;
13131 symbolS *symbolP;
13132
13133 input_line_pointer = reg_string;
13134 c = get_symbol_name (&reg_string);
13135 symbolP = symbol_find (reg_string);
13136 while (symbolP && S_GET_SEGMENT (symbolP) != reg_section)
13137 {
13138 const expressionS *e = symbol_get_value_expression(symbolP);
13139
13140 if (e->X_op != O_symbol || e->X_add_number)
13141 break;
13142 symbolP = e->X_add_symbol;
13143 }
13144 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
13145 {
13146 const expressionS *e = symbol_get_value_expression (symbolP);
13147
13148 know (e->X_op == O_register);
13149 know (e->X_add_number >= 0
13150 && (valueT) e->X_add_number < i386_regtab_size);
13151 r = i386_regtab + e->X_add_number;
13152 if (!check_register (r))
13153 {
13154 as_bad (_("register '%s%s' cannot be used here"),
13155 register_prefix, r->reg_name);
13156 r = &bad_reg;
13157 }
13158 *end_op = input_line_pointer;
13159 }
13160 *input_line_pointer = c;
13161 input_line_pointer = save;
13162 }
13163 return r;
13164 }
13165
13166 int
13167 i386_parse_name (char *name, expressionS *e, char *nextcharP)
13168 {
13169 const reg_entry *r = NULL;
13170 char *end = input_line_pointer;
13171
13172 *end = *nextcharP;
13173 if (*name == REGISTER_PREFIX || allow_naked_reg)
13174 r = parse_real_register (name, &input_line_pointer);
13175 if (r && end <= input_line_pointer)
13176 {
13177 *nextcharP = *input_line_pointer;
13178 *input_line_pointer = 0;
13179 if (r != &bad_reg)
13180 {
13181 e->X_op = O_register;
13182 e->X_add_number = r - i386_regtab;
13183 }
13184 else
13185 e->X_op = O_illegal;
13186 return 1;
13187 }
13188 input_line_pointer = end;
13189 *end = 0;
13190 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
13191 }
13192
13193 void
13194 md_operand (expressionS *e)
13195 {
13196 char *end;
13197 const reg_entry *r;
13198
13199 switch (*input_line_pointer)
13200 {
13201 case REGISTER_PREFIX:
13202 r = parse_real_register (input_line_pointer, &end);
13203 if (r)
13204 {
13205 e->X_op = O_register;
13206 e->X_add_number = r - i386_regtab;
13207 input_line_pointer = end;
13208 }
13209 break;
13210
13211 case '[':
13212 gas_assert (intel_syntax);
13213 end = input_line_pointer++;
13214 expression (e);
13215 if (*input_line_pointer == ']')
13216 {
13217 ++input_line_pointer;
13218 e->X_op_symbol = make_expr_symbol (e);
13219 e->X_add_symbol = NULL;
13220 e->X_add_number = 0;
13221 e->X_op = O_index;
13222 }
13223 else
13224 {
13225 e->X_op = O_absent;
13226 input_line_pointer = end;
13227 }
13228 break;
13229 }
13230 }
13231
13232 \f
13233 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13234 const char *md_shortopts = "kVQ:sqnO::";
13235 #else
13236 const char *md_shortopts = "qnO::";
13237 #endif
13238
13239 #define OPTION_32 (OPTION_MD_BASE + 0)
13240 #define OPTION_64 (OPTION_MD_BASE + 1)
13241 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
13242 #define OPTION_MARCH (OPTION_MD_BASE + 3)
13243 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
13244 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
13245 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
13246 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
13247 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
13248 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
13249 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
13250 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
13251 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
13252 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
13253 #define OPTION_X32 (OPTION_MD_BASE + 14)
13254 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
13255 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
13256 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
13257 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
13258 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
13259 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
13260 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
13261 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
13262 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
13263 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
13264 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
13265 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
13266 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
13267 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
13268 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
13269 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
13270 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
13271 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
13272 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
13273 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
13274
13275 struct option md_longopts[] =
13276 {
13277 {"32", no_argument, NULL, OPTION_32},
13278 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13279 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13280 {"64", no_argument, NULL, OPTION_64},
13281 #endif
13282 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13283 {"x32", no_argument, NULL, OPTION_X32},
13284 {"mshared", no_argument, NULL, OPTION_MSHARED},
13285 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
13286 #endif
13287 {"divide", no_argument, NULL, OPTION_DIVIDE},
13288 {"march", required_argument, NULL, OPTION_MARCH},
13289 {"mtune", required_argument, NULL, OPTION_MTUNE},
13290 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
13291 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
13292 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
13293 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
13294 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
13295 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
13296 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
13297 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
13298 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
13299 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
13300 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
13301 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
13302 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
13303 # if defined (TE_PE) || defined (TE_PEP)
13304 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
13305 #endif
13306 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
13307 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
13308 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
13309 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
13310 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
13311 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
13312 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
13313 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
13314 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
13315 {"mlfence-before-indirect-branch", required_argument, NULL,
13316 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
13317 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
13318 {"mamd64", no_argument, NULL, OPTION_MAMD64},
13319 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
13320 {NULL, no_argument, NULL, 0}
13321 };
13322 size_t md_longopts_size = sizeof (md_longopts);
13323
13324 int
13325 md_parse_option (int c, const char *arg)
13326 {
13327 unsigned int j;
13328 char *arch, *next, *saved, *type;
13329
13330 switch (c)
13331 {
13332 case 'n':
13333 optimize_align_code = 0;
13334 break;
13335
13336 case 'q':
13337 quiet_warnings = 1;
13338 break;
13339
13340 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13341 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
13342 should be emitted or not. FIXME: Not implemented. */
13343 case 'Q':
13344 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
13345 return 0;
13346 break;
13347
13348 /* -V: SVR4 argument to print version ID. */
13349 case 'V':
13350 print_version_id ();
13351 break;
13352
13353 /* -k: Ignore for FreeBSD compatibility. */
13354 case 'k':
13355 break;
13356
13357 case 's':
13358 /* -s: On i386 Solaris, this tells the native assembler to use
13359 .stab instead of .stab.excl. We always use .stab anyhow. */
13360 break;
13361
13362 case OPTION_MSHARED:
13363 shared = 1;
13364 break;
13365
13366 case OPTION_X86_USED_NOTE:
13367 if (strcasecmp (arg, "yes") == 0)
13368 x86_used_note = 1;
13369 else if (strcasecmp (arg, "no") == 0)
13370 x86_used_note = 0;
13371 else
13372 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
13373 break;
13374
13375
13376 #endif
13377 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
13378 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
13379 case OPTION_64:
13380 {
13381 const char **list, **l;
13382
13383 list = bfd_target_list ();
13384 for (l = list; *l != NULL; l++)
13385 if (startswith (*l, "elf64-x86-64")
13386 || strcmp (*l, "coff-x86-64") == 0
13387 || strcmp (*l, "pe-x86-64") == 0
13388 || strcmp (*l, "pei-x86-64") == 0
13389 || strcmp (*l, "mach-o-x86-64") == 0)
13390 {
13391 default_arch = "x86_64";
13392 break;
13393 }
13394 if (*l == NULL)
13395 as_fatal (_("no compiled in support for x86_64"));
13396 free (list);
13397 }
13398 break;
13399 #endif
13400
13401 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13402 case OPTION_X32:
13403 if (IS_ELF)
13404 {
13405 const char **list, **l;
13406
13407 list = bfd_target_list ();
13408 for (l = list; *l != NULL; l++)
13409 if (startswith (*l, "elf32-x86-64"))
13410 {
13411 default_arch = "x86_64:32";
13412 break;
13413 }
13414 if (*l == NULL)
13415 as_fatal (_("no compiled in support for 32bit x86_64"));
13416 free (list);
13417 }
13418 else
13419 as_fatal (_("32bit x86_64 is only supported for ELF"));
13420 break;
13421 #endif
13422
13423 case OPTION_32:
13424 default_arch = "i386";
13425 break;
13426
13427 case OPTION_DIVIDE:
13428 #ifdef SVR4_COMMENT_CHARS
13429 {
13430 char *n, *t;
13431 const char *s;
13432
13433 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
13434 t = n;
13435 for (s = i386_comment_chars; *s != '\0'; s++)
13436 if (*s != '/')
13437 *t++ = *s;
13438 *t = '\0';
13439 i386_comment_chars = n;
13440 }
13441 #endif
13442 break;
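/* Editor's note: an illustrative sketch of the effect above, assuming
   i386_comment_chars starts out containing '/' on SVR4 hosts (e.g.
   "#/"): --divide copies every character except '/', leaving "#", so
   `/' can serve as the division operator rather than starting a
   comment.  Without SVR4_COMMENT_CHARS the option is accepted but is
   a no-op, matching the "--divide ignored" usage text below.  */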
13443
13444 case OPTION_MARCH:
13445 saved = xstrdup (arg);
13446 arch = saved;
13447 /* Allow -march=+nosse. */
13448 if (*arch == '+')
13449 arch++;
13450 do
13451 {
13452 if (*arch == '.')
13453 as_fatal (_("invalid -march= option: `%s'"), arg);
13454 next = strchr (arch, '+');
13455 if (next)
13456 *next++ = '\0';
13457 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13458 {
13459 if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
13460 && strcmp (arch, cpu_arch[j].name) == 0)
13461 {
13462 /* Processor. */
13463 if (! cpu_arch[j].enable.bitfield.cpui386)
13464 continue;
13465
13466 cpu_arch_name = cpu_arch[j].name;
13467 free (cpu_sub_arch_name);
13468 cpu_sub_arch_name = NULL;
13469 cpu_arch_flags = cpu_arch[j].enable;
13470 cpu_arch_isa = cpu_arch[j].type;
13471 cpu_arch_isa_flags = cpu_arch[j].enable;
13472 if (!cpu_arch_tune_set)
13473 {
13474 cpu_arch_tune = cpu_arch_isa;
13475 cpu_arch_tune_flags = cpu_arch_isa_flags;
13476 }
13477 break;
13478 }
13479 else if (cpu_arch[j].type == PROCESSOR_NONE
13480 && strcmp (arch, cpu_arch[j].name) == 0
13481 && !cpu_flags_all_zero (&cpu_arch[j].enable))
13482 {
13483 /* ISA extension. */
13484 i386_cpu_flags flags;
13485
13486 flags = cpu_flags_or (cpu_arch_flags,
13487 cpu_arch[j].enable);
13488
13489 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13490 {
13491 extend_cpu_sub_arch_name (arch);
13492 cpu_arch_flags = flags;
13493 cpu_arch_isa_flags = flags;
13494 }
13495 else
13496 cpu_arch_isa_flags
13497 = cpu_flags_or (cpu_arch_isa_flags,
13498 cpu_arch[j].enable);
13499 break;
13500 }
13501 }
13502
13503 if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
13504 {
13505 /* Disable an ISA extension. */
13506 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13507 if (cpu_arch[j].type == PROCESSOR_NONE
13508 && strcmp (arch + 2, cpu_arch[j].name) == 0)
13509 {
13510 i386_cpu_flags flags;
13511
13512 flags = cpu_flags_and_not (cpu_arch_flags,
13513 cpu_arch[j].disable);
13514 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
13515 {
13516 extend_cpu_sub_arch_name (arch);
13517 cpu_arch_flags = flags;
13518 cpu_arch_isa_flags = flags;
13519 }
13520 break;
13521 }
13522 }
13523
13524 if (j >= ARRAY_SIZE (cpu_arch))
13525 as_fatal (_("invalid -march= option: `%s'"), arg);
13526
13527 arch = next;
13528 }
13529 while (next != NULL);
13530 free (saved);
13531 break;
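/* Editor's note: an illustrative walk through the loop above for
   "-march=core2+avx+nosse4.1" (names assumed present in cpu_arch):
     "core2"    - only the first token (arch == saved) may name a
                  processor; it resets cpu_arch_flags/cpu_arch_isa to
                  that CPU's baseline.
     "avx"      - matches a PROCESSOR_NONE entry, so its enable flags
                  are OR'ed into the current flag set.
     "nosse4.1" - matches nothing directly, but the "no" prefix path
                  masks the sse4.1 disable flags out of the set.
   A token matching none of these reaches as_fatal.  */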
13532
13533 case OPTION_MTUNE:
13534 if (*arg == '.')
13535 as_fatal (_("invalid -mtune= option: `%s'"), arg);
13536 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13537 {
13538 if (cpu_arch[j].type != PROCESSOR_NONE
13539 && strcmp (arg, cpu_arch[j].name) == 0)
13540 {
13541 cpu_arch_tune_set = 1;
13542 cpu_arch_tune = cpu_arch [j].type;
13543 cpu_arch_tune_flags = cpu_arch[j].enable;
13544 break;
13545 }
13546 }
13547 if (j >= ARRAY_SIZE (cpu_arch))
13548 as_fatal (_("invalid -mtune= option: `%s'"), arg);
13549 break;
13550
13551 case OPTION_MMNEMONIC:
13552 if (strcasecmp (arg, "att") == 0)
13553 intel_mnemonic = 0;
13554 else if (strcasecmp (arg, "intel") == 0)
13555 intel_mnemonic = 1;
13556 else
13557 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
13558 break;
13559
13560 case OPTION_MSYNTAX:
13561 if (strcasecmp (arg, "att") == 0)
13562 intel_syntax = 0;
13563 else if (strcasecmp (arg, "intel") == 0)
13564 intel_syntax = 1;
13565 else
13566 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
13567 break;
13568
13569 case OPTION_MINDEX_REG:
13570 allow_index_reg = 1;
13571 break;
13572
13573 case OPTION_MNAKED_REG:
13574 allow_naked_reg = 1;
13575 break;
13576
13577 case OPTION_MSSE2AVX:
13578 sse2avx = 1;
13579 break;
13580
13581 case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
13582 use_unaligned_vector_move = 1;
13583 break;
13584
13585 case OPTION_MSSE_CHECK:
13586 if (strcasecmp (arg, "error") == 0)
13587 sse_check = check_error;
13588 else if (strcasecmp (arg, "warning") == 0)
13589 sse_check = check_warning;
13590 else if (strcasecmp (arg, "none") == 0)
13591 sse_check = check_none;
13592 else
13593 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
13594 break;
13595
13596 case OPTION_MOPERAND_CHECK:
13597 if (strcasecmp (arg, "error") == 0)
13598 operand_check = check_error;
13599 else if (strcasecmp (arg, "warning") == 0)
13600 operand_check = check_warning;
13601 else if (strcasecmp (arg, "none") == 0)
13602 operand_check = check_none;
13603 else
13604 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
13605 break;
13606
13607 case OPTION_MAVXSCALAR:
13608 if (strcasecmp (arg, "128") == 0)
13609 avxscalar = vex128;
13610 else if (strcasecmp (arg, "256") == 0)
13611 avxscalar = vex256;
13612 else
13613 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
13614 break;
13615
13616 case OPTION_MVEXWIG:
13617 if (strcmp (arg, "0") == 0)
13618 vexwig = vexw0;
13619 else if (strcmp (arg, "1") == 0)
13620 vexwig = vexw1;
13621 else
13622 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
13623 break;
13624
13625 case OPTION_MADD_BND_PREFIX:
13626 add_bnd_prefix = 1;
13627 break;
13628
13629 case OPTION_MEVEXLIG:
13630 if (strcmp (arg, "128") == 0)
13631 evexlig = evexl128;
13632 else if (strcmp (arg, "256") == 0)
13633 evexlig = evexl256;
13634 else if (strcmp (arg, "512") == 0)
13635 evexlig = evexl512;
13636 else
13637 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
13638 break;
13639
13640 case OPTION_MEVEXRCIG:
13641 if (strcmp (arg, "rne") == 0)
13642 evexrcig = rne;
13643 else if (strcmp (arg, "rd") == 0)
13644 evexrcig = rd;
13645 else if (strcmp (arg, "ru") == 0)
13646 evexrcig = ru;
13647 else if (strcmp (arg, "rz") == 0)
13648 evexrcig = rz;
13649 else
13650 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
13651 break;
13652
13653 case OPTION_MEVEXWIG:
13654 if (strcmp (arg, "0") == 0)
13655 evexwig = evexw0;
13656 else if (strcmp (arg, "1") == 0)
13657 evexwig = evexw1;
13658 else
13659 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
13660 break;
13661
13662 #if defined (TE_PE) || defined (TE_PEP)
13663 case OPTION_MBIG_OBJ:
13664 use_big_obj = 1;
13665 break;
13666 #endif
13667
13668 case OPTION_MOMIT_LOCK_PREFIX:
13669 if (strcasecmp (arg, "yes") == 0)
13670 omit_lock_prefix = 1;
13671 else if (strcasecmp (arg, "no") == 0)
13672 omit_lock_prefix = 0;
13673 else
13674 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
13675 break;
13676
13677 case OPTION_MFENCE_AS_LOCK_ADD:
13678 if (strcasecmp (arg, "yes") == 0)
13679 avoid_fence = 1;
13680 else if (strcasecmp (arg, "no") == 0)
13681 avoid_fence = 0;
13682 else
13683 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
13684 break;
13685
13686 case OPTION_MLFENCE_AFTER_LOAD:
13687 if (strcasecmp (arg, "yes") == 0)
13688 lfence_after_load = 1;
13689 else if (strcasecmp (arg, "no") == 0)
13690 lfence_after_load = 0;
13691 else
13692 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
13693 break;
13694
13695 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
13696 if (strcasecmp (arg, "all") == 0)
13697 {
13698 lfence_before_indirect_branch = lfence_branch_all;
13699 if (lfence_before_ret == lfence_before_ret_none)
13700 lfence_before_ret = lfence_before_ret_shl;
13701 }
13702 else if (strcasecmp (arg, "memory") == 0)
13703 lfence_before_indirect_branch = lfence_branch_memory;
13704 else if (strcasecmp (arg, "register") == 0)
13705 lfence_before_indirect_branch = lfence_branch_register;
13706 else if (strcasecmp (arg, "none") == 0)
13707 lfence_before_indirect_branch = lfence_branch_none;
13708 else
13709 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
13710 arg);
13711 break;
13712
13713 case OPTION_MLFENCE_BEFORE_RET:
13714 if (strcasecmp (arg, "or") == 0)
13715 lfence_before_ret = lfence_before_ret_or;
13716 else if (strcasecmp (arg, "not") == 0)
13717 lfence_before_ret = lfence_before_ret_not;
13718 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
13719 lfence_before_ret = lfence_before_ret_shl;
13720 else if (strcasecmp (arg, "none") == 0)
13721 lfence_before_ret = lfence_before_ret_none;
13722 else
13723 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
13724 arg);
13725 break;
13726
13727 case OPTION_MRELAX_RELOCATIONS:
13728 if (strcasecmp (arg, "yes") == 0)
13729 generate_relax_relocations = 1;
13730 else if (strcasecmp (arg, "no") == 0)
13731 generate_relax_relocations = 0;
13732 else
13733 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
13734 break;
13735
13736 case OPTION_MALIGN_BRANCH_BOUNDARY:
13737 {
13738 char *end;
13739 long int align = strtoul (arg, &end, 0);
13740 if (*end == '\0')
13741 {
13742 if (align == 0)
13743 {
13744 align_branch_power = 0;
13745 break;
13746 }
13747 else if (align >= 16)
13748 {
13749 int align_power;
13750 for (align_power = 0;
13751 (align & 1) == 0;
13752 align >>= 1, align_power++)
13753 continue;
13754 /* Limit alignment power to 31. */
13755 if (align == 1 && align_power < 32)
13756 {
13757 align_branch_power = align_power;
13758 break;
13759 }
13760 }
13761 }
13762 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
13763 }
13764 break;
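/* Editor's note: a worked example of the loop above.  For
   "-malign-branch-boundary=32": 32 is shifted right until its low bit
   is set (5 shifts), leaving align == 1 and align_power == 5, so
   align_branch_power becomes 5.  A non-power-of-two such as 24 ends
   the loop at align == 3 and falls through to as_fatal, as do values
   below 16 other than 0.  */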
13765
13766 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
13767 {
13768 char *end;
13769 int align = strtoul (arg, &end, 0);
13770 /* Some processors only support 5 prefixes. */
13771 if (*end == '\0' && align >= 0 && align < 6)
13772 {
13773 align_branch_prefix_size = align;
13774 break;
13775 }
13776 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
13777 arg);
13778 }
13779 break;
13780
13781 case OPTION_MALIGN_BRANCH:
13782 align_branch = 0;
13783 saved = xstrdup (arg);
13784 type = saved;
13785 do
13786 {
13787 next = strchr (type, '+');
13788 if (next)
13789 *next++ = '\0';
13790 if (strcasecmp (type, "jcc") == 0)
13791 align_branch |= align_branch_jcc_bit;
13792 else if (strcasecmp (type, "fused") == 0)
13793 align_branch |= align_branch_fused_bit;
13794 else if (strcasecmp (type, "jmp") == 0)
13795 align_branch |= align_branch_jmp_bit;
13796 else if (strcasecmp (type, "call") == 0)
13797 align_branch |= align_branch_call_bit;
13798 else if (strcasecmp (type, "ret") == 0)
13799 align_branch |= align_branch_ret_bit;
13800 else if (strcasecmp (type, "indirect") == 0)
13801 align_branch |= align_branch_indirect_bit;
13802 else
13803 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
13804 type = next;
13805 }
13806 while (next != NULL);
13807 free (saved);
13808 break;
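/* Editor's note: illustrative usage of the parser above.
   "-malign-branch=jcc+ret" clears the default mask and leaves
   align_branch = align_branch_jcc_bit | align_branch_ret_bit; the
   '+'-separated types are matched case-insensitively, and any unknown
   type aborts via as_fatal.  */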
13809
13810 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
13811 align_branch_power = 5;
13812 align_branch_prefix_size = 5;
13813 align_branch = (align_branch_jcc_bit
13814 | align_branch_fused_bit
13815 | align_branch_jmp_bit);
13816 break;
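/* Editor's note: per the three assignments above,
   -mbranches-within-32B-boundaries behaves like the combination
   -malign-branch-boundary=32 -malign-branch=jcc+fused+jmp
   -malign-branch-prefix-size=5.  */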
13817
13818 case OPTION_MAMD64:
13819 isa64 = amd64;
13820 break;
13821
13822 case OPTION_MINTEL64:
13823 isa64 = intel64;
13824 break;
13825
13826 case 'O':
13827 if (arg == NULL)
13828 {
13829 optimize = 1;
13830 /* Turn off -Os. */
13831 optimize_for_space = 0;
13832 }
13833 else if (*arg == 's')
13834 {
13835 optimize_for_space = 1;
13836 /* Turn on all encoding optimizations. */
13837 optimize = INT_MAX;
13838 }
13839 else
13840 {
13841 optimize = atoi (arg);
13842 /* Turn off -Os. */
13843 optimize_for_space = 0;
13844 }
13845 break;
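/* Editor's note: an illustrative mapping of the -O branches above:
     -O  (no argument) -> optimize = 1,       optimize_for_space = 0
     -Os               -> optimize = INT_MAX, optimize_for_space = 1
     -O2               -> optimize = 2,       optimize_for_space = 0
   Any other numeric argument simply flows through atoi.  */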
13846
13847 default:
13848 return 0;
13849 }
13850 return 1;
13851 }
13852
13853 #define MESSAGE_TEMPLATE \
13854 " "
13855
13856 static char *
13857 output_message (FILE *stream, char *p, char *message, char *start,
13858 int *left_p, const char *name, int len)
13859 {
13860 int size = sizeof (MESSAGE_TEMPLATE);
13861 int left = *left_p;
13862
13863 /* Reserve 2 spaces for ", " or ",\0". */
13864 left -= len + 2;
13865
13866 /* Check if there is any room. */
13867 if (left >= 0)
13868 {
13869 if (p != start)
13870 {
13871 *p++ = ',';
13872 *p++ = ' ';
13873 }
13874 p = mempcpy (p, name, len);
13875 }
13876 else
13877 {
13878 /* Output the current message now and start a new one. */
13879 *p++ = ',';
13880 *p = '\0';
13881 fprintf (stream, "%s\n", message);
13882 p = start;
13883 left = size - (start - message) - len - 2;
13884
13885 gas_assert (left >= 0);
13886
13887 p = mempcpy (p, name, len);
13888 }
13889
13890 *left_p = left;
13891 return p;
13892 }
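/* Editor's note (a reading of the helper above, not original text):
   output_message appends NAME to the line being built in MESSAGE,
   inserting ", " between items; once an item would overflow the
   MESSAGE_TEMPLATE width, the pending line is flushed to STREAM and a
   new one begins at START, so callers can emit arbitrarily long
   comma-separated lists one wrapped line at a time.  */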
13893
13894 static void
13895 show_arch (FILE *stream, int ext, int check)
13896 {
13897 static char message[] = MESSAGE_TEMPLATE;
13898 char *start = message + 27;
13899 char *p;
13900 int size = sizeof (MESSAGE_TEMPLATE);
13901 int left;
13902 const char *name;
13903 int len;
13904 unsigned int j;
13905
13906 p = start;
13907 left = size - (start - message);
13908
13909 if (!ext && check)
13910 {
13911 p = output_message (stream, p, message, start, &left,
13912 STRING_COMMA_LEN ("default"));
13913 p = output_message (stream, p, message, start, &left,
13914 STRING_COMMA_LEN ("push"));
13915 p = output_message (stream, p, message, start, &left,
13916 STRING_COMMA_LEN ("pop"));
13917 }
13918
13919 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13920 {
13921 /* Should it be skipped? */
13922 if (cpu_arch [j].skip)
13923 continue;
13924
13925 name = cpu_arch [j].name;
13926 len = cpu_arch [j].len;
13927 if (cpu_arch[j].type == PROCESSOR_NONE)
13928 {
13929 /* It is an extension. Skip if we aren't asked to show it. */
13930 if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
13931 continue;
13932 }
13933 else if (ext)
13934 {
13935 /* It is a processor. Skip if we only show extensions. */
13936 continue;
13937 }
13938 else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
13939 {
13940 /* It is an impossible processor - skip. */
13941 continue;
13942 }
13943
13944 p = output_message (stream, p, message, start, &left, name, len);
13945 }
13946
13947 /* Display disabled extensions. */
13948 if (ext)
13949 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
13950 {
13951 char *str;
13952
13953 if (cpu_arch[j].type != PROCESSOR_NONE
13954 || !cpu_flags_all_zero (&cpu_arch[j].enable))
13955 continue;
13956 str = xasprintf ("no%s", cpu_arch[j].name);
13957 p = output_message (stream, p, message, start, &left, str,
13958 strlen (str));
13959 free (str);
13960 }
13961
13962 *p = '\0';
13963 fprintf (stream, "%s\n", message);
13964 }
13965
13966 void
13967 md_show_usage (FILE *stream)
13968 {
13969 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13970 fprintf (stream, _("\
13971 -Qy, -Qn ignored\n\
13972 -V print assembler version number\n\
13973 -k ignored\n"));
13974 #endif
13975 fprintf (stream, _("\
13976 -n do not optimize code alignment\n\
13977 -O{012s} attempt some code optimizations\n\
13978 -q quieten some warnings\n"));
13979 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13980 fprintf (stream, _("\
13981 -s ignored\n"));
13982 #endif
13983 #ifdef BFD64
13984 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13985 fprintf (stream, _("\
13986 --32/--64/--x32 generate 32bit/64bit/x32 object\n"));
13987 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
13988 fprintf (stream, _("\
13989 --32/--64 generate 32bit/64bit object\n"));
13990 # endif
13991 #endif
13992 #ifdef SVR4_COMMENT_CHARS
13993 fprintf (stream, _("\
13994 --divide do not treat `/' as a comment character\n"));
13995 #else
13996 fprintf (stream, _("\
13997 --divide ignored\n"));
13998 #endif
13999 fprintf (stream, _("\
14000 -march=CPU[,+EXTENSION...]\n\
14001 generate code for CPU and EXTENSION, CPU is one of:\n"));
14002 show_arch (stream, 0, 1);
14003 fprintf (stream, _("\
14004 EXTENSION is a combination of (possibly \"no\"-prefixed):\n"));
14005 show_arch (stream, 1, 0);
14006 fprintf (stream, _("\
14007 -mtune=CPU optimize for CPU, CPU is one of:\n"));
14008 show_arch (stream, 0, 0);
14009 fprintf (stream, _("\
14010 -msse2avx encode SSE instructions with VEX prefix\n"));
14011 fprintf (stream, _("\
14012 -muse-unaligned-vector-move\n\
14013 encode aligned vector move as unaligned vector move\n"));
14014 fprintf (stream, _("\
14015 -msse-check=[none|error|warning] (default: warning)\n\
14016 check SSE instructions\n"));
14017 fprintf (stream, _("\
14018 -moperand-check=[none|error|warning] (default: warning)\n\
14019 check operand combinations for validity\n"));
14020 fprintf (stream, _("\
14021 -mavxscalar=[128|256] (default: 128)\n\
14022 encode scalar AVX instructions with specific vector\n\
14023 length\n"));
14024 fprintf (stream, _("\
14025 -mvexwig=[0|1] (default: 0)\n\
14026 encode VEX instructions with specific VEX.W value\n\
14027 for VEX.W bit ignored instructions\n"));
14028 fprintf (stream, _("\
14029 -mevexlig=[128|256|512] (default: 128)\n\
14030 encode scalar EVEX instructions with specific vector\n\
14031 length\n"));
14032 fprintf (stream, _("\
14033 -mevexwig=[0|1] (default: 0)\n\
14034 encode EVEX instructions with specific EVEX.W value\n\
14035 for EVEX.W bit ignored instructions\n"));
14036 fprintf (stream, _("\
14037 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
14038 encode EVEX instructions with specific EVEX.RC value\n\
14039 for SAE-only ignored instructions\n"));
14040 fprintf (stream, _("\
14041 -mmnemonic=[att|intel] "));
14042 if (SYSV386_COMPAT)
14043 fprintf (stream, _("(default: att)\n"));
14044 else
14045 fprintf (stream, _("(default: intel)\n"));
14046 fprintf (stream, _("\
14047 use AT&T/Intel mnemonic\n"));
14048 fprintf (stream, _("\
14049 -msyntax=[att|intel] (default: att)\n\
14050 use AT&T/Intel syntax\n"));
14051 fprintf (stream, _("\
14052 -mindex-reg support pseudo index registers\n"));
14053 fprintf (stream, _("\
14054 -mnaked-reg don't require `%%' prefix for registers\n"));
14055 fprintf (stream, _("\
14056 -madd-bnd-prefix add BND prefix for all valid branches\n"));
14057 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14058 fprintf (stream, _("\
14059 -mshared disable branch optimization for shared code\n"));
14060 fprintf (stream, _("\
14061 -mx86-used-note=[no|yes] "));
14062 if (DEFAULT_X86_USED_NOTE)
14063 fprintf (stream, _("(default: yes)\n"));
14064 else
14065 fprintf (stream, _("(default: no)\n"));
14066 fprintf (stream, _("\
14067 generate x86 used ISA and feature properties\n"));
14068 #endif
14069 #if defined (TE_PE) || defined (TE_PEP)
14070 fprintf (stream, _("\
14071 -mbig-obj generate big object files\n"));
14072 #endif
14073 fprintf (stream, _("\
14074 -momit-lock-prefix=[no|yes] (default: no)\n\
14075 strip all lock prefixes\n"));
14076 fprintf (stream, _("\
14077 -mfence-as-lock-add=[no|yes] (default: no)\n\
14078 encode lfence, mfence and sfence as\n\
14079 lock addl $0x0, (%%{re}sp)\n"));
14080 fprintf (stream, _("\
14081 -mrelax-relocations=[no|yes] "));
14082 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
14083 fprintf (stream, _("(default: yes)\n"));
14084 else
14085 fprintf (stream, _("(default: no)\n"));
14086 fprintf (stream, _("\
14087 generate relax relocations\n"));
14088 fprintf (stream, _("\
14089 -malign-branch-boundary=NUM (default: 0)\n\
14090 align branches within NUM byte boundary\n"));
14091 fprintf (stream, _("\
14092 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
14093 TYPE is combination of jcc, fused, jmp, call, ret,\n\
14094 indirect\n\
14095 specify types of branches to align\n"));
14096 fprintf (stream, _("\
14097 -malign-branch-prefix-size=NUM (default: 5)\n\
14098 align branches with NUM prefixes per instruction\n"));
14099 fprintf (stream, _("\
14100 -mbranches-within-32B-boundaries\n\
14101 align branches within 32 byte boundary\n"));
14102 fprintf (stream, _("\
14103 -mlfence-after-load=[no|yes] (default: no)\n\
14104 generate lfence after load\n"));
14105 fprintf (stream, _("\
14106 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
14107 generate lfence before indirect near branch\n"));
14108 fprintf (stream, _("\
14109 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
14110 generate lfence before ret\n"));
14111 fprintf (stream, _("\
14112 -mamd64 accept only AMD64 ISA [default]\n"));
14113 fprintf (stream, _("\
14114 -mintel64 accept only Intel64 ISA\n"));
14115 }
14116
14117 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
14118 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14119 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14120
14121 /* Pick the target format to use. */
14122
14123 const char *
14124 i386_target_format (void)
14125 {
14126 if (startswith (default_arch, "x86_64"))
14127 {
14128 update_code_flag (CODE_64BIT, 1);
14129 if (default_arch[6] == '\0')
14130 x86_elf_abi = X86_64_ABI;
14131 else
14132 x86_elf_abi = X86_64_X32_ABI;
14133 }
14134 else if (!strcmp (default_arch, "i386"))
14135 update_code_flag (CODE_32BIT, 1);
14136 else if (!strcmp (default_arch, "iamcu"))
14137 {
14138 update_code_flag (CODE_32BIT, 1);
14139 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
14140 {
14141 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
14142 cpu_arch_name = "iamcu";
14143 free (cpu_sub_arch_name);
14144 cpu_sub_arch_name = NULL;
14145 cpu_arch_flags = iamcu_flags;
14146 cpu_arch_isa = PROCESSOR_IAMCU;
14147 cpu_arch_isa_flags = iamcu_flags;
14148 if (!cpu_arch_tune_set)
14149 {
14150 cpu_arch_tune = cpu_arch_isa;
14151 cpu_arch_tune_flags = cpu_arch_isa_flags;
14152 }
14153 }
14154 else if (cpu_arch_isa != PROCESSOR_IAMCU)
14155 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
14156 cpu_arch_name);
14157 }
14158 else
14159 as_fatal (_("unknown architecture"));
14160
14161 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
14162 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14163 if (cpu_flags_all_zero (&cpu_arch_tune_flags))
14164 cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
14165
14166 switch (OUTPUT_FLAVOR)
14167 {
14168 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
14169 case bfd_target_aout_flavour:
14170 return AOUT_TARGET_FORMAT;
14171 #endif
14172 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
14173 # if defined (TE_PE) || defined (TE_PEP)
14174 case bfd_target_coff_flavour:
14175 if (flag_code == CODE_64BIT)
14176 {
14177 object_64bit = 1;
14178 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
14179 }
14180 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
14181 # elif defined (TE_GO32)
14182 case bfd_target_coff_flavour:
14183 return "coff-go32";
14184 # else
14185 case bfd_target_coff_flavour:
14186 return "coff-i386";
14187 # endif
14188 #endif
14189 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14190 case bfd_target_elf_flavour:
14191 {
14192 const char *format;
14193
14194 switch (x86_elf_abi)
14195 {
14196 default:
14197 format = ELF_TARGET_FORMAT;
14198 #ifndef TE_SOLARIS
14199 tls_get_addr = "___tls_get_addr";
14200 #endif
14201 break;
14202 case X86_64_ABI:
14203 use_rela_relocations = 1;
14204 object_64bit = 1;
14205 #ifndef TE_SOLARIS
14206 tls_get_addr = "__tls_get_addr";
14207 #endif
14208 format = ELF_TARGET_FORMAT64;
14209 break;
14210 case X86_64_X32_ABI:
14211 use_rela_relocations = 1;
14212 object_64bit = 1;
14213 #ifndef TE_SOLARIS
14214 tls_get_addr = "__tls_get_addr";
14215 #endif
14216 disallow_64bit_reloc = 1;
14217 format = ELF_TARGET_FORMAT32;
14218 break;
14219 }
14220 if (cpu_arch_isa == PROCESSOR_IAMCU)
14221 {
14222 if (x86_elf_abi != I386_ABI)
14223 as_fatal (_("Intel MCU is 32bit only"));
14224 return ELF_TARGET_IAMCU_FORMAT;
14225 }
14226 else
14227 return format;
14228 }
14229 #endif
14230 #if defined (OBJ_MACH_O)
14231 case bfd_target_mach_o_flavour:
14232 if (flag_code == CODE_64BIT)
14233 {
14234 use_rela_relocations = 1;
14235 object_64bit = 1;
14236 return "mach-o-x86-64";
14237 }
14238 else
14239 return "mach-o-i386";
14240 #endif
14241 default:
14242 abort ();
14243 return NULL;
14244 }
14245 }
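/* Editor's note: illustrative outcomes of i386_target_format for an
   ELF-configured assembler (format names assumed from the usual
   ELF_TARGET_FORMAT* definitions): --32 selects e.g. "elf32-i386",
   --64 selects "elf64-x86-64" with rela relocations, and --x32
   selects "elf32-x86-64" with 64-bit relocations disallowed.  */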
14246
14247 #endif /* OBJ_MAYBE_ more than one */
14248 \f
14249 symbolS *
14250 md_undefined_symbol (char *name)
14251 {
14252 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
14253 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
14254 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
14255 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
14256 {
14257 if (!GOT_symbol)
14258 {
14259 if (symbol_find (name))
14260 as_bad (_("GOT already in symbol table"));
14261 GOT_symbol = symbol_new (name, undefined_section,
14262 &zero_address_frag, 0);
14263 }
14264 return GOT_symbol;
14265 }
14266 return 0;
14267 }
14268
14269 /* Round up a section size to the appropriate boundary. */
14270
14271 valueT
14272 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
14273 {
14274 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
14275 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
14276 {
14277 /* For a.out, force the section size to be aligned. If we don't do
14278 this, BFD will align it for us, but it will not write out the
14279 final bytes of the section. This may be a bug in BFD, but it is
14280 easier to fix it here since that is how the other a.out targets
14281 work. */
14282 int align;
14283
14284 align = bfd_section_alignment (segment);
14285 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
14286 }
14287 #endif
14288
14289 return size;
14290 }
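/* Editor's note: arithmetic check of the rounding above (illustrative).
   With a section alignment of 2 (i.e. 4-byte alignment), a size of 10
   becomes (10 + 3) & ~3 = 12, while an already-aligned 8 stays 8.  */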
14291
14292 /* On the i386, PC-relative offsets are relative to the start of the
14293 next instruction. That is, the address of the offset, plus its
14294 size, since the offset is always the last part of the insn. */
14295
14296 long
14297 md_pcrel_from (fixS *fixP)
14298 {
14299 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
14300 }
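/* Editor's note: a worked example with assumed addresses.  For a
   5-byte "call" at 0x100 whose 4-byte displacement begins at 0x101,
   fx_frag->fr_address + fx_where == 0x101 and fx_size == 4, so
   md_pcrel_from returns 0x105 -- the address of the next instruction,
   the point the CPU measures the displacement from.  */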
14301
14302 #ifndef I386COFF
14303
14304 static void
14305 s_bss (int ignore ATTRIBUTE_UNUSED)
14306 {
14307 int temp;
14308
14309 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14310 if (IS_ELF)
14311 obj_elf_section_change_hook ();
14312 #endif
14313 temp = get_absolute_expression ();
14314 subseg_set (bss_section, (subsegT) temp);
14315 demand_empty_rest_of_line ();
14316 }
14317
14318 #endif
14319
14320 /* Remember constant directive. */
14321
14322 void
14323 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
14324 {
14325 if (last_insn.kind != last_insn_directive
14326 && (bfd_section_flags (now_seg) & SEC_CODE))
14327 {
14328 last_insn.seg = now_seg;
14329 last_insn.kind = last_insn_directive;
14330 last_insn.name = "constant directive";
14331 last_insn.file = as_where (&last_insn.line);
14332 if (lfence_before_ret != lfence_before_ret_none)
14333 {
14334 if (lfence_before_indirect_branch != lfence_branch_none)
14335 as_warn (_("constant directive skips -mlfence-before-ret "
14336 "and -mlfence-before-indirect-branch"));
14337 else
14338 as_warn (_("constant directive skips -mlfence-before-ret"));
14339 }
14340 else if (lfence_before_indirect_branch != lfence_branch_none)
14341 as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
14342 }
14343 }
14344
14345 int
14346 i386_validate_fix (fixS *fixp)
14347 {
14348 if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
14349 {
14350 reloc_howto_type *howto;
14351
14352 howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
14353 as_bad_where (fixp->fx_file, fixp->fx_line,
14354 _("invalid %s relocation against register"),
14355 howto ? howto->name : "<unknown>");
14356 return 0;
14357 }
14358
14359 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14360 if (fixp->fx_r_type == BFD_RELOC_SIZE32
14361 || fixp->fx_r_type == BFD_RELOC_SIZE64)
14362 return IS_ELF && fixp->fx_addsy
14363 && (!S_IS_DEFINED (fixp->fx_addsy)
14364 || S_IS_EXTERNAL (fixp->fx_addsy));
14365 #endif
14366
14367 if (fixp->fx_subsy)
14368 {
14369 if (fixp->fx_subsy == GOT_symbol)
14370 {
14371 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
14372 {
14373 if (!object_64bit)
14374 abort ();
14375 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14376 if (fixp->fx_tcbit2)
14377 fixp->fx_r_type = (fixp->fx_tcbit
14378 ? BFD_RELOC_X86_64_REX_GOTPCRELX
14379 : BFD_RELOC_X86_64_GOTPCRELX);
14380 else
14381 #endif
14382 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
14383 }
14384 else
14385 {
14386 if (!object_64bit)
14387 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
14388 else
14389 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
14390 }
14391 fixp->fx_subsy = 0;
14392 }
14393 }
14394 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14395 else
14396 {
14397 /* NB: Commit 292676c1 resolved PLT32 relocs against local symbols
14398 to section. Since PLT32 relocations must be against symbols,
14399 turn such PLT32 relocations into PC32 relocations. */
14400 if (fixp->fx_addsy
14401 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
14402 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
14403 && symbol_section_p (fixp->fx_addsy))
14404 fixp->fx_r_type = BFD_RELOC_32_PCREL;
14405 if (!object_64bit)
14406 {
14407 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
14408 && fixp->fx_tcbit2)
14409 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
14410 }
14411 }
14412 #endif
14413
14414 return 1;
14415 }
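/* Editor's note (hedged): the fx_subsy == GOT_symbol case above covers
   source expressions that subtract _GLOBAL_OFFSET_TABLE_ explicitly,
   e.g. ".long sym - _GLOBAL_OFFSET_TABLE_", which becomes GOTOFF in
   32-bit mode and GOTOFF64 in 64-bit mode, while a PC-relative form in
   64-bit mode is converted to one of the GOTPCREL flavours.  */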
14416
14417 arelent *
14418 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
14419 {
14420 arelent *rel;
14421 bfd_reloc_code_real_type code;
14422
14423 switch (fixp->fx_r_type)
14424 {
14425 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14426 symbolS *sym;
14427
14428 case BFD_RELOC_SIZE32:
14429 case BFD_RELOC_SIZE64:
14430 if (fixp->fx_addsy
14431 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
14432 && (!fixp->fx_subsy
14433 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
14434 sym = fixp->fx_addsy;
14435 else if (fixp->fx_subsy
14436 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
14437 && (!fixp->fx_addsy
14438 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
14439 sym = fixp->fx_subsy;
14440 else
14441 sym = NULL;
14442 if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
14443 {
14444 /* Resolve size relocation against local symbol to size of
14445 the symbol plus addend. */
14446 valueT value = S_GET_SIZE (sym);
14447
14448 if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
14449 value = bfd_section_size (S_GET_SEGMENT (sym));
14450 if (sym == fixp->fx_subsy)
14451 {
14452 value = -value;
14453 if (fixp->fx_addsy)
14454 value += S_GET_VALUE (fixp->fx_addsy);
14455 }
14456 else if (fixp->fx_subsy)
14457 value -= S_GET_VALUE (fixp->fx_subsy);
14458 value += fixp->fx_offset;
14459 if (fixp->fx_r_type == BFD_RELOC_SIZE32
14460 && object_64bit
14461 && !fits_in_unsigned_long (value))
14462 as_bad_where (fixp->fx_file, fixp->fx_line,
14463 _("symbol size computation overflow"));
14464 fixp->fx_addsy = NULL;
14465 fixp->fx_subsy = NULL;
14466 md_apply_fix (fixp, (valueT *) &value, NULL);
14467 return NULL;
14468 }
14469 if (!fixp->fx_addsy || fixp->fx_subsy)
14470 {
14471 as_bad_where (fixp->fx_file, fixp->fx_line,
14472 "unsupported expression involving @size");
14473 return NULL;
14474 }
14475 #endif
14476 /* Fall through. */
14477
14478 case BFD_RELOC_X86_64_PLT32:
14479 case BFD_RELOC_X86_64_GOT32:
14480 case BFD_RELOC_X86_64_GOTPCREL:
14481 case BFD_RELOC_X86_64_GOTPCRELX:
14482 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14483 case BFD_RELOC_386_PLT32:
14484 case BFD_RELOC_386_GOT32:
14485 case BFD_RELOC_386_GOT32X:
14486 case BFD_RELOC_386_GOTOFF:
14487 case BFD_RELOC_386_GOTPC:
14488 case BFD_RELOC_386_TLS_GD:
14489 case BFD_RELOC_386_TLS_LDM:
14490 case BFD_RELOC_386_TLS_LDO_32:
14491 case BFD_RELOC_386_TLS_IE_32:
14492 case BFD_RELOC_386_TLS_IE:
14493 case BFD_RELOC_386_TLS_GOTIE:
14494 case BFD_RELOC_386_TLS_LE_32:
14495 case BFD_RELOC_386_TLS_LE:
14496 case BFD_RELOC_386_TLS_GOTDESC:
14497 case BFD_RELOC_386_TLS_DESC_CALL:
14498 case BFD_RELOC_X86_64_TLSGD:
14499 case BFD_RELOC_X86_64_TLSLD:
14500 case BFD_RELOC_X86_64_DTPOFF32:
14501 case BFD_RELOC_X86_64_DTPOFF64:
14502 case BFD_RELOC_X86_64_GOTTPOFF:
14503 case BFD_RELOC_X86_64_TPOFF32:
14504 case BFD_RELOC_X86_64_TPOFF64:
14505 case BFD_RELOC_X86_64_GOTOFF64:
14506 case BFD_RELOC_X86_64_GOTPC32:
14507 case BFD_RELOC_X86_64_GOT64:
14508 case BFD_RELOC_X86_64_GOTPCREL64:
14509 case BFD_RELOC_X86_64_GOTPC64:
14510 case BFD_RELOC_X86_64_GOTPLT64:
14511 case BFD_RELOC_X86_64_PLTOFF64:
14512 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14513 case BFD_RELOC_X86_64_TLSDESC_CALL:
14514 case BFD_RELOC_RVA:
14515 case BFD_RELOC_VTABLE_ENTRY:
14516 case BFD_RELOC_VTABLE_INHERIT:
14517 #ifdef TE_PE
14518 case BFD_RELOC_32_SECREL:
14519 case BFD_RELOC_16_SECIDX:
14520 #endif
14521 code = fixp->fx_r_type;
14522 break;
14523 case BFD_RELOC_X86_64_32S:
14524 if (!fixp->fx_pcrel)
14525 {
14526 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
14527 code = fixp->fx_r_type;
14528 break;
14529 }
14530 /* Fall through. */
14531 default:
14532 if (fixp->fx_pcrel)
14533 {
14534 switch (fixp->fx_size)
14535 {
14536 default:
14537 as_bad_where (fixp->fx_file, fixp->fx_line,
14538 _("can not do %d byte pc-relative relocation"),
14539 fixp->fx_size);
14540 code = BFD_RELOC_32_PCREL;
14541 break;
14542 case 1: code = BFD_RELOC_8_PCREL; break;
14543 case 2: code = BFD_RELOC_16_PCREL; break;
14544 case 4: code = BFD_RELOC_32_PCREL; break;
14545 #ifdef BFD64
14546 case 8: code = BFD_RELOC_64_PCREL; break;
14547 #endif
14548 }
14549 }
14550 else
14551 {
14552 switch (fixp->fx_size)
14553 {
14554 default:
14555 as_bad_where (fixp->fx_file, fixp->fx_line,
14556 _("can not do %d byte relocation"),
14557 fixp->fx_size);
14558 code = BFD_RELOC_32;
14559 break;
14560 case 1: code = BFD_RELOC_8; break;
14561 case 2: code = BFD_RELOC_16; break;
14562 case 4: code = BFD_RELOC_32; break;
14563 #ifdef BFD64
14564 case 8: code = BFD_RELOC_64; break;
14565 #endif
14566 }
14567 }
14568 break;
14569 }
14570
14571 if ((code == BFD_RELOC_32
14572 || code == BFD_RELOC_32_PCREL
14573 || code == BFD_RELOC_X86_64_32S)
14574 && GOT_symbol
14575 && fixp->fx_addsy == GOT_symbol)
14576 {
14577 if (!object_64bit)
14578 code = BFD_RELOC_386_GOTPC;
14579 else
14580 code = BFD_RELOC_X86_64_GOTPC32;
14581 }
14582 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
14583 && GOT_symbol
14584 && fixp->fx_addsy == GOT_symbol)
14585 {
14586 code = BFD_RELOC_X86_64_GOTPC64;
14587 }
14588
14589 rel = XNEW (arelent);
14590 rel->sym_ptr_ptr = XNEW (asymbol *);
14591 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
14592
14593 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
14594
14595 if (!use_rela_relocations)
14596 {
14597 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
14598 vtable entry to be used in the relocation's section offset. */
14599 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
14600 rel->address = fixp->fx_offset;
14601 #if defined (OBJ_COFF) && defined (TE_PE)
14602 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
14603 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
14604 else
14605 #endif
14606 rel->addend = 0;
14607 }
14608 /* Use rela relocations in 64-bit mode. */
14609 else
14610 {
14611 if (disallow_64bit_reloc)
14612 switch (code)
14613 {
14614 case BFD_RELOC_X86_64_DTPOFF64:
14615 case BFD_RELOC_X86_64_TPOFF64:
14616 case BFD_RELOC_64_PCREL:
14617 case BFD_RELOC_X86_64_GOTOFF64:
14618 case BFD_RELOC_X86_64_GOT64:
14619 case BFD_RELOC_X86_64_GOTPCREL64:
14620 case BFD_RELOC_X86_64_GOTPC64:
14621 case BFD_RELOC_X86_64_GOTPLT64:
14622 case BFD_RELOC_X86_64_PLTOFF64:
14623 as_bad_where (fixp->fx_file, fixp->fx_line,
14624 _("cannot represent relocation type %s in x32 mode"),
14625 bfd_get_reloc_code_name (code));
14626 break;
14627 default:
14628 break;
14629 }
14630
14631 if (!fixp->fx_pcrel)
14632 rel->addend = fixp->fx_offset;
14633 else
14634 switch (code)
14635 {
14636 case BFD_RELOC_X86_64_PLT32:
14637 case BFD_RELOC_X86_64_GOT32:
14638 case BFD_RELOC_X86_64_GOTPCREL:
14639 case BFD_RELOC_X86_64_GOTPCRELX:
14640 case BFD_RELOC_X86_64_REX_GOTPCRELX:
14641 case BFD_RELOC_X86_64_TLSGD:
14642 case BFD_RELOC_X86_64_TLSLD:
14643 case BFD_RELOC_X86_64_GOTTPOFF:
14644 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
14645 case BFD_RELOC_X86_64_TLSDESC_CALL:
14646 rel->addend = fixp->fx_offset - fixp->fx_size;
14647 break;
14648 default:
14649 rel->addend = (section->vma
14650 - fixp->fx_size
14651 + fixp->fx_addnumber
14652 + md_pcrel_from (fixp));
14653 break;
14654 }
14655 }
14656
14657 rel->howto = bfd_reloc_type_lookup (stdoutput, code);
14658 if (rel->howto == NULL)
14659 {
14660 as_bad_where (fixp->fx_file, fixp->fx_line,
14661 _("cannot represent relocation type %s"),
14662 bfd_get_reloc_code_name (code));
14663 /* Set howto to a garbage value so that we can keep going. */
14664 rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
14665 gas_assert (rel->howto != NULL);
14666 }
14667
14668 return rel;
14669 }
14670
14671 #include "tc-i386-intel.c"
14672
14673 void
14674 tc_x86_parse_to_dw2regnum (expressionS *exp)
14675 {
14676 int saved_naked_reg;
14677 char saved_register_dot;
14678
14679 saved_naked_reg = allow_naked_reg;
14680 allow_naked_reg = 1;
14681 saved_register_dot = register_chars['.'];
14682 register_chars['.'] = '.';
14683 allow_pseudo_reg = 1;
14684 expression_and_evaluate (exp);
14685 allow_pseudo_reg = 0;
14686 register_chars['.'] = saved_register_dot;
14687 allow_naked_reg = saved_naked_reg;
14688
14689 if (exp->X_op == O_register && exp->X_add_number >= 0)
14690 {
14691 if ((addressT) exp->X_add_number < i386_regtab_size)
14692 {
14693 exp->X_op = O_constant;
14694 exp->X_add_number = i386_regtab[exp->X_add_number]
14695 .dw2_regnum[flag_code >> 1];
14696 }
14697 else
14698 exp->X_op = O_illegal;
14699 }
14700 }
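/* Editor's note: dw2_regnum is indexed by flag_code >> 1, i.e. 0 for
   16-/32-bit code and 1 for 64-bit code.  Assuming the usual psABI
   numbering, "%esp" therefore resolves to DWARF register 4 in 32-bit
   mode, while "%rsp" resolves to register 7 in 64-bit mode.  */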
14701
14702 void
14703 tc_x86_frame_initial_instructions (void)
14704 {
14705 static unsigned int sp_regno[2];
14706
14707 if (!sp_regno[flag_code >> 1])
14708 {
14709 char *saved_input = input_line_pointer;
14710 char sp[][4] = {"esp", "rsp"};
14711 expressionS exp;
14712
14713 input_line_pointer = sp[flag_code >> 1];
14714 tc_x86_parse_to_dw2regnum (&exp);
14715 gas_assert (exp.X_op == O_constant);
14716 sp_regno[flag_code >> 1] = exp.X_add_number;
14717 input_line_pointer = saved_input;
14718 }
14719
14720 cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
14721 cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
14722 }
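/* Editor's note: with x86_cie_data_alignment of -4/-8 (assumed), the
   two calls above express the state at function entry: the CFA is
   %esp/%rsp plus 4 or 8, and the return address is recorded at
   CFA - 4 or CFA - 8 in the return-address column.  */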
14723
14724 int
14725 x86_dwarf2_addr_size (void)
14726 {
14727 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
14728 if (x86_elf_abi == X86_64_X32_ABI)
14729 return 4;
14730 #endif
14731 return bfd_arch_bits_per_address (stdoutput) / 8;
14732 }
14733
14734 int
14735 i386_elf_section_type (const char *str, size_t len)
14736 {
14737 if (flag_code == CODE_64BIT
14738 && len == sizeof ("unwind") - 1
14739 && startswith (str, "unwind"))
14740 return SHT_X86_64_UNWIND;
14741
14742 return -1;
14743 }
14744
14745 #ifdef TE_SOLARIS
14746 void
14747 i386_solaris_fix_up_eh_frame (segT sec)
14748 {
14749 if (flag_code == CODE_64BIT)
14750 elf_section_type (sec) = SHT_X86_64_UNWIND;
14751 }
14752 #endif
14753
14754 #ifdef TE_PE
14755 void
14756 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
14757 {
14758 expressionS exp;
14759
14760 exp.X_op = O_secrel;
14761 exp.X_add_symbol = symbol;
14762 exp.X_add_number = 0;
14763 emit_expr (&exp, size);
14764 }
14765 #endif
14766
14767 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14768 /* For ELF on x86-64, add support for SHF_X86_64_LARGE. */
14769
14770 bfd_vma
14771 x86_64_section_letter (int letter, const char **ptr_msg)
14772 {
14773 if (flag_code == CODE_64BIT)
14774 {
14775 if (letter == 'l')
14776 return SHF_X86_64_LARGE;
14777
14778 *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
14779 }
14780 else
14781 *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
14782 return -1;
14783 }
14784
14785 bfd_vma
14786 x86_64_section_word (char *str, size_t len)
14787 {
14788 if (len == 5 && flag_code == CODE_64BIT && startswith (str, "large"))
14789 return SHF_X86_64_LARGE;
14790
14791 return -1;
14792 }
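/* Editor's note: illustrative directives accepted via the two hooks
   above in 64-bit code (syntax per the usual .section forms):
     .section .ldata,"awl",@progbits      - the 'l' flag letter
     .section .ldata,#alloc,#write,#large - the "large" section word
   Either spelling sets SHF_X86_64_LARGE on the section.  */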
14793
14794 static void
14795 handle_large_common (int small ATTRIBUTE_UNUSED)
14796 {
14797 if (flag_code != CODE_64BIT)
14798 {
14799 s_comm_internal (0, elf_common_parse);
14800 as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
14801 }
14802 else
14803 {
14804 static segT lbss_section;
14805 asection *saved_com_section_ptr = elf_com_section_ptr;
14806 asection *saved_bss_section = bss_section;
14807
14808 if (lbss_section == NULL)
14809 {
14810 flagword applicable;
14811 segT seg = now_seg;
14812 subsegT subseg = now_subseg;
14813
14814 /* The .lbss section is for local .largecomm symbols. */
14815 lbss_section = subseg_new (".lbss", 0);
14816 applicable = bfd_applicable_section_flags (stdoutput);
14817 bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
14818 seg_info (lbss_section)->bss = 1;
14819
14820 subseg_set (seg, subseg);
14821 }
14822
14823 elf_com_section_ptr = &_bfd_elf_large_com_section;
14824 bss_section = lbss_section;
14825
14826 s_comm_internal (0, elf_common_parse);
14827
14828 elf_com_section_ptr = saved_com_section_ptr;
14829 bss_section = saved_bss_section;
14830 }
14831 }
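/* Editor's note: usage sketch for the directive above.  In 64-bit mode
     .largecomm big_buf, 0x100000, 32
   parses exactly like .comm, but the swapped section pointers steer a
   local definition into .lbss and a common symbol into the large
   common section; in 32-bit mode it degrades to plain .comm with a
   warning.  */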
14832 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */